diff --git a/Project.toml b/Project.toml
index e8b6f82..bd4dd8c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FiniteElementContainers"
 uuid = "d08262e4-672f-4e7f-a976-f2cea5767631"
-version = "0.13.1"
+version = "0.13.2"
 authors = ["Craig M. Hamel <cmhamel32@gmail.com> and contributors"]
 
 [deps]
diff --git a/examples/app/src/MyApp.jl b/examples/app/src/MyApp.jl
index eb288ee..c9ccd83 100644
--- a/examples/app/src/MyApp.jl
+++ b/examples/app/src/MyApp.jl
@@ -19,16 +19,13 @@ function app_main(ARGS::Vector{String})
     #####################################
     # need to define some types
     #####################################
+    ET = ExodusDatabase{Int32, Int32, Int32, Float64}
     FT = AT.ExpressionFunction{Float64}
     SPT = FEC.CSCMatrix()
 
     #####################################
     # setup function space
     #####################################
-    # block_ids = FEC._setup_juliac_safe_block_to_ref_fe_id(sim.mesh)
-    # for n in 1:16
-    #     println(Core.stdout, "Block $n element type $(block_ids[n])")
-    # end
     V = FunctionSpace{true}(sim.mesh, H1Field, Lagrange)
     u = ScalarFunction(V, "u")
     dof = DofManager{false}(u)
@@ -44,41 +41,21 @@ function app_main(ARGS::Vector{String})
     end
 
     times = TimeStepper(0.0, 0.0, 1)
-    p = FEC.TypeStableParameters{FT}(sim.mesh, asm, physics, props, sim.ics, sim.dbcs, times)
-    # U = create_field(asm)
-    # Uu = create_unknowns(asm)
-
-    # # setting up ics
-    # # ics = InitialConditions{FT}(sim.mesh, dof, sim.ics)
-    # X = sim.mesh.nodal_coords
-    # # println(Core.stdout, "X size 1 = $(size(X, 1))")
-    # # println(Core.stdout, "X size 2 = $(size(X, 2))")
-    # # for block_id in 1:1
-    # #     ref_fe = FEC.block_reference_element(V, block_id)
-    # #     println(Core.stdout, ref_fe)
-    # # end
-
+    p = FEC.TypeStableParameters{FT}(
+        sim.mesh, asm,
+        physics, props,
+        sim.ics, sim.dbcs, sim.nbcs, sim.srcs, times
+    )
     FEC.initialize!(p)
-    # FEC._update_for_assembly!(p, dof, Uu)
 
-    # # lsolver = DirectLinearSolver(asm)
-    # # lsolver = x -> IterativeLinearSolver(x, :cg)
     preconditioner = I
     timer = TimerOutput()
     workspace = CgWorkspace(stiffness(asm), residual(asm))
     ΔUu = create_unknowns(asm)
     lsolver = IterativeLinearSolver(asm, preconditioner, workspace, timer, ΔUu)
     nlsolver = NewtonSolver(lsolver)
-
-    # println(Core.stdout, "Number of blocks = $(length(sim.mesh.element_types))")
-    # # N = length(sim.mesh.element_types)
-    # # # N = Val(N)
-    # # N = Val{N}()
-    # # ref_fe = _get_ref_fe(V, N)
-    # # indices = ntuple(x -> x, N)
-    # # indices = ntuple(x -> x, Val(N))
-    # # s = MyStruct{N}(N)::MyStruct{N}
     integrator = QuasiStaticIntegrator(nlsolver)
+
     println(sim.log_file.io, "Setup complete")
     println(sim.log_file.io, "Solving...")
     evolve!(integrator, p)
@@ -86,11 +63,15 @@ function app_main(ARGS::Vector{String})
     println(Core.stdout, maximum(p.field.data))
     println(Core.stdout, minimum(p.field.data))
 
-    # pp = PostProcessor(sim.mesh, "juliac_output.e", u)
-
+    pp = PostProcessor{ET}(FEC.ExodusMesh, sim.mesh, "juliac_output.e")
+    FEC.add_function!(pp, u)
+    FEC.finalize_setup!(pp)
+    write_times(pp, 1, 0.0)
+    write_field(pp, 1, ("u",), p.field)
+    close(pp)
 end
 
 function @main(ARGS::Vector{String})
     app_main(ARGS)
     return 0
-end
\ No newline at end of file
+end
diff --git a/examples/expression/Project.toml b/examples/expression/Project.toml
index 790eaaa..e9d5877 100644
--- a/examples/expression/Project.toml
+++ b/examples/expression/Project.toml
@@ -1,5 +1,6 @@
 name = "MyApp"
 
 [deps]
+DynamicExpressions = "a40a106e-89c9-4ca8-8020-a735e8728b6b"
 FiniteElementContainers = "d08262e4-672f-4e7f-a976-f2cea5767631"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
diff --git a/examples/expression/build.jl b/examples/expression/make.jl
similarity index 100%
rename from examples/expression/build.jl
rename to examples/expression/make.jl
diff --git a/examples/expression/src/MyApp.jl b/examples/expression/src/MyApp.jl
index a929232..3d90057 100644
--- a/examples/expression/src/MyApp.jl
+++ b/examples/expression/src/MyApp.jl
@@ -1,7 +1,11 @@
 import FiniteElementContainers: Expressions as Exp
 import FiniteElementContainers.Expressions: Parser
+using DynamicExpressions
 using StaticArrays
 
+const operators = OperatorEnum(1 => (sin, cos, exp), 2 => (+, -, *, /))
+
+
 function test_method()
     # constant
     println(Core.stdout, "Number")
@@ -54,6 +58,14 @@ function test_method()
     func = Exp.ExpressionFunction{Float64}(string, ["x", "y"])
     val = func([1.0, 2.0])
     println(Core.stdout, "func = $val")
+
+    # operators = OperatorEnum(1 => (sin, cos, exp), 2 => (+, -, *, /))
+    variable_names = ["x", "y"]
+    parsed_expr = parse_expression(
+        :(sin(2.0 * x + exp(y + 5.0))); 
+        operators = operators,
+        variable_names = variable_names
+    )
 end
 
 function @main(ARGS)
diff --git a/src/Fields.jl b/src/Fields.jl
index d8a2160..a1328bc 100644
--- a/src/Fields.jl
+++ b/src/Fields.jl
@@ -102,7 +102,7 @@ struct Connectivity{
     nelems::Vector{T}
     offsets::Vector{T}
 
-    function Connectivity(mats::Vector{<:AbstractArray{<:Integer, 2}})
+    function Connectivity(mats::Vector{<:AbstractMatrix{<:Integer}})
         nblocks = length(mats)
         nepes = map(x -> size(x, 1), mats)
         nelems = map(x -> size(x, 2), mats)
@@ -135,6 +135,8 @@ function Adapt.adapt_structure(to, conn::Connectivity{T, D}) where {T, D}
     )
 end
 
+Base.eltype(::Connectivity{T, D}) where {T, D} = T
+
 # NOT GPU safe
 function connectivity(conn::Connectivity, b::Int)
     nepe = conn.nepes[b]
diff --git a/src/FunctionSpaces.jl b/src/FunctionSpaces.jl
index b5ac77c..1f9239a 100644
--- a/src/FunctionSpaces.jl
+++ b/src/FunctionSpaces.jl
@@ -88,18 +88,15 @@ end
 
 # Lagrange elements
 # defaulting to fully integrated elements for now
-function _setup_juliac_safe_ref_fes(::Type{Lagrange}, q_type::Type{QT}) where {QT <: AbstractQuadratureType}
-  ref_fes = (
-    ReferenceFE(Hex{Lagrange, 1}(), QT(2, 2)),  # HEX8 for Lagrange
-    ReferenceFE(Quad{Lagrange, 1}(), QT(2, 2)), # QUAD4 for Lagrange
-    ReferenceFE(Quad{Lagrange, 2}(), QT(2, 2)), # QUAD9 for Lagrange
-    ReferenceFE(Tri{Lagrange, 1}(), QT(2, 2)),  # Tri3 for Lagrange
-    ReferenceFE(Tri{Lagrange, 2}(), QT(2, 2)),  # Tri3 for Lagrange
-    ReferenceFE(Tet{Lagrange, 1}(), QT(2, 2)),  # Tri3 for Lagrange
-    ReferenceFE(Tet{Lagrange, 2}(), QT(2, 2))   # Tri3 for Lagrange
-  )
-  return ref_fes
-end
+const _juliac_safe_ref_fes = (
+  ReferenceFE(Hex{Lagrange, 1}(), GaussLobattoLegendre(2, 2)),  # HEX8 for Lagrange
+  ReferenceFE(Quad{Lagrange, 1}(), GaussLobattoLegendre(2, 2)), # QUAD4 for Lagrange
+  ReferenceFE(Quad{Lagrange, 2}(), GaussLobattoLegendre(2, 2)), # QUAD9 for Lagrange
+  ReferenceFE(Tri{Lagrange, 1}(), GaussLobattoLegendre(2, 2)),  # Tri3 for Lagrange
+  ReferenceFE(Tri{Lagrange, 2}(), GaussLobattoLegendre(2, 2)),  # Tri3 for Lagrange
+  ReferenceFE(Tet{Lagrange, 1}(), GaussLobattoLegendre(2, 2)),  # Tri3 for Lagrange
+  ReferenceFE(Tet{Lagrange, 2}(), GaussLobattoLegendre(2, 2))   # Tri3 for Lagrange
+)
 
 """
 default code path that sets up ref fes as a namedtuple
@@ -167,10 +164,10 @@ struct FunctionSpace{
   RefFEs
 } <: AbstractFunctionSpace
   block_names::Vector{String}
-  # block_to_ref_fe_id::Vector{IT} # only used in juliac builds
   block_to_ref_fe_id::BTRE
   coords::Coords
   elem_conns::Connectivity{IT, IV}
+  # need to remove this mabye?
   elem_id_maps::Vector{Vector{IT}} # TODO create new type for ID map similar to connectivity
   ref_fes::RefFEs
 
@@ -209,13 +206,15 @@ function FunctionSpace{is_juliac_safe}(
     elseif p_degree == 0
       @assert false "TODO 0 order elements"
     else
+      # TODO all of this needs TLC
       ref_fes = _setup_ref_fes(mesh, interp_type, p_degree, q_type, q_degree)
       coords, conns = create_higher_order_mesh(mesh, H1Field, interp_type, p_degree)
       conns = Connectivity([val for val in values(conns)])
     end
   else
     if is_juliac_safe
-      ref_fes = _setup_juliac_safe_ref_fes(interp_type, q_type)
+      # ref_fes = _setup_juliac_safe_ref_fes(interp_type, q_type)
+      ref_fes = _juliac_safe_ref_fes
     else
       ref_fes = _setup_ref_fes(mesh, interp_type, nothing, q_type, q_degree)
     end
@@ -268,7 +267,8 @@ function Adapt.adapt_structure(to, fspace::FunctionSpace)
     fspace.block_names, fspace.block_to_ref_fe_id,
     adapt(to, fspace.coords),
     adapt(to, fspace.elem_conns), 
-    fspace.elem_id_maps,
+    # fspace.elem_id_maps,
+    map(x -> adapt(to, x), fspace.elem_id_maps),
     map(x -> adapt(to, x), fspace.ref_fes)
   )
 end
@@ -282,7 +282,7 @@ function Base.show(io::IO, fspace::FunctionSpace)
   end
 end
 
-function _is_juliac_safe(::FunctionSpace{B, I, V, C, R}) where {B, I, V, C, R}
+function _is_juliac_safe(::FunctionSpace{B, I, V, BTRE, C, R}) where {B, I, V, BTRE, C, R}
   return B
 end
 
@@ -290,15 +290,46 @@ function block_entity_size(fspace::FunctionSpace, b::Int)
   return (num_entities_per_element(fspace, b), num_elements(fspace, b))
 end
 
-function block_reference_element(fspace::FunctionSpace{false, I, V, C, R}, block_id::Int) where {I, V, C, R}
+function block_reference_element(fspace::FunctionSpace{false, I, V, BTRE, C, R}, block_id::Int) where {I, V, BTRE, C, R}
   return fspace.ref_fes[block_id]
 end
 
-function block_reference_element(fspace::FunctionSpace{true, I, V, C, R}, block_id::Int) where {I, V, C, R}
-  ref_fe_id = fspace.block_to_ref_fe_id[block_id]
-  return fspace.ref_fes[ref_fe_id]
+@generated function block_reference_element(
+  fspace::FunctionSpace{true, IT, IV, BTRE, C, R},
+  block_id::Int
+) where {IT, IV, BTRE, C, R}
+
+  n_refs = length(BTRE.parameters)
+
+  cases = map(1:n_refs) do j
+      quote
+          if block_id == $j
+              return fspace.ref_fes[$j]
+          end
+      end
+  end
+
+  quote
+      id = fspace.block_to_ref_fe_id[block_id]  # runtime value (OK)
+
+      if id == -1
+          error("Inactive block ", block_id)
+      end
+
+      $(cases...)  # ONLY depends on compile-time j
+
+      error("Invalid ref_fe id ", id)
+  end
 end
 
+# function block_reference_element(
+#   fspace::FunctionSpace{true, IT, IV, BTRE, C, R},
+#   block_id::Int
+# ) where {IT, IV, BTRE, C, R}
+#   index = fspace.block_to_ref_fe_id[block_id]
+#   return _block_reference_element(fspace, Val{index}())
+# end
+
 # this does not work behind juliac
 function block_quadrature_size(fspace::FunctionSpace{false}, b::Int)
   ref_fe = block_reference_element(fspace, b)
diff --git a/src/Parameters.jl b/src/Parameters.jl
index 590a4c1..06caca4 100644
--- a/src/Parameters.jl
+++ b/src/Parameters.jl
@@ -111,26 +111,6 @@ function Parameters(
   end
 
   # setup state variables
-  # state_old = Array{Float64, 3}[]
-  # state_new = Array{Float64, 3}[]
-  # for (b, val) in enumerate(values(physics))
-  #   # create state variables for this block physics
-  #   NS = num_states(val)
-  #   NQ, NE = block_quadrature_size(fspace, b)
-
-  #   state_old_temp = zeros(NS, NQ, NE)
-  #   state_new_temp = zeros(NS, NQ, NE)
-  #   for e in 1:NE
-  #     for q in 1:NQ
-  #       state_old_temp[:, q, e] = create_initial_state(val)
-  #       state_new_temp[:, q, e] = create_initial_state(val)
-  #     end
-  #   end
-  #   push!(state_old, state_old_temp)
-  #   push!(state_new, state_new_temp)
-  # end
-  # state_old = L2Field(state_old)
-  # state_new = L2Field(state_new)
   state_old, state_new = _setup_state_variables(fspace, physics)
 
   # scratch
@@ -214,10 +194,7 @@ struct TypeStableParameters{
   RT     <: Number,
   IV     <: AbstractVector{IT},
   RV     <: AbstractVector{RT},
-  # RM1    <: AbstractMatrix,
-  # RM2    <: AbstractMatrix,
-  # RM3    <: AbstractMatrix,
-  # RM4    <: AbstractMatrix,
+  RM     <: AbstractMatrix{<:SVector},
   Phys,
   Props,
   Coords <: AbstractField,
@@ -225,9 +202,9 @@ struct TypeStableParameters{
 } <: AbstractParameters
   ics::InitialConditions{Vector{InitialConditionFunction{FuncT}}, IV, RV}
   dirichlet_bcs::DirichletBCs{Vector{DirichletBCFunction{FuncT, FuncT, FuncT}}, IV, RV}
-  # neumann_bcs::NeumannBCs{NBCFuncs, IT, IV, RM1}
+  neumann_bcs::NeumannBCs{Vector{NeumannBCFunction{FuncT}}, IT, IV, RM}
   # robin_bcs::RobinBCs{RBCFuncs, IT, IV, RM2, RM3}
-  # sources::Sources{SRCFuncs, RM4}
+  sources::Sources{Vector{SourceFunction{FuncT}}, RM}
   times::TimeStepper{RT}
   physics::Phys
   properties::Props
@@ -239,11 +216,14 @@ struct TypeStableParameters{
   # scratch fields
   hvp_scratch_field::Field
 
-  function TypeStableParameters{F}(mesh, assembler, physics, props, ics, dbcs, times) where F
+  function TypeStableParameters{F}(mesh, assembler, physics, props, ics, dbcs, nbcs, srcs, times) where F
     dof = assembler.dof
+    ND = size(dof, 1)
     fspace = function_space(dof)
     ics = InitialConditions{F}(mesh, dof, ics)
     dbcs = DirichletBCs{F}(mesh, dof, dbcs)
+    nbcs = NeumannBCs{F}(mesh, dof, nbcs)
+    srcs = Sources{F}(mesh, dof, srcs)
 
     state_old, state_new = _setup_state_variables(fspace, physics)
 
@@ -256,27 +236,13 @@ struct TypeStableParameters{
     update_dofs!(assembler, dbcs)
 
     new{
-      F, Int, Float64, Vector{Int}, Vector{Float64},
+      F, Int, Float64, Vector{Int}, Vector{Float64}, Matrix{SVector{ND, Float64}},
       typeof(physics), typeof(props), typeof(mesh.nodal_coords), typeof(field)
     }(
-      ics, dbcs, times, 
+      ics, dbcs, nbcs, srcs,
+      times, 
       physics, props, state_old, state_new, coords, field, field_old, hvp_scratch_field
     )
-
-    # TODO need to settle on expected format for nbcs, rbcs, and sources...
-    # should we always expect a staticarrays even for a 1 dof problem?
-    # RM1 = Matrix{Float64}
-    # RM2 = Matrix{Float64}
-    # RM3 = Matrix{Float64}
-    # RM4 = Matrix{Float64}
-
-    # new{
-    #   F, Int, Float64, Vector{Int}, Vector{Float64}, RM1, RM2, RM3, RM4,
-    #   typeof(physics), typeof(props), typeof(mesh.nodal_coords), typeof(field)
-    # }(
-    #   ics, dbcs, times, 
-    #   physics, props, state_old, state_new, coords, field, field_old, hvp_scratch_field
-    # )
   end
 end
 
@@ -360,8 +326,8 @@ function update_bc_values!(p::TypeStableParameters, assembler)
   X = coordinates(p)
   t = current_time(p)
   update_bc_values!(p.dirichlet_bcs, X, t)
-  # update_bc_values!(p.neumann_bcs, assembler, X, t)
-  # update_source_values!(p.sources, assembler, X, t)
+  update_bc_values!(p.neumann_bcs, assembler, X, t)
+  update_source_values!(p.sources, assembler, X, t)
 
   # TODO how to handle Robin BCs?
   # currently assembly methods handle updating the field
diff --git a/src/PostProcessors.jl b/src/PostProcessors.jl
index 13fe36a..90a41bc 100644
--- a/src/PostProcessors.jl
+++ b/src/PostProcessors.jl
@@ -1,8 +1,27 @@
 abstract type AbstractPostProcessor end
 
 # TODO add a csv file for global stuff
+
+# shoudl change the whole struct to allow for 
+# for set up all at once or sequentially
+#
+# need to store var names, and also allocate some memory
+# here in the form of a Dict{String, Any} that can
+# map simple keys like "displ" => H1Field for instance
+# then this struct can handle all host side memory
+# and give a Pair of key => DeviceArray we can copy
+# into the host side and then write
+#
 struct PostProcessor{O}
+  output_file_name::String
   field_output_db::O
+  nodal_names::Vector{String}
+  element_names::Vector{String}
+  global_names::Vector{String}
+
+  function PostProcessor(f, db::O, nn, en, gn) where O
+    new{O}(f, db, nn, en, gn)
+  end
 end
 
 function PostProcessor(
@@ -16,8 +35,8 @@ function PostProcessor(
   end
 
   ext = splitext(output_file)
-  if occursin(".g", output_file) || occursin(".e", output_file) || occursin(".exo", output_file)
-    pp = PostProcessor(ExodusMesh, output_file, vars...; extra_nodal_names=extra_nodal_names)
+  if occursin(".e", output_file) || occursin(".exo", output_file)
+    pp = PostProcessor(ExodusMesh, output_file, vars...; extra_nodal_names = extra_nodal_names)
   else
     throw(ErrorException("Unsupported file type with extension $ext"))
   end
@@ -27,5 +46,37 @@ end
 
 Base.close(pp::PostProcessor) = Base.close(pp.field_output_db)
 
+# TODO finish me...
+# also start using me in the exodus pp
+function add_function!(pp::PostProcessor, u)
+  if isa(u.fspace.coords, H1Field)
+    append!(pp.nodal_names, names(u))
+  # elseif isa(u.fspace.coords, L2Field)
+  #   append!(pp.element_names, names(u))
+  # below will be deprecated soon
+  # elseif isa(u.fspace.coords, NamedTuple)
+  #   max_q_points = map(num_quadrature_points, values(u.fspace.ref_fes)) |> maximum
+  #   temp_names = Symbol[]
+  #   for name in names(var)
+  #     for q in 1:max_q_points
+  #       # push!(temp_names, Symbol(String(name) * "_$q"))
+  #       push!(temp_names, "$(name)_$q")
+  #     end
+  #   end
+  else
+    @assert false "Unsupported variable type"
+  end
+end
+
+function finalize_setup!(pp::PostProcessor)
+  if length(pp.nodal_names) > 0
+    write_names(pp.field_output_db, NodalVariable, pp.nodal_names)
+  end
+
+  if length(pp.element_names) > 0
+    write_names(pp.field_output_db, ElementVariable, pp.element_names)
+  end
+end
+
 function write_field end
 function write_times end
diff --git a/src/Utils.jl b/src/Utils.jl
index 220fe87..e7bc9c5 100644
--- a/src/Utils.jl
+++ b/src/Utils.jl
@@ -154,9 +154,9 @@ end
 # this one only does the right thing when physics is a namedtuple
 @generated function foreach_block(
     f,
-    fspace::FunctionSpace{IT, IV, C, R},
+    fspace::FunctionSpace,
     p::Parameters
-) where {IT, IV, C, R}
+)
     stmts = Expr(:block)
     # NOTE need to grab one of the NamedTuple types to get the field count
     physics_type = fieldtype(p, :physics)
@@ -196,4 +196,57 @@ end
         end
     end
     quote $(exprs...) end
-end
\ No newline at end of file
+end
+# function foreach_block(
+#     f,
+#     fspace,
+#     p::TypeStableParameters
+# )
+#     for b in 1:num_blocks(fspace)
+#         f(p.physics[b], p.properties[b], block_reference_element(fspace, b), b)
+#     end
+# end
+
+# assumes ref_fes is a namedtuple
+@generated function foreach_block(
+    f,
+    fspace::FunctionSpace{false, IT, IV, BTRE, C, R}
+) where {IT, IV, BTRE, C, R}
+    stmts = Expr(:block)
+    # NOTE need to grab one of the NamedTuple types to get the field count
+    ref_fes_type = fieldtype(fspace, :ref_fes)
+    N = fieldcount(ref_fes_type)
+    for k in 1:N
+        push!(stmts.args, quote
+            f(
+                block_reference_element(fspace, $k), $k
+            )
+        end)
+    end
+    return stmts
+end
+
+@generated function foreach_block(
+    f,
+    fspace::FunctionSpace{true, IT, IV, BTRE, C, R}
+) where {IT, IV, BTRE, C, R}
+    exprs = map(1:MAX_BLOCKS) do i
+        # For each slot i, generate a branch over ALL possible ref_fe indices
+        # block_to_ref_fe_id[i] == -1 means inactive block
+        # Otherwise it's 1..length(REFS) — enumerate them all statically
+        n_refs = length(BTRE.parameters)
+        ref_dispatches = map(1:n_refs) do j
+            quote
+                if fspace.block_to_ref_fe_id[$i] == $j
+                    f(fspace.ref_fes[$j], $i)
+                end
+            end
+        end
+        quote
+            if fspace.block_to_ref_fe_id[$i] != -1
+                $(ref_dispatches...)
+            end
+        end
+    end
+    quote $(exprs...) end
+end
diff --git a/src/assemblers/Assemblers.jl b/src/assemblers/Assemblers.jl
index 42917ef..aa42b99 100644
--- a/src/assemblers/Assemblers.jl
+++ b/src/assemblers/Assemblers.jl
@@ -276,15 +276,13 @@ end
 
 """
 $(TYPEDSIGNATURES)
+The Hessian operator currently aliases the assembled stiffness — no FE code
+path in this repository writes a separate Hessian buffer.  If a future caller
+needs `H ≠ K`, add a dedicated `hessian_storage` field on the assembler and
+specialize this method accordingly.
 """
 function hessian(asm::AbstractAssembler)
-  H = _sparse_matrix(asm, asm.hessian_storage)
-
-  if _is_condensed(asm.dof)
-    _adjust_matrix_entries_for_constraints!(H, asm.constraint_storage)
-  end
-
-  return H
+  return stiffness(asm)
 end
 
 # new approach requiring access to the v that makes Hv
@@ -311,6 +309,10 @@ end
 $(TYPEDSIGNATURES)
 """
 function mass(asm::AbstractAssembler)
+  if isempty(asm.mass_storage)
+    return _zero_sparse_matrix(asm)
+  end
+
   M = _sparse_matrix(asm, asm.mass_storage)
 
   if _is_condensed(asm.dof)
@@ -348,6 +350,10 @@ end
 $(TYPEDSIGNATURES)
 """
 function stiffness(asm::AbstractAssembler)
+  if isempty(asm.stiffness_storage)
+    return _zero_sparse_matrix(asm)
+  end
+
   K = _sparse_matrix(asm, asm.stiffness_storage)
 
   if _is_condensed(asm.dof)
@@ -357,6 +363,18 @@ function stiffness(asm::AbstractAssembler)
   return K
 end
 
+# Zero sparse matrix of the right shape for an assembler that hasn't (yet)
+# assembled.  Matches the shape `_sparse_matrix` would produce: condensed →
+# full ND*NN; uncondensed → length(unknown_dofs).
+function _zero_sparse_matrix(asm::AbstractAssembler)
+  if _is_condensed(asm.dof)
+    n = length(asm.dof)
+  else
+    n = length(asm.dof.unknown_dofs)
+  end
+  return SparseArrays.spzeros(n, n)
+end
+
 function _assemble_block!(
   field,
   conns::Conn, coffset::Int,
diff --git a/src/assemblers/Matrix.jl b/src/assemblers/Matrix.jl
index fb001aa..ca0fbb6 100644
--- a/src/assemblers/Matrix.jl
+++ b/src/assemblers/Matrix.jl
@@ -1,6 +1,7 @@
 function assemble_mass!(
   assembler, func::F, Uu, p
 ) where F <: Function
+  _check_matrix_assembly_supported(assembler, "assemble_mass!")
   assemble_matrix!(
     assembler.mass_storage, assembler.matrix_pattern, assembler.dof,
     func, Uu, p;
@@ -11,6 +12,7 @@ end
 function assemble_stiffness!(
   assembler, func::F, Uu, p
 ) where F <: Function
+  _check_matrix_assembly_supported(assembler, "assemble_stiffness!")
   assemble_matrix!(
     assembler.stiffness_storage, assembler.matrix_pattern, assembler.dof,
     func, Uu, p;
@@ -18,6 +20,13 @@ function assemble_stiffness!(
   )
 end
 
+@inline function _check_matrix_assembly_supported(asm, fname::AbstractString)
+  if asm isa SparseMatrixAssembler && _is_matrix_free(asm)
+    error("$fname called on a matrix-free SparseMatrixAssembler.  " *
+          "Re-create the assembler with matrix_free=false to enable matrix assembly.")
+  end
+end
+
 """
 $(TYPEDSIGNATURES)
 Note this is hard coded to storing the assembled sparse matrix in 
diff --git a/src/assemblers/Source.jl b/src/assemblers/Source.jl
index a0a6ffc..03f43e9 100644
--- a/src/assemblers/Source.jl
+++ b/src/assemblers/Source.jl
@@ -26,18 +26,18 @@ function assemble_vector_source!(storage, pattern, dof, Uu, p)
   X = coordinates(p)
   sources = p.sources
   conns = fspace.elem_conns
-  for (block_id, source) in zip(
-    sources.source_block_ids, sources.source_caches
-  )
-    # ref_fe = getfield(fspace.ref_fes, block_name)
-    ref_fe = fspace.ref_fes[block_id]
-    vals   = source.vals
-
-    _assemble_block_vector_source!(
-      storage,
-      conns.data, conns.offsets[block_id],
-      ref_fe, X, U, vals,
-    )
+  foreach_block(fspace) do ref_fe, b
+    block_id = sources.block_id_to_source[b]
+    if block_id == -1
+      # do nothing
+    else
+      cache = sources.source_caches[block_id]
+      _assemble_block_vector_source!(
+        storage,
+        conns.data, conns.offsets[block_id],
+        ref_fe, X, U, cache.vals
+      )
+    end
   end
   return nothing
 end
@@ -45,6 +45,7 @@ end
 function _assemble_block_vector_source!(
   field::AbstractField, conns, coffset, ref_fe, X, U, vals
 )
+  # need to maybe make this fec_foreach
   fec_foraxes(vals, 2) do e
     conn = connectivity(ref_fe, conns, e, coffset)
     X_el = _element_level_fields_flat(X, ref_fe, conn)
diff --git a/src/assemblers/SparseMatrixAssembler.jl b/src/assemblers/SparseMatrixAssembler.jl
index fb42f86..add3280 100644
--- a/src/assemblers/SparseMatrixAssembler.jl
+++ b/src/assemblers/SparseMatrixAssembler.jl
@@ -18,8 +18,6 @@ struct SparseMatrixAssembler{
   matrix_pattern::SparseMatrixPattern{IV, RV}
   vector_pattern::SparseVectorPattern{IV}
   constraint_storage::RV
-  damping_storage::RV
-  hessian_storage::RV
   mass_storage::RV
   residual_storage::FieldStorage
   residual_unknowns::RV
@@ -35,7 +33,7 @@ struct SparseMatrixAssembler{
     UseSparseVec
   }(
     dof, matrix_pattern, vector_pattern, constraint_storage,
-    damping_storage, hessian_storage, mass_storage,
+    mass_storage,
     residual_storage, residual_unknowns,
     scalar_quadrature_storage,
     stiffness_storage, stiffness_action_storage, stiffness_action_unknowns
@@ -46,13 +44,11 @@ struct SparseMatrixAssembler{
       typeof(dof.var), typeof(stiffness_action_storage)
     }(
       dof, matrix_pattern, vector_pattern,
-      constraint_storage, 
-      damping_storage, 
-      hessian_storage,
+      constraint_storage,
       mass_storage,
       residual_storage, residual_unknowns,
       scalar_quadrature_storage,
-      stiffness_storage, 
+      stiffness_storage,
       stiffness_action_storage, stiffness_action_unknowns
     )
   end
@@ -69,19 +65,27 @@ function SparseMatrixAssembler(
   dof::DofManager;
   sparse_matrix_type::Symbol = :csc,
   use_inplace_methods::Bool = false,
-  use_sparse_vector::Bool = false
+  use_sparse_vector::Bool = false,
+  matrix_free::Bool = false
 )
   return SparseMatrixAssembler{
     _sym_to_sparse_matrix_type(sparse_matrix_type),
     use_inplace_methods,
     use_sparse_vector
-  }(dof)
+  }(dof; matrix_free = matrix_free)
 end
 
 function SparseMatrixAssembler{SparseMatrixType, UseInPlaceMethods, UseSparseVec}(
-  dof::DofManager
+  dof::DofManager;
+  matrix_free::Bool = false
 ) where {SparseMatrixType, UseInPlaceMethods, UseSparseVec}
-  matrix_pattern = SparseMatrixPattern(dof)
+  # When matrix_free=true, the matrix-side storage (the sparse pattern and the
+  # matrix-value buffers) are constructed empty.  This avoids ~7 GB of
+  # allocations on a ~530 k-DOF mesh for integrators that never assemble a
+  # global matrix (e.g. central difference, L-BFGS).  Calling assemble_matrix!
+  # on such an assembler errors with a clear message; the mass/stiffness
+  # accessors return a zero sparse matrix of the correct shape.
+  matrix_pattern = matrix_free ? _empty_matrix_pattern(dof) : SparseMatrixPattern(dof)
   vector_pattern = SparseVectorPattern(dof)
 
   ND, NN = size(dof)
@@ -89,13 +93,11 @@ function SparseMatrixAssembler{SparseMatrixType, UseInPlaceMethods, UseSparseVec
   constraint_storage = zeros(n_total_dofs)
   constraint_storage[dof.dirichlet_dofs] .= 1.
 
-  damping_storage = zeros(num_entries(matrix_pattern))
-  hessian_storage = zeros(num_entries(matrix_pattern))
-  mass_storage = zeros(num_entries(matrix_pattern))
+  n_matrix_entries = matrix_free ? 0 : num_entries(matrix_pattern)
+  mass_storage = zeros(n_matrix_entries)
   residual_storage = create_field(dof)
-  # residual_storage = create_assembler_cache(matrix_pattern, AssembledVector())
   residual_unknowns = create_unknowns(dof)
-  stiffness_storage = zeros(num_entries(matrix_pattern))
+  stiffness_storage = zeros(n_matrix_entries)
   stiffness_action_storage = create_field(dof)
   stiffness_action_unknowns = create_unknowns(dof)
 
@@ -109,23 +111,13 @@ function SparseMatrixAssembler{SparseMatrixType, UseInPlaceMethods, UseSparseVec
     SparseMatrixType,
     UseInPlaceMethods,
     UseSparseVec
-    # _sym_to_sparse_matrix_type(sparse_matrix_type),
-    # sparse_matrix_type,
-    # use_inplace_methods,
-    # use_sparse_vector,
-    # typeof(dof.unknown_dofs),
-    # typeof(residual_storage.data),
-    # typeof(dof.var),
-    # typeof(residual_storage)
   }(
     dof, matrix_pattern, vector_pattern,
-    constraint_storage, 
-    damping_storage, 
-    hessian_storage,
+    constraint_storage,
     mass_storage,
     residual_storage, residual_unknowns,
     scalar_quadrature_storage,
-    stiffness_storage, 
+    stiffness_storage,
     stiffness_action_storage, stiffness_action_unknowns
   )
 end
@@ -139,28 +131,44 @@ function SparseMatrixAssembler(
   return SparseMatrixAssembler(dof; kwargs...)
 end
 
+# Construct a SparseMatrixPattern with all internal arrays empty.  Used as a
+# placeholder for matrix-free assemblers; carries no sparsity information and
+# cannot be used by SparseArrays.sparse! or block_view.
+function _empty_matrix_pattern(dof::DofManager)
+  IV = typeof(dof.unknown_dofs)
+  return SparseMatrixPattern{IV, Vector{Float64}}(
+    similar(dof.unknown_dofs, 0),  # Is
+    similar(dof.unknown_dofs, 0),  # Js
+    similar(dof.unknown_dofs, 0),  # unknown_dofs
+    Int[],                          # block_start_indices
+    [0],                            # max_entries  (single zero — see create_assembler_cache)
+    similar(dof.unknown_dofs, 0),  # klasttouch
+    similar(dof.unknown_dofs, 0),  # csrrowptr
+    similar(dof.unknown_dofs, 0),  # csrcolval
+    Float64[],                      # csrnzval
+    similar(dof.unknown_dofs, 0),  # csccolptr
+    similar(dof.unknown_dofs, 0),  # cscrowval
+    Float64[],                      # cscnzval
+    similar(dof.unknown_dofs, 0)   # permutation
+  )
+end
+
+# True if this assembler was constructed with matrix_free=true: the sparsity
+# pattern and matrix-value buffers are empty and assemble_matrix! is forbidden.
+_is_matrix_free(asm::SparseMatrixAssembler) = isempty(asm.matrix_pattern.Is)
+
 function Adapt.adapt_structure(to, asm::SparseMatrixAssembler)
-  # dof = adapt(to, asm.dof)
-  # residual_storage = adapt(to, asm.residual_storage)
   return SparseMatrixAssembler{
     _is_condensed(asm.dof),
     _sparse_matrix_type(asm),
     _use_inplace_methods(asm),
     _use_sparse_vector(asm),
-    # typeof(dof.unknown_dofs),
-    # typeof(residual_storage.data),
-    # typeof(dof.var),
-    # typeof(residual_storage)
   }(
-    # dof,
     adapt(to, asm.dof),
     adapt(to, asm.matrix_pattern),
     adapt(to, asm.vector_pattern),
     adapt(to, asm.constraint_storage),
-    adapt(to, asm.damping_storage),
-    adapt(to, asm.hessian_storage),
     adapt(to, asm.mass_storage),
-    # residual_storage,
     adapt(to, asm.residual_storage),
     adapt(to, asm.residual_unknowns),
     adapt(to, asm.scalar_quadrature_storage),
@@ -181,6 +189,10 @@ function create_assembler_cache(asm::SparseMatrixAssembler, ::AssembledSparseVec
 end
 
 function create_assembler_cache(asm::SparseMatrixAssembler, ::AssembledMatrix)
+  if _is_matrix_free(asm)
+    error("create_assembler_cache(::AssembledMatrix) called on a matrix-free " *
+          "SparseMatrixAssembler.  Re-create the assembler with matrix_free=false.")
+  end
   backend = KA.get_backend(asm)
   return KA.zeros(backend, Float64, asm.matrix_pattern.max_entries[1])
 end
@@ -233,8 +245,15 @@ function update_dofs!(assembler::AbstractAssembler, dirichlet_bcs::DirichletBCs)
   else
     resize!(assembler.residual_unknowns, length(assembler.dof.unknown_dofs))
     resize!(assembler.stiffness_action_unknowns, length(assembler.dof.unknown_dofs))
-  
-    _update_dofs!(assembler.matrix_pattern, assembler.dof, ddofs)
+
+    # Skip matrix-pattern update on matrix-free assemblers — _update_dofs!
+    # would otherwise rebuild the pattern from scratch and silently flip the
+    # assembler back into the matrix-bearing mode.
+    if assembler isa SparseMatrixAssembler && _is_matrix_free(assembler)
+      # no-op: matrix pattern stays empty
+    else
+      _update_dofs!(assembler.matrix_pattern, assembler.dof, ddofs)
+    end
     _update_dofs!(assembler.vector_pattern, assembler.dof, ddofs)
   end
   return nothing
diff --git a/src/assemblers/WeaklyEnforcedBCs.jl b/src/assemblers/WeaklyEnforcedBCs.jl
index bd27147..5c59894 100644
--- a/src/assemblers/WeaklyEnforcedBCs.jl
+++ b/src/assemblers/WeaklyEnforcedBCs.jl
@@ -38,15 +38,19 @@ $(TYPEDSIGNATURES)
 function assemble_vector_weakly_enforced_bc!(
   storage, dof, U, X, bcs
 )
-  # do not zero!
-  for (n, bc) in enumerate(bcs.bc_caches)
-    block_id = bcs.block_ids[n]
-    ref_fe = block_reference_element(function_space(dof), block_id)
-    _assemble_block_vector_weakly_enforced_bc!(
-      storage,
-      U, X,
-      bc.element_conns.data, ref_fe, bc.sides, bc.vals
-    )
+  fspace = function_space(dof)
+  foreach_block(fspace) do ref_fe, b
+    block_id = bcs.block_id_to_bc[b]
+    if block_id == -1
+      # do nothing
+    else
+      cache = bcs.bc_caches[block_id]
+      _assemble_block_vector_weakly_enforced_bc!(
+        storage,
+        U, X,
+        cache.element_conns.data, ref_fe, cache.sides, cache.vals
+      )
+    end
   end
 end
 
diff --git a/src/bcs/BoundaryConditions.jl b/src/bcs/BoundaryConditions.jl
index f434899..d8848b1 100644
--- a/src/bcs/BoundaryConditions.jl
+++ b/src/bcs/BoundaryConditions.jl
@@ -279,10 +279,7 @@ function _setup_weakly_enforced_bc_container(mesh, dof, bcs, type)
 
       # TODO update nodes and dofs
       new_bk = BCBookKeeping(new_blocks, bk.dofs, new_elements, bk.nodes, new_sides, new_side_nodes)
-      # id = indexin(block_name, mesh.element_block_names)
       id = findfirst(x -> x == block_name, mesh.element_block_names)
-      # ref_fe = getproperty(fspace.ref_fes, block_name)
-      # ref_fe = fspace.ref_fes[mesh.element_types]
       ref_fe = fspace.ref_fes[id]
       NQ = num_surface_quadrature_points(ref_fe)
       ND = length(dof.var)
@@ -316,3 +313,79 @@ function _setup_weakly_enforced_bc_container(mesh, dof, bcs, type)
   end
   return new_bcs, new_funcs, block_ids, block_names
 end
+
+function _setup_sideset(mesh, dof, bc)
+  # just check that this is here..., if its not
+  # below method call will throw an error
+  _dof_index_from_var_name(dof, bc.var_name)
+
+  # get sset specific fields
+  sideset_name = bc.sset_name
+  elements = mesh.sideset_elems[sideset_name]
+  sides = mesh.sideset_sides[sideset_name]
+  side_nodes = mesh.sideset_side_nodes[sideset_name]
+
+  # gather the blocks that are present in this sideset
+  # and also map global element id to local element id
+  blocks = Vector{Int64}(undef, 0)
+  for (n, val) in enumerate(values(mesh.element_id_maps))
+    # note these are the local elem id to the block, e.g. starting from 1.
+    indices_in_sset = indexin(val, elements)
+    filter!(x -> x !== nothing, indices_in_sset)
+
+    if length(indices_in_sset) > 0
+      append!(blocks, repeat([n], length(indices_in_sset)))
+    end
+  end
+
+  unique_block_ids = sort(unique(blocks))
+  @assert length(unique_block_ids) == 1 "Sidesets need to be in a single block"
+  block_name = mesh.element_block_names[unique_block_ids[1]]
+  ids = findall(x -> x == unique_block_ids[1], blocks)
+  indices_in_sset = indexin(elements, mesh.element_id_maps[block_name])
+  filter!(x -> x !== nothing, indices_in_sset)
+  elements = convert(Vector{Int}, indices_in_sset)
+
+  # end bc bookkeeping
+
+  block_id = findfirst(x -> x == block_name, mesh.element_block_names)
+
+  # this probably doesn't do anything for when we just have one block
+  blocks = blocks[ids]
+  elements = elements[ids]
+  sides = sides[ids]
+  side_nodes = side_nodes[ids]
+
+  # setup cache and connectivity
+  fspace = function_space(dof)
+  # NQs = Int[]
+  # # foreach_block(fspace.block_to_ref_fe_id) do ref_fe, b
+  # foreach_block(fspace) do ref_fe, b
+  #   if b == block_id
+  #     # NQ = num_surface_quadrature_points(ref_fe)::Int
+  #     # NQ = 1
+  #     # push!(NQs, NQ)
+  #     push!(NQs, NQ)
+  #   end
+  # end
+  # @assert length(NQs) == 1
+  # NQ = NQs[1]
+  # @assert NQ != -1 "Bad quadrature"
+  nqs = -1 * ones(Int, MAX_BLOCKS)
+  foreach_block(fspace) do ref_fe, b
+    if b == block_id
+      nq = num_surface_quadrature_points(ref_fe)
+      nqs[b] = nq
+    end
+  end
+  index = findfirst(x -> x != -1, nqs)
+  NQ = nqs[index]
+  ND = size(dof, 1)
+
+  # need to carefully check this guy
+  conns = mesh.element_conns[block_name][:, elements]
+  conns = Connectivity(Matrix{Int}[conns])
+  vals = zeros(SVector{ND, Float64}, NQ, length(sides))
+
+  return block_id, block_name, conns, elements, sides, vals
+end
diff --git a/src/bcs/NeumannBCs.jl b/src/bcs/NeumannBCs.jl
index ab330fb..ab29eb6 100644
--- a/src/bcs/NeumannBCs.jl
+++ b/src/bcs/NeumannBCs.jl
@@ -44,6 +44,8 @@ struct NeumannBCContainer{
   end
 end
 
+_eltype(::Vector{NeumannBCContainer{IT, IV, RM}}) where {IT, IV, RM} = IT
+_indices_type(::Vector{NeumannBCContainer{IT, IV, RM}}) where {IT, IV, RM} = IV
 _vals_type(::Vector{NeumannBCContainer{IT, IV, RM}}) where {IT, IV, RM} = RM
 
 function Adapt.adapt_structure(to, bc::NeumannBCContainer)
@@ -81,34 +83,55 @@ struct NeumannBCs{
   IV      <: AbstractVector{IT},
   RM      <: AbstractMatrix{<:SVector},
 } <: AbstractBCs{BCFuncs}
+  block_id_to_bc::NTuple{MAX_BLOCKS, IT}
   bc_caches::Vector{NeumannBCContainer{IT, IV, RM}}
   bc_funcs::BCFuncs
-  block_ids::Vector{Int}
+  block_ids::Vector{IT}
   block_names::Vector{String}
-end
 
-# note this method has the potential to make 
-# bookkeeping.dofs and bookkeeping.nodes nonsensical
-# since we're splitting things off but not properly updating
-# these to match the current nodes and sides
-# TODO modify method to actually properly update
-# nodes and dofs
-
-# TODO below method also currently likely doesn't
-# handle blocks correclty 
-function NeumannBCs(mesh, dof::DofManager, neumann_bcs::Vector{NeumannBC})
-  if length(neumann_bcs) == 0
+  function NeumannBCs(mesh, dof::DofManager, neumann_bcs)
+    temp = NeumannBCs{Function}(mesh, dof, neumann_bcs)
+    funcs = map(x -> NeumannBCFunction(x.func), neumann_bcs)
+    return NeumannBCs(
+      temp.block_id_to_bc,
+      temp.bc_caches,
+      funcs,
+      temp.block_ids,
+      temp.block_names
+    )
+  end
+
+  function NeumannBCs{F}(mesh, dof::DofManager, neumann_bcs) where F
     ND = size(dof, 1)
     bc_caches = NeumannBCContainer{Int, Vector{Int}, Matrix{SVector{ND, Float64}}}[]
-    return NeumannBCs(bc_caches, NeumannBCFunction[], Int[], String[])
-  end
+    if length(neumann_bcs) == 0
+      block_id_to_bc = ntuple(i -> -1, Val(MAX_BLOCKS))
+      return NeumannBCs(block_id_to_bc, bc_caches, NeumannBCFunction{F}[], Int[], String[])
+    end
 
-  new_bcs, new_funcs, block_ids, block_names = _setup_weakly_enforced_bc_container(mesh, dof, neumann_bcs, NeumannBCContainer)
+    bc_funcs = map(bc -> NeumannBCFunction{F}(bc.func), neumann_bcs)
+    block_ids = Int[]
+    block_names = String[]
+    for bc in neumann_bcs
+      block_id, block_name, conns, elements, sides, vals = _setup_sideset(mesh, dof, bc)
+      push!(bc_caches, NeumannBCContainer(conns, elements, sides, vals))
+      push!(block_ids, block_id)
+      push!(block_names, block_name)
+    end
+    block_id_to_bc = ntuple(
+      i -> i <= length(block_ids) ? 
+      block_ids[i] : 
+      -1,
+      Val(MAX_BLOCKS)
+    )
+    return NeumannBCs(block_id_to_bc, bc_caches, bc_funcs, block_ids, block_names)
+  end
 
-  # TODO fix this up eventually
-  new_bcs = convert(Vector{typeof(new_bcs[1])}, new_bcs)
-  new_funcs = NeumannBCFunction.(new_funcs)
-  return NeumannBCs(new_bcs, new_funcs, block_ids, block_names)
+  function NeumannBCs(block_id_to_bc, bc_caches, bc_funcs, block_ids, block_names)
+    new{typeof(bc_funcs), eltype(block_ids), _indices_type(bc_caches), _vals_type(bc_caches)}(
+      block_id_to_bc, bc_caches, bc_funcs, block_ids, block_names
+    )
+  end
 end
 
 function Adapt.adapt_structure(to, bcs::NeumannBCs)
@@ -124,6 +147,7 @@ function Adapt.adapt_structure(to, bcs::NeumannBCs)
     bc_caches = NeumannBCContainer{Int, typeof(temp_int), typeof(temp_vals)}[]
   end
   return NeumannBCs(
+    bcs.block_id_to_bc,
     bc_caches,
     bcs.bc_funcs,
     bcs.block_ids,
@@ -133,12 +157,15 @@ end
 
 function update_bc_values!(bcs::NeumannBCs, assembler, X, t)
   fspace = function_space(assembler.dof)
-  for n in axes(bcs.block_ids, 1)
-    bc = bcs.bc_caches[n]
-    block_id = bcs.block_ids[n]
-    func = bcs.bc_funcs[n]
-    ref_fe = block_reference_element(fspace, block_id)
-    _update_bc_values!(bc.vals, func, ref_fe, bc.element_conns.data, bc.sides, X, t)
+  foreach_block(fspace) do ref_fe, b
+    block_id = bcs.block_id_to_bc[b]
+    if block_id == -1
+      # do nothing
+    else
+      cache = bcs.bc_caches[block_id]
+      func = bcs.bc_funcs[block_id]
+      _update_bc_values!(cache.vals, func, ref_fe, cache.element_conns.data, cache.sides, X, t)
+    end
   end
   return nothing
 end
diff --git a/src/bcs/RobinBCs.jl b/src/bcs/RobinBCs.jl
index 865bf54..c8bd9b6 100644
--- a/src/bcs/RobinBCs.jl
+++ b/src/bcs/RobinBCs.jl
@@ -86,30 +86,38 @@ function _update_bc_values!(vals, dvalsdu, func, ref_fe, conns, sides, X, t, U)
 end
 
 struct RobinBCs{
-    BCFuncs,
-    IT       <: Integer,
-    IV       <: AbstractVector{IT},
-    RM       <: AbstractMatrix{<:SVector},
-    dRM      <: AbstractMatrix{<:SMatrix}  
+  BCFuncs,
+  IT       <: Integer,
+  IV       <: AbstractVector{IT},
+  RM       <: AbstractMatrix{<:SVector},
+  dRM      <: AbstractMatrix{<:SMatrix}  
 } <: AbstractBCs{BCFuncs}
-    bc_caches::Vector{RobinBCContainer{IT, IV, RM, dRM}}
-    bc_funcs::BCFuncs
-    block_ids::Vector{Int}
-    block_names::Vector{String}
+  block_id_to_bc::NTuple{MAX_BLOCKS, IT}
+  bc_caches::Vector{RobinBCContainer{IT, IV, RM, dRM}}
+  bc_funcs::BCFuncs
+  block_ids::Vector{Int}
+  block_names::Vector{String}
 end
 
 function RobinBCs(mesh, dof::DofManager, robin_bcs::Vector{RobinBC})
-    if length(robin_bcs) == 0
-      ND = size(dof, 1)
-      bc_caches = RobinBCContainer{Int, Vector{Int}, Matrix{SVector{ND, Float64}}, Matrix{SMatrix{ND, ND, Float64, ND * ND}}}[]
-      return RobinBCs(bc_caches, RobinBCFunction[], Int[], String[])
-    end
+  if length(robin_bcs) == 0
+    ND = size(dof, 1)
+    bc_caches = RobinBCContainer{Int, Vector{Int}, Matrix{SVector{ND, Float64}}, Matrix{SMatrix{ND, ND, Float64, ND * ND}}}[]
+    block_id_to_bc = ntuple(i -> -1, Val(MAX_BLOCKS))
+    return RobinBCs(block_id_to_bc, bc_caches, RobinBCFunction[], Int[], String[])
+  end
 
-    new_bcs, new_funcs, block_ids, block_names = _setup_weakly_enforced_bc_container(mesh, dof, robin_bcs, RobinBCContainer)
-    # TODO fix this up eventually
-    new_bcs = convert(Vector{typeof(new_bcs[1])}, new_bcs)
-    new_funcs = RobinBCFunction.(new_funcs)
-    return RobinBCs(new_bcs, new_funcs, block_ids, block_names)
+  new_bcs, new_funcs, block_ids, block_names = _setup_weakly_enforced_bc_container(mesh, dof, robin_bcs, RobinBCContainer)
+  # TODO fix this up eventually
+  new_bcs = convert(Vector{typeof(new_bcs[1])}, new_bcs)
+  new_funcs = RobinBCFunction.(new_funcs)
+  block_id_to_bc = ntuple(
+      i -> i <= length(block_ids) ? 
+      block_ids[i] : 
+      -1,
+      Val(MAX_BLOCKS)
+    )
+  return RobinBCs(block_id_to_bc, new_bcs, new_funcs, block_ids, block_names)
 end
 
 function Adapt.adapt_structure(to, bcs::RobinBCs)
@@ -126,6 +134,7 @@ function Adapt.adapt_structure(to, bcs::RobinBCs)
     bc_caches = RobinBCContainer{Int, typeof(temp_int), typeof(temp_vals), typeof(temp_dvals)}[]
   end
   return RobinBCs(
+    bcs.block_id_to_bc,
     bc_caches,
     bcs.bc_funcs,
     bcs.block_ids,
@@ -135,12 +144,15 @@ end
 
 function update_bc_values!(bcs::RobinBCs, assembler, X, t, U)
   fspace = function_space(assembler.dof)
-  for n in axes(bcs.block_ids, 1)
-    bc = values(bcs.bc_caches)[n]
-    block_id = bcs.block_ids[n]
-    func = values(bcs.bc_funcs)[n]
-    ref_fe = block_reference_element(fspace, block_id)
-    _update_bc_values!(bc.vals, bc.dvalsdu, func, ref_fe, bc.element_conns.data, bc.sides, X, t, U)
+  foreach_block(fspace) do ref_fe, b
+    block_id = bcs.block_id_to_bc[b]
+    if block_id == -1
+      # do nothing
+    else
+      cache = bcs.bc_caches[block_id]
+      func = bcs.bc_funcs[block_id]
+      _update_bc_values!(cache.vals, cache.dvalsdu, func, ref_fe, cache.element_conns.data, cache.sides, X, t, U)
+    end
   end
   return nothing
 end
diff --git a/src/bcs/Sources.jl b/src/bcs/Sources.jl
index 9916676..c537037 100644
--- a/src/bcs/Sources.jl
+++ b/src/bcs/Sources.jl
@@ -32,9 +32,7 @@ volume quadrature points for one block.
   TODO remove element_conns and ref_fes
   and have assemblers just ping the correct block
 """
-struct SourceContainer{
-  RM <: AbstractMatrix{<:SVector}
-}
+struct SourceContainer{RM <: AbstractMatrix{<:SVector}}
   vals::RM                    # size (NQ_cell, nelem) of SVector{ND}
   is_constant::Bool           # skip re-evaluation after first call
   initialized::Base.RefValue{Bool}
@@ -85,24 +83,90 @@ struct Sources{
   SourceFuncs,
   RM           <: AbstractMatrix{<:SVector},
 } <: AbstractBCs{SourceFuncs}
+  block_id_to_source::NTuple{MAX_BLOCKS, Int}
   source_block_ids::Vector{Int} # note this is the id order in FEC not the id in exodus
   source_block_names::Vector{String}
   source_caches::Vector{SourceContainer{RM}}
   source_funcs::SourceFuncs
 
   function Sources{SourceFuncs, RM}(
-    source_block_ids, source_block_names, source_caches, source_funcs
+    block_id_to_source, source_block_ids, source_block_names, source_caches, source_funcs
   ) where {SourceFuncs, RM}
-    new{SourceFuncs, RM}(source_block_ids, source_block_names, source_caches, source_funcs)
+    new{SourceFuncs, RM}(block_id_to_source, source_block_ids, source_block_names, source_caches, source_funcs)
   end
 
-  # TODO should we really allow for scalar funcs for this in the case of scalar variables?
+  # # TODO should we really allow for scalar funcs for this in the case of scalar variables?
+  # function Sources(mesh, dof::DofManager, sources::Vector{Source}, ::Type{RT} = Float64) where RT <: Number
+  #   ND = size(dof, 1)
+  #   caches = SourceContainer{Matrix{SVector{ND, RT}}}[]
+  #   funcs = SourceFunction[]
+  #   if length(sources) == 0
+  #     block_id_to_source = ntuple(i -> -1, Val(MAX_BLOCKS))
+  #     return Sources{typeof(funcs), Matrix{SVector{ND, RT}}}(
+  #       block_id_to_source, Int[], String[], SourceContainer{Matrix{SVector{ND, RT}}}[], funcs
+  #     )
+  #   end
+
+  #   fspace = function_space(dof)
+  #   ND = length(dof.var)
+  #   source_block_ids = Int[]
+  #   source_block_names = String[]
+  #   for source in sources
+  #     block_name = source.block_name
+  #     block_id = findfirst(x -> x == block_name, mesh.element_block_names)
+  #     push!(source_block_ids, block_id)
+  #     push!(source_block_names, block_name)
+  #     NQ, NE = block_quadrature_size(fspace, block_id)
+  #     # conns = Connectivity([conns_full])
+  #     # if ND == 1
+  #     #   vals = zeros(Float64, NQ, nelem)
+  #     # else
+  #     elements = fspace.elem_id_maps[block_id]
+  #     vals  = zeros(SVector{ND, Float64}, NQ, NE)
+  #     # REMOVE me
+  #     @assert length(elements) == size(vals, 2)
+  #     # end
+
+  #     # Detect constant: the is_constant flag is set by the caller (Carina)
+  #     # via the Source struct. For now, default to false (re-evaluate every step).
+  #     is_constant = false
+
+  #     push!(caches, SourceContainer(vals, is_constant, Ref(false)))
+  #     push!(funcs, SourceFunction(source.func))
+  #   end
+
+  #   block_id_to_source = ntuple(
+  #     i -> i <= length(source_block_ids) ? 
+  #     source_block_ids[i] : 
+  #     -1,
+  #     Val(MAX_BLOCKS)
+  #   )
+  #   # return Sources(source_block_ids, source_block_names, caches, funcs)
+  #   return Sources{typeof(funcs), Matrix{SVector{ND, RT}}}(
+  #     block_id_to_source, source_block_ids, source_block_names, caches, funcs
+  #   )
+  # end
   function Sources(mesh, dof::DofManager, sources::Vector{Source}, ::Type{RT} = Float64) where RT <: Number
+    temp = Sources{Function}(mesh, dof, sources)
+    # need to do below for typesafe funcs on GPU
+    funcs = map(x -> SourceFunction(x.func), sources)
+    return Sources{typeof(funcs), _vals_type(temp.source_caches)}(
+      temp.block_id_to_source,
+      temp.source_block_ids,
+      temp.source_block_names,
+      temp.source_caches,
+      funcs
+    )
+  end
+
+  function Sources{F}(mesh, dof::DofManager, sources) where F
     ND = size(dof, 1)
-    caches = SourceContainer{Matrix{SVector{ND, RT}}}[]
-    funcs = SourceFunction[]
+    RM = Matrix{SVector{ND, Float64}}
+    caches = SourceContainer{RM}[]
+    funcs = SourceFunction{F}[]
     if length(sources) == 0
-      return Sources{typeof(funcs), Matrix{SVector{ND, RT}}}(Int[], String[], SourceContainer{Matrix{SVector{ND, RT}}}[], funcs)
+      block_id_to_source = ntuple(i -> -1, Val(MAX_BLOCKS))
+      return Sources{typeof(funcs), RM}(block_id_to_source, Int[], String[], SourceContainer{RM}[], funcs)
     end
 
     fspace = function_space(dof)
@@ -127,12 +191,18 @@ struct Sources{
       is_constant = false
 
       push!(caches, SourceContainer(vals, is_constant, Ref(false)))
-      push!(funcs, SourceFunction(source.func))
+      push!(funcs, SourceFunction{F}(source.func))
     end
 
+    block_id_to_source = ntuple(
+      i -> i <= length(source_block_ids) ? 
+      source_block_ids[i] : 
+      -1,
+      Val(MAX_BLOCKS)
+    )
     # return Sources(source_block_ids, source_block_names, caches, funcs)
-    return Sources{typeof(funcs), Matrix{SVector{ND, RT}}}(
-      source_block_ids, source_block_names, caches, funcs
+    return Sources{typeof(funcs), RM}(
+      block_id_to_source, source_block_ids, source_block_names, caches, funcs
     )
   end
 end
@@ -141,11 +211,12 @@ function Adapt.adapt(to, sources::Sources{SourceFuncs, RM}) where {SourceFuncs,
   if length(sources.source_caches) > 0
     caches = map(x -> adapt(to, x), sources.source_caches)
   else
-    temp = adapt(to, _vals_type(sources.source_caches)(undef, 0, 0))
-    caches = SourceContainer{typeof(temp)}[]
+    temp_floats = adapt(to, _vals_type(sources.source_caches)(undef, 0, 0))
+    caches = SourceContainer{typeof(temp_floats)}[]
   end
 
   return Sources{SourceFuncs, _vals_type(caches)}(
+    sources.block_id_to_source,
     sources.source_block_ids,
     sources.source_block_names,
     caches,
@@ -165,19 +236,22 @@ end
 
 function update_source_values!(sources::Sources, assembler, X, t)
   fspace = function_space(assembler)
-  for n in axes(sources.source_block_ids, 1)
-    block_id = sources.source_block_ids[n]
-    cache = sources.source_caches[n]
-    if cache.is_constant && cache.initialized[]
-      continue
+  foreach_block(fspace) do ref_fe, b
+    block_id = sources.block_id_to_source[b]
+    if block_id == -1
+      # do nothing, can't use continue since we're in foreach_block
+    else
+      cache = sources.source_caches[block_id]
+      if cache.is_constant && cache.initialized[]
+        # do nothing
+      else
+        func = sources.source_funcs[block_id]
+        conns_data = fspace.elem_conns.data
+        coffset = fspace.elem_conns.offsets[block_id]
+        _update_source_values!(cache.vals, func, ref_fe, conns_data, coffset, X, t)
+        cache.initialized[] = true
+      end
     end
-
-    func = sources.source_funcs[n]
-    ref_fe = block_reference_element(fspace, block_id)
-    conns_data = fspace.elem_conns.data
-    coffset = fspace.elem_conns.offsets[block_id]
-    _update_source_values!(cache.vals, func, ref_fe, conns_data, coffset, X, t)
-    cache.initialized[] = true
   end
   return nothing
 end
diff --git a/src/meshes/Exodus.jl b/src/meshes/Exodus.jl
index 61cbc3e..ef328f3 100644
--- a/src/meshes/Exodus.jl
+++ b/src/meshes/Exodus.jl
@@ -5,6 +5,12 @@ function copy_mesh(mesh, file_2::String, ::Type{ExodusMesh})
   return nothing
 end
 
+function copy_mesh(mesh, file_2::String, ::Type{ExodusMesh}, exo_type::Type{ExodusDatabase{M, I, B, F}}) where {M, I, B, F}
+  file_1 = mesh.mesh_obj.file_name
+  Exodus.copy_mesh(exo_type, file_1, file_2)
+  return nothing
+end
+
 """
 $(TYPEDEF)
 $(TYPEDFIELDS)
@@ -191,7 +197,17 @@ function PostProcessor(
   end
   Exodus.write_time(exo, 1, 0.0)
   
-  return PostProcessor(exo)
+  return PostProcessor(
+    file_name, exo,
+    nodal_var_names, all_el_var_names, String[]
+  )
+end
+
+function PostProcessor{O}(::Type{<:ExodusMesh}, mesh, output_file) where O <: ExodusDatabase
+  copy_mesh(mesh, output_file, ExodusMesh, O)
+  exo = O(output_file, "rw")
+  # return PostProcessor{O}(output_file, exo, String[], String[], String[])
+  return PostProcessor(output_file, exo, String[], String[], String[])
 end
 
 function write_field(
diff --git a/test/TestFields.jl b/test/TestFields.jl
index 5da76d3..871cabe 100644
--- a/test/TestFields.jl
+++ b/test/TestFields.jl
@@ -16,6 +16,10 @@
       15 18 21 24 27
     ]
   ]
+  els_in = [
+    [1, 2, 3],
+    [4, 5, 6, 7, 8]
+  ]
   conn = Connectivity(conns_in)
   # testing block view
   block_conn = connectivity(conn, 1)