fix nobs var check in update_observed, keep previous args in update_observed, fix bs + tests

Maximilian-Stefan-Ernst · Maximilian-Stefan-Ernst · commit 24a79470657e · 2026-03-12T11:05:39.000+01:00
diff --git a/src/frontend/fit/standard_errors/bootstrap.jl b/src/frontend/fit/standard_errors/bootstrap.jl
@@ -1,10 +1,10 @@
 """
     bootstrap(
-        fitted::SemFit;
+        fitted::SemFit,
+        specification::SemSpecification;
         statistic = solution,
         n_boot = 3000,
         data = nothing,
-        specification = nothing,
         engine = :Optim,
         parallel = false,
         fit_kwargs = Dict(),
@@ -14,12 +14,11 @@ Return bootstrap samples for `statistic`.
 
 # Arguments
 - `fitted`: a fitted SEM.
+- `specification`: a `ParameterTable` or `RAMMatrices` object passed to `replace_observed`.
 - `statistic`: any function that can be called on a `SemFit` object.
   The output will be returned as the bootstrap sample.
 - `n_boot`: number of boostrap samples
 - `data`: data to sample from. Only needed if different than the data from `sem_fit`
-- `specification`: a `ParameterTable` or `RAMMatrices` object passed to `replace_observed`.
-   Necessary for FIML / WLS models.
 - `engine`: optimizer engine, passed to `fit`.
 - `parallel`: if `true`, run bootstrap samples in parallel on all available threads.
   The number of threads is controlled by the `JULIA_NUM_THREADS` environment variable or
@@ -40,11 +39,11 @@ bootstrap(
 ```
 """
 function bootstrap(
-    fitted::SemFit;
+    fitted::SemFit,
+    specification::SemSpecification;
     statistic = solution,
     n_boot = 3000,
     data = nothing,
-    specification = nothing,
     engine = :Optim,
     parallel = false,
     fit_kwargs = Dict(),
@@ -56,6 +55,7 @@ function bootstrap(
     # pre-allocations
     out = []
     conv = []
+    errors = []
     n_failed = Ref(0)
     # fit to bootstrap samples
     if !parallel
@@ -73,8 +73,9 @@ function bootstrap(
                 c = converged(new_fit)
                 push!(out, sample)
                 push!(conv, c)
-            catch
+            catch e
                 n_failed[] += 1
+                push!(errors, e)
             end
         end
     else
@@ -103,9 +104,10 @@ function bootstrap(
                     push!(out, sample)
                     push!(conv, c)
                 end
-            catch
+            catch e
                 lock(lk) do
                     n_failed[] += 1
+                    push!(errors, e)
                 end
             finally
                 put!(model_pool, thread_model)
@@ -119,19 +121,19 @@ function bootstrap(
     return Dict(
         :samples => out,
         :n_boot => n_boot,
-        :n_converged => sum(conv),
+        :n_converged => isempty(conv) ? 0 : sum(conv),
         :converged => conv,
         :n_errored => n_failed[],
+        :errors => errors
     )
 end
 
 """
     se_bootstrap(
-        fitted::SemFit;
+        fitted::SemFit,
+        specification::SemSpecification;
         n_boot = 3000,
         data = nothing,
-        specification = nothing,
-        engine = :Optim,
         parallel = false,
         fit_kwargs = Dict(),
         replace_kwargs = Dict())
@@ -140,10 +142,9 @@ Return bootstrap standard errors.
 
 # Arguments
 - `fitted`: a fitted SEM.
+- `specification`: a `ParameterTable` or `RAMMatrices` object passed to `replace_observed`.
 - `n_boot`: number of boostrap samples
 - `data`: data to sample from. Only needed if different than the data from `sem_fit`
-- `specification`: a `ParameterTable` or `RAMMatrices` object passed to `replace_observed`.
-   Necessary for FIML / WLS models.
 - `engine`: optimizer engine, passed to `fit`.
 - `parallel`: if `true`, run bootstrap samples in parallel on all available threads.
   The number of threads is controlled by the `JULIA_NUM_THREADS` environment variable or
@@ -165,10 +166,10 @@ se_bootstrap(
 ```
 """
 function se_bootstrap(
-    fitted::SemFit;
+    fitted::SemFit,
+    specification::SemSpecification;
     n_boot = 3000,
     data = nothing,
-    specification = nothing,
     engine = :Optim,
     parallel = false,
     fit_kwargs = Dict(),
diff --git a/src/implied/RAM/generic.jl b/src/implied/RAM/generic.jl
@@ -196,9 +196,13 @@ end
 ############################################################################################
 
 function update_observed(implied::RAM, observed::SemObserved; kwargs...)
-    if nobserved_vars(observed) == size(implied.Σ, 1)
+    if nobserved_vars(observed) == nobserved_vars(implied)
         return implied
     else
-        return RAM(; observed = observed, kwargs...)
+        return RAM(;
+            observed = observed,
+            gradient_required = !isnothing(implied.∇A),
+            meanstructure = MeanStruct(implied) == HasMeanStruct,
+            kwargs...)
     end
 end
diff --git a/src/implied/RAM/symbolic.jl b/src/implied/RAM/symbolic.jl
@@ -210,10 +210,17 @@ end
 ############################################################################################
 
 function update_observed(implied::RAMSymbolic, observed::SemObserved; kwargs...)
-    if nobserved_vars(observed) == size(implied.Σ, 1)
+    if nobserved_vars(observed) == nobserved_vars(implied)
         return implied
     else
-        return RAMSymbolic(; observed = observed, kwargs...)
+        return RAMSymbolic(;
+        observed = observed,
+        vech = implied.Σ isa Vector,
+        gradient = !isnothing(implied.∇Σ),
+        hessian = !isnothing(implied.∇²Σ),
+        meanstructure = MeanStruct(implied) == HasMeanStruct,
+        approximate_hessian = isnothing(implied.∇²Σ),
+        kwargs...)
     end
 end
 
diff --git a/src/loss/ML/ML.jl b/src/loss/ML/ML.jl
@@ -237,6 +237,9 @@ function update_observed(lossfun::SemML, observed::SemObserved; kwargs...)
     if size(lossfun.Σ⁻¹) == size(obs_cov(observed))
         return lossfun
     else
-        return SemML(; observed = observed, kwargs...)
+        return SemML(;
+        observed = observed,
+        approximate_hessian = HessianEval(lossfun) == ApproxHessian,
+        kwargs...)
     end
 end
diff --git a/src/loss/WLS/WLS.jl b/src/loss/WLS/WLS.jl
@@ -173,5 +173,7 @@ end
 ### Recommended methods
 ############################################################################################
 
-update_observed(lossfun::SemWLS, observed::SemObserved; kwargs...) =
-    SemWLS(; observed = observed, kwargs...)
+update_observed(lossfun::SemWLS, observed::SemObserved; kwargs...) = SemWLS(;
+    observed = observed,
+    meanstructure = MeanStruct(kwargs[:implied]) == HasMeanStruct,
+    kwargs...)
diff --git a/test/examples/helper.jl b/test/examples/helper.jl
@@ -136,17 +136,28 @@ function test_estimates(
     end
 end
 
-function test_bootstrap(model_fit, spec; n_boot = 500)
+function test_bootstrap(model_fit, spec; compare_hessian = true, compare_bs = true, n_boot = 500)
+    se_bs = se_bootstrap(model_fit, spec; n_boot = n_boot)
     # hessian and bootstrap se are close
-    se_he = se_hessian(model_fit)
-    se_bs = se_bootstrap(model_fit; specification = spec, n_boot = n_boot)
-    @test isapprox(se_bs, se_he, rtol = 0.2)
+    if compare_hessian
+        se_he = se_hessian(model_fit)
+        @test isapprox(se_bs, se_he, rtol = 0.2)
+    end
     # se_bootstrap and bootstrap |> se are close
-    bs_samples = bootstrap(model_fit; specification = spec, n_boot = n_boot)
-    @test bs_samples[:n_converged] > 0.95*n_boot
-    bs_samples = cat(bs_samples[:samples][BitVector(bs_samples[:converged])]..., dims = 2)
-    se_bs_2 = sqrt.(var(bs_samples, corrected = false, dims = 2))
-    @test isapprox(se_bs_2, se_bs, rtol = 0.05)
+    if compare_bs
+        bs_samples = bootstrap(model_fit, spec; n_boot = n_boot)
+        @test bs_samples[:n_converged] > 0.95*n_boot
+        bs_samples = cat(bs_samples[:samples][BitVector(bs_samples[:converged])]..., dims = 2)
+        se_bs_2 = sqrt.(var(bs_samples, corrected = false, dims = 2))
+        @test isapprox(se_bs_2, se_bs, rtol = 0.05)
+    end
+end
+
+function smoketest_bootstrap(model_fit, spec; n_boot = 5)
+    # hessian and bootstrap se are close
+    se_bs = se_bootstrap(model_fit, spec; n_boot = n_boot)
+    bs_samples = bootstrap(model_fit, spec; n_boot = n_boot)
+    return se_bs, bs_samples
 end
 
 function smoketest_CI_z(model_fit, partable)
diff --git a/test/examples/multigroup/build_models.jl b/test/examples/multigroup/build_models.jl
@@ -293,7 +293,7 @@ end
         lav_col = :se,
         lav_groups = Dict(:Pasteur => 1, :Grant_White => 2),
     )
-    test_bootstrap(solution_ls, partable)
+    test_bootstrap(solution_ls, partable; compare_bs = false)
     smoketest_CI_z(solution_ls, partable)
 end
 
diff --git a/test/examples/multigroup/multigroup.jl b/test/examples/multigroup/multigroup.jl
@@ -1,6 +1,9 @@
 using StructuralEquationModels, Test, FiniteDiff, Suppressor
 using LinearAlgebra: diagind, LowerTriangular
 using Statistics: var
+using Random
+
+Random.seed!(948723)
 
 const SEM = StructuralEquationModels
 
diff --git a/test/examples/political_democracy/constructor.jl b/test/examples/political_democracy/constructor.jl
@@ -161,7 +161,7 @@ end
         lav_col = :se,
     )
 
-    test_bootstrap(solution_ls, partable)
+    test_bootstrap(solution_ls, partable; compare_bs = false)
     smoketest_CI_z(solution_ls, partable)
 end
 
@@ -373,7 +373,8 @@ end
         lav_col = :se,
     )
 
-    test_bootstrap(solution_ls, partable_mean)
+    test_bootstrap(solution_ls, partable_mean, compare_bs = false)
+    # smoketest_bootstrap(solution_ls, partable_mean)
     smoketest_CI_z(solution_ls, partable_mean)
 end
 
@@ -507,6 +508,7 @@ end
         lav_col = :se,
     )
 
-    test_bootstrap(solution_ml, partable_mean)
+    # test_bootstrap(solution_ml, partable_mean) # too much compute
+    smoketest_bootstrap(solution_ml, partable_mean)
     smoketest_CI_z(solution_ml, partable_mean)
 end
diff --git a/test/examples/political_democracy/political_democracy.jl b/test/examples/political_democracy/political_democracy.jl
@@ -2,6 +2,8 @@ using StructuralEquationModels, Test, Suppressor, FiniteDiff
 using Statistics: cov, mean, var
 using Random, NLopt
 
+Random.seed!(464577)
+
 SEM = StructuralEquationModels
 
 include(

Original file line number	Diff line number	Diff line change
`@@ -293,7 +293,7 @@ end`
`293`	`293`	`lav_col = :se,`
`294`	`294`	`lav_groups = Dict(:Pasteur => 1, :Grant_White => 2),`
`295`	`295`	`)`
`296`		`- test_bootstrap(solution_ls, partable)`
	`296`	`+ test_bootstrap(solution_ls, partable; compare_bs = false)`
`297`	`297`	`smoketest_CI_z(solution_ls, partable)`
`298`	`298`	`end`
`299`	`299`