diff --git a/Project.toml b/Project.toml index 2fa168a9e..2b0075e39 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StructuralEquationModels" uuid = "383ca8c5-e4ff-4104-b0a9-f7b279deed53" authors = ["Maximilian Ernst", "Aaron Peikert"] -version = "0.2.4" +version = "0.3.0" [deps] DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" @@ -12,20 +12,21 @@ LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3" LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" NLSolversBase = "d41bc354-129a-5804-8e4c-c37616107c6c" -NLopt = "76087f3c-5699-56af-9a33-bf431cd00edd" Optim = "429524aa-4258-5aef-a3af-852621145aeb" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StenoGraphs = "78862bba-adae-4a83-bb4d-33c106177f81" Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7" +SymbolicUtils = "d1185830-fcd6-423d-90d6-eec64667417b" [compat] -julia = "1.9, 1.10" -StenoGraphs = "0.2" +julia = "1.9, 1.10, 1.11" +StenoGraphs = "0.2 - 0.3, 0.4.1 - 0.5" DataFrames = "1" Distributions = "0.25" FiniteDiff = "2" @@ -34,11 +35,21 @@ NLSolversBase = "7" NLopt = "0.6, 1" Optim = "1" PrettyTables = "2" +ProximalAlgorithms = "0.7" StatsBase = "0.33, 0.34" -Symbolics = "4, 5" +Symbolics = "4, 5, 6" +SymbolicUtils = "1.4 - 1.5, 1.7, 2, 3" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] test = ["Test"] + +[weakdeps] +NLopt = "76087f3c-5699-56af-9a33-bf431cd00edd" +ProximalAlgorithms = "140ffc9f-1907-541a-a177-7475e0a401e9" + +[extensions] +SEMNLOptExt = "NLopt" +SEMProximalOptExt = "ProximalAlgorithms" diff --git a/README.md b/README.md index 3eeafd332..9754a8c20 100644 --- a/README.md +++ b/README.md @@ -11,11 
+11,11 @@ It is still *in development*. Models you can fit include - Linear SEM that can be specified in RAM (or LISREL) notation - ML, GLS and FIML estimation -- Regularization +- Regularized SEM (Ridge, Lasso, L0, ...) - Multigroup SEM - Sums of arbitrary loss functions (everything the optimizer can handle). -# What are the merrits? +# What are the merits? We provide fast objective functions, gradients, and for some cases hessians as well as approximations thereof. As a user, you can easily define custom loss functions. @@ -35,6 +35,7 @@ The package makes use of - Symbolics.jl for symbolically precomputing parts of the objective and gradients to generate fast, specialized functions. - SparseArrays.jl to speed up symbolic computations. - Optim.jl and NLopt.jl to provide a range of different Optimizers/Linesearches. +- ProximalAlgorithms.jl for regularization. - FiniteDiff.jl and ForwardDiff.jl to provide gradients for user-defined loss functions. # At the moment, we are still working on: diff --git a/docs/Project.toml b/docs/Project.toml index 9da7f0ab4..42f6718a9 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,4 +1,6 @@ [deps] DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +NLopt = "76087f3c-5699-56af-9a33-bf431cd00edd" +ProximalAlgorithms = "140ffc9f-1907-541a-a177-7475e0a401e9" ProximalOperators = "a725b495-10eb-56fe-b38b-717eba820537" diff --git a/docs/make.jl b/docs/make.jl index 4a55d55ce..4542cf48f 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -32,7 +32,7 @@ makedocs( "Developer documentation" => [ "Extending the package" => "developer/extending.md", "Custom loss functions" => "developer/loss.md", - "Custom imply types" => "developer/imply.md", + "Custom implied types" => "developer/implied.md", "Custom optimizer types" => "developer/optimizer.md", "Custom observed types" => "developer/observed.md", "Custom model types" => "developer/sem.md", diff --git a/docs/src/assets/concept.svg 
b/docs/src/assets/concept.svg index 2a7a0b42a..fa222a0d9 100644 --- a/docs/src/assets/concept.svg +++ b/docs/src/assets/concept.svg @@ -7,38 +7,12 @@ stroke-linecap="square" stroke-miterlimit="10" id="svg57" - sodipodi:docname="Unbenannte Präsentation.svg" width="610.56537" height="300.26614" - inkscape:version="1.1.2 (0a00cf5339, 2022-02-04, custom)" - xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" - xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" xmlns="http://www.w3.org/2000/svg" xmlns:svg="http://www.w3.org/2000/svg"> - - Vector of parameter labels +- `nparams(::RAMSymbolic)` -> Number of parameters + +## Implementation +Subtype of `SemImplied`. +""" +struct ImpliedEmpty{A, B, C} <: SemImplied + hessianeval::A + meanstruct::B + ram_matrices::C +end + +############################################################################################ +### Constructors +############################################################################################ + +function ImpliedEmpty(;specification, meanstruct = NoMeanStruct(), hessianeval = ExactHessian(), kwargs...) + return ImpliedEmpty(hessianeval, meanstruct, convert(RAMMatrices, specification)) +end + +############################################################################################ +### methods +############################################################################################ + +update!(targets::EvaluationTargets, implied::ImpliedEmpty, par, model) = nothing + +############################################################################################ +### Recommended methods +############################################################################################ + +update_observed(implied::ImpliedEmpty, observed::SemObserved; kwargs...) = implied +``` + +As you see, similar to [Custom loss functions](@ref) we implement a method for `update_observed`. 
\ No newline at end of file diff --git a/docs/src/developer/imply.md b/docs/src/developer/imply.md deleted file mode 100644 index 44e0f6ff4..000000000 --- a/docs/src/developer/imply.md +++ /dev/null @@ -1,87 +0,0 @@ -# Custom imply types - -We recommend to first read the part [Custom loss functions](@ref), as the overall implementation is the same and we will describe it here more briefly. - -Imply types are of subtype `SemImply`. To implement your own imply type, you should define a struct - -```julia -struct MyImply <: SemImply - ... -end -``` - -and at least a method to compute the objective - -```julia -import StructuralEquationModels: objective! - -function objective!(imply::MyImply, par, model::AbstractSemSingle) - ... - return nothing -end -``` - -This method should compute and store things you want to make available to the loss functions, and returns `nothing`. For example, as we have seen in [Second example - maximum likelihood](@ref), the `RAM` imply type computes the model-implied covariance matrix and makes it available via `Σ(imply)`. -To make stored computations available to loss functions, simply write a function - for example, for the `RAM` imply type we defined - -```julia -Σ(imply::RAM) = imply.Σ -``` - -Additionally, you can specify methods for `gradient` and `hessian` as well as the combinations described in [Custom loss functions](@ref). - -The last thing nedded to make it work is a method for `n_par` that takes your imply type and returns the number of parameters of the model: - -```julia -n_par(imply::MyImply) = ... -``` - -Just as described in [Custom loss functions](@ref), you may define a constructor. Typically, this will depend on the `specification = ...` argument that can be a `ParameterTable` or a `RAMMatrices` object. - -We implement an `ImplyEmpty` type in our package that does nothing but serving as an imply field in case you are using a loss function that does not need any imply type at all. 
You may use it as a template for defining your own imply type, as it also shows how to handle the specification objects: - -```julia -############################################################################ -### Types -############################################################################ - -struct ImplyEmpty{V, V2} <: SemImply - identifier::V2 - n_par::V -end - -############################################################################ -### Constructors -############################################################################ - -function ImplyEmpty(; - specification, - kwargs...) - - ram_matrices = RAMMatrices(specification) - identifier = StructuralEquationModels.identifier(ram_matrices) - - n_par = length(ram_matrices.parameters) - - return ImplyEmpty(identifier, n_par) -end - -############################################################################ -### methods -############################################################################ - -objective!(imply::ImplyEmpty, par, model) = nothing -gradient!(imply::ImplyEmpty, par, model) = nothing -hessian!(imply::ImplyEmpty, par, model) = nothing - -############################################################################ -### Recommended methods -############################################################################ - -identifier(imply::ImplyEmpty) = imply.identifier -n_par(imply::ImplyEmpty) = imply.n_par - -update_observed(imply::ImplyEmpty, observed::SemObserved; kwargs...) = imply -``` - -As you see, similar to [Custom loss functions](@ref) we implement a method for `update_observed`. Additionally, you should store the `identifier` from the specification object and write a method for `identifier`, as this will make it possible to access parameter indices by label. 
\ No newline at end of file diff --git a/docs/src/developer/loss.md b/docs/src/developer/loss.md index 8bd654bf1..931c2d0e5 100644 --- a/docs/src/developer/loss.md +++ b/docs/src/developer/loss.md @@ -20,17 +20,22 @@ end ``` We store the hyperparameter α and the indices I of the parameters we want to regularize. -Additionaly, we need to define a *method* to compute the objective: +Additionally, we need to define a *method* of the function `evaluate!` to compute the objective: ```@example loss -import StructuralEquationModels: objective! +import StructuralEquationModels: evaluate! -objective!(ridge::Ridge, par, model::AbstractSemSingle) = ridge.α*sum(par[ridge.I].^2) +evaluate!(objective::Number, gradient::Nothing, hessian::Nothing, ridge::Ridge, model::AbstractSem, par) = + ridge.α * sum(i -> par[i]^2, ridge.I) ``` +The function `evaluate!` recognizes by the types of the arguments `objective`, `gradient` and `hessian` whether it should compute the objective value, gradient or hessian of the model w.r.t. the parameters. +In this case, `gradient` and `hessian` are of type `Nothing`, signifying that they should not be computed, but only the objective value. + That's all we need to make it work!
For example, we can now fit [A first model](@ref) with ridge regularization: We first give some parameters labels to be able to identify them as targets for the regularization: + ```@example loss observed_vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8] latent_vars = [:ind60, :dem60, :dem65] @@ -60,11 +65,12 @@ graph = @StenoGraph begin end partable = ParameterTable( + graph, latent_vars = latent_vars, - observed_vars = observed_vars, - graph = graph) + observed_vars = observed_vars +) -parameter_indices = get_identifier_indices([:a, :b, :c], partable) +parameter_indices = getindex.([param_indices(partable)], [:a, :b, :c]) myridge = Ridge(0.01, parameter_indices) model = SemFiniteDiff( @@ -73,7 +79,7 @@ model = SemFiniteDiff( loss = (SemML, myridge) ) -model_fit = sem_fit(model) +model_fit = fit(model) ``` This is one way of specifying the model - we now have **one model** with **multiple loss functions**. Because we did not provide a gradient for `Ridge`, we have to specify a `SemFiniteDiff` model that computes numerical gradients with finite difference approximation. @@ -85,15 +91,23 @@ Note that the last argument to the `objective!` method is the whole model. There By far the biggest improvements in performance will result from specifying analytical gradients. We can do this for our example: ```@example loss -import StructuralEquationModels: gradient! - -function gradient!(ridge::Ridge, par, model::AbstractSemSingle) - gradient = zero(par) - gradient[ridge.I] .= 2*ridge.α*par[ridge.I] - return gradient +function evaluate!(objective, gradient, hessian::Nothing, ridge::Ridge, model::AbstractSem, par) + # compute gradient + if !isnothing(gradient) + fill!(gradient, 0) + gradient[ridge.I] .= 2 * ridge.α * par[ridge.I] + end + # compute objective + if !isnothing(objective) + return ridge.α * sum(i -> par[i]^2, ridge.I) + end end ``` +As you can see, in this method definition, both `objective` and `gradient` can be different from `nothing`. 
+We then check whether to compute the objective value and/or the gradient with `isnothing(objective)`/`isnothing(gradient)`. +This syntax makes it possible to compute objective value and gradient at the same time, which is beneficial when the objective and gradient share common computations. + Now, instead of specifying a `SemFiniteDiff`, we can use the normal `Sem` constructor: ```@example loss @@ -103,7 +117,7 @@ model_new = Sem( loss = (SemML, myridge) ) -model_fit = sem_fit(model_new) +model_fit = fit(model_new) ``` The results are the same, but we can verify that the computational costs are way lower (for this, the julia package `BenchmarkTools` has to be installed): @@ -111,53 +125,14 @@ The results are the same, but we can verify that the computational costs are way ```julia using BenchmarkTools -@benchmark sem_fit(model) +@benchmark fit(model) -@benchmark sem_fit(model_new) +@benchmark fit(model_new) ``` The exact results of those benchmarks are of course highly depended an your system (processor, RAM, etc.), but you should see that the median computation time with analytical gradients drops to about 5% of the computation without analytical gradients. -Additionally, you may provide analytic hessians by writing a method of the form - -```julia -function hessian!(ridge::Ridge, par, model::AbstractSemSingle) - ... - return hessian -end -``` - -however, this will only matter if you use an optimization algorithm that makes use of the hessians. Our default algorithmn `LBFGS` from the package `Optim.jl` does not use hessians (for example, the `Newton` algorithmn from the same package does). - -To improve performance even more, you can write a method of the form - -```julia -function objective_gradient!(ridge::Ridge, par, model::AbstractSemSingle) - ... - return objective, gradient -end -``` - -This is beneficial when the computation of the objective and gradient share common computations.
For example, in maximum likelihood estimation, the model implied covariance matrix has to be inverted to both compute the objective and gradient. Whenever the optimization algorithmn asks for the objective value and gradient at the same point, we call `objective_gradient!` and only have to do the shared computations - in this case the matrix inversion - once. - -If you want to do hessian-based optimization, there are also the following methods: - -```julia -function objective_hessian!(ridge::Ridge, par, model::AbstractSemSingle) - ... - return objective, hessian -end - -function gradient_hessian!(ridge::Ridge, par, model::AbstractSemSingle) - ... - return gradient, hessian -end - -function objective_gradient_hessian!(ridge::Ridge, par, model::AbstractSemSingle) - ... - return objective, gradient, hessian -end -``` +Additionally, you may provide analytic hessians by writing a respective method for `evaluate!`. However, this will only matter if you use an optimization algorithm that makes use of the hessians. Our default algorithm `LBFGS` from the package `Optim.jl` does not use hessians (for example, the `Newton` algorithm from the same package does). ## Convenient @@ -170,7 +145,7 @@ function MyLoss(;arg1 = ..., arg2, kwargs...) end ``` -All keyword arguments that a user passes to the Sem constructor are passed to your loss function. In addition, all previously constructed parts of the model (imply and observed part) are passed as keyword arguments as well as the number of parameters `n_par = ...`, so your constructor may depend on those. For example, the constructor for `SemML` in our package depends on the additional argument `meanstructure` as well as the observed part of the model to pre-allocate arrays of the same size as the observed covariance matrix and the observed mean vector: +All keyword arguments that a user passes to the Sem constructor are passed to your loss function.
In addition, all previously constructed parts of the model (implied and observed part) are passed as keyword arguments as well as the number of parameters `n_par = ...`, so your constructor may depend on those. For example, the constructor for `SemML` in our package depends on the additional argument `meanstructure` as well as the observed part of the model to pre-allocate arrays of the same size as the observed covariance matrix and the observed mean vector: ```julia function SemML(;observed, meanstructure = false, approx_H = false, kwargs...) @@ -194,7 +169,7 @@ end ### Update observed data If you are planing a simulation study where you have to fit the **same model** to many **different datasets**, it is computationally beneficial to not build the whole model completely new everytime you change your data. -Therefore, we provide a function to update the data of your model, `swap_observed(model(semfit); data = new_data)`. However, we can not know beforehand in what way your loss function depends on the specific datasets. The solution is to provide a method for `update_observed`. Since `Ridge` does not depend on the data at all, this is quite easy: +Therefore, we provide a function to update the data of your model, `replace_observed(model(semfit); data = new_data)`. However, we can not know beforehand in what way your loss function depends on the specific datasets. The solution is to provide a method for `update_observed`. Since `Ridge` does not depend on the data at all, this is quite easy: ```julia import StructuralEquationModels: update_observed @@ -220,9 +195,9 @@ To keep it simple, we only cover models without a meanstructure. The maximum lik F_{ML} = \log \det \Sigma_i + \mathrm{tr}\left(\Sigma_{i}^{-1} \Sigma_o \right) ``` -where ``\Sigma_i`` is the model implied covariance matrix and ``\Sigma_o`` is the observed covariance matrix. 
We can query the model implied covariance matrix from the `imply` par of our model, and the observed covariance matrix from the `observed` path of our model. +where ``\Sigma_i`` is the model implied covariance matrix and ``\Sigma_o`` is the observed covariance matrix. We can query the model implied covariance matrix from the `implied` part of our model, and the observed covariance matrix from the `observed` part of our model. -To get information on what we can access from a certain `imply` or `observed` type, we can check it`s documentation an the pages [API - model parts](@ref) or via the help mode of the REPL: +To get information on what we can access from a certain `implied` or `observed` type, we can check its documentation on the pages [API - model parts](@ref) or via the help mode of the REPL: ```julia julia>? @@ -232,7 +207,7 @@ help?> RAM help?> SemObservedCommon ``` -We see that the model implied covariance matrix can be assessed as `Σ(imply)` and the observed covariance matrix as `obs_cov(observed)`. +We see that the model implied covariance matrix can be accessed as `Σ(implied)` and the observed covariance matrix as `obs_cov(observed)`. With this information, we write can implement maximum likelihood optimization as @@ -240,11 +215,11 @@ With this information, we write can implement maximum likelihood optimization as struct MaximumLikelihood <: SemLossFunction end using LinearAlgebra -import StructuralEquationModels: Σ, obs_cov, objective! +import StructuralEquationModels: obs_cov, evaluate! -function objective!(semml::MaximumLikelihood, parameters, model::AbstractSem) +function evaluate!(objective::Number, gradient::Nothing, hessian::Nothing, semml::MaximumLikelihood, model::AbstractSem, par) # access the model implied and observed covariance matrices - Σᵢ = Σ(imply(model)) + Σᵢ = implied(model).Σ Σₒ = obs_cov(observed(model)) # compute the objective if isposdef(Symmetric(Σᵢ)) # is the model implied covariance matrix positive definite?
@@ -266,7 +241,7 @@ model_ml = SemFiniteDiff( loss = MaximumLikelihood() ) -model_fit = sem_fit(model_ml) +model_fit = fit(model_ml) ``` -If you want to differentiate your own loss functions via automatic differentiation, check out the [AutoDiffSEM](https://github.com/StructuralEquationModels/AutoDiffSEM) package (spoiler allert: it's really easy). +If you want to differentiate your own loss functions via automatic differentiation, check out the [AutoDiffSEM](https://github.com/StructuralEquationModels/AutoDiffSEM) package. diff --git a/docs/src/developer/observed.md b/docs/src/developer/observed.md index 2b695e597..240c1c34f 100644 --- a/docs/src/developer/observed.md +++ b/docs/src/developer/observed.md @@ -22,13 +22,13 @@ end To compute some fit indices, you need to provide methods for ```julia -# Number of observed datapoints -n_obs(observed::MyObserved) = ... -# Number of manifest variables -n_man(observed::MyObserved) = ... +# Number of samples (observations) in the dataset +nsamples(observed::MyObserved) = ... +# Number of observed variables +nobserved_vars(observed::MyObserved) = ... ``` -As always, you can add additional methods for properties that imply types and loss function want to access, for example (from the `SemObservedCommon` implementation): +As always, you can add additional methods for properties that implied types and loss function want to access, for example (from the `SemObservedCommon` implementation): ```julia obs_cov(observed::SemObservedCommon) = observed.obs_cov diff --git a/docs/src/developer/optimizer.md b/docs/src/developer/optimizer.md index 7480a9d91..a651ec636 100644 --- a/docs/src/developer/optimizer.md +++ b/docs/src/developer/optimizer.md @@ -1,83 +1,70 @@ # Custom optimizer types The optimizer part of a model connects it to the optimization backend. 
-The first part of the implementation is very similar to loss functions, so we just show the implementation of `SemOptimizerOptim` here as a reference: +Let's say we want to implement a new optimizer as `SemOptimizerName`. The first part of the implementation is very similar to loss functions, so we just show the implementation of `SemOptimizerOptim` here as a reference: ```julia -############################################################################ +############################################################################################ ### Types and Constructor -############################################################################ - -mutable struct SemOptimizerOptim{A, B} <: SemOptimizer +############################################################################################ +mutable struct SemOptimizerName{A, B} <: SemOptimizer{:Name} algorithm::A options::B end -function SemOptimizerOptim(; - algorithm = LBFGS(), - options = Optim.Options(;f_tol = 1e-10, x_tol = 1.5e-8), - kwargs...) - return SemOptimizerOptim(algorithm, options) -end +SemOptimizer{:Name}(args...; kwargs...) = SemOptimizerName(args...; kwargs...) + +SemOptimizerName(; + algorithm = LBFGS(), + options = Optim.Options(; f_tol = 1e-10, x_tol = 1.5e-8), + kwargs..., +) = SemOptimizerName(algorithm, options) -############################################################################ +############################################################################################ ### Recommended methods -############################################################################ +############################################################################################ -update_observed(optimizer::SemOptimizerOptim, observed::SemObserved; kwargs...) = optimizer +update_observed(optimizer::SemOptimizerName, observed::SemObserved; kwargs...) 
= optimizer -############################################################################ +############################################################################################ ### additional methods -############################################################################ +############################################################################################ -algorithm(optimizer::SemOptimizerOptim) = optimizer.algorithm -options(optimizer::SemOptimizerOptim) = optimizer.options +algorithm(optimizer::SemOptimizerName) = optimizer.algorithm +options(optimizer::SemOptimizerName) = optimizer.options ``` -Now comes a part that is a little bit more complicated: We need to write methods for `sem_fit`: - -```julia -function sem_fit( - model::AbstractSemSingle{O, I, L, D}; - start_val = start_val, - kwargs...) where {O, I, L, D <: SemOptimizerOptim} - - if !isa(start_val, Vector) - start_val = start_val(model; kwargs...) - end - - optimization_result = ... +Note that your optimizer is a subtype of `SemOptimizer{:Name}`, where you can choose a `:Name` that can later be used as a keyword argument to `fit(engine = :Name)`. +Similarly, `SemOptimizer{:Name}(args...; kwargs...) = SemOptimizerName(args...; kwargs...)` should be defined as well as a constructor that uses only keyword arguments: - ... +```julia +SemOptimizerName(; + algorithm = LBFGS(), + options = Optim.Options(; f_tol = 1e-10, x_tol = 1.5e-8), + kwargs..., +) = SemOptimizerName(algorithm, options) +``` +A method for `update_observed` and additional methods might be useful, but are not necessary. - return SemFit(minimum, minimizer, start_val, model, optimization_result) -end -``` - -The method has to return a `SemFit` object that consists of the minimum of the objective at the solution, the minimizer (aka parameter estimates), the starting values, the model and the optimization result (which may be anything you desire for your specific backend).
- -If we want our type to also work with `SemEnsemble` models, we also have to provide a method for that: +Now comes the substantive part: We need to provide a method for `fit`: ```julia -function sem_fit( - model::SemEnsemble{N, T , V, D, S}; - start_val = start_val, - kwargs...) where {N, T, V, D <: SemOptimizerOptim, S} - - if !isa(start_val, Vector) - start_val = start_val(model; kwargs...) - end - - +function fit( + optim::SemOptimizerName, + model::AbstractSem, + start_params::AbstractVector; + kwargs..., +) optimization_result = ... ... - return SemFit(minimum, minimizer, start_val, model, optimization_result) - + return SemFit(minimum, minimizer, start_params, model, optimization_result) end ``` +The method has to return a `SemFit` object that consists of the minimum of the objective at the solution, the minimizer (aka parameter estimates), the starting values, the model and the optimization result (which may be anything you desire for your specific backend). + In addition, you might want to provide methods to access properties of your optimization result: ```julia diff --git a/docs/src/developer/sem.md b/docs/src/developer/sem.md index c6b9f0523..c54ff26af 100644 --- a/docs/src/developer/sem.md +++ b/docs/src/developer/sem.md @@ -1,36 +1,22 @@ # Custom model types -The abstract supertype for all models is `AbstractSem`, which has two subtypes, `AbstractSemSingle{O, I, L, D}` and `AbstractSemCollection`. Currently, there are 2 subtypes of `AbstractSemSingle`: `Sem`, `SemFiniteDiff`. All subtypes of `AbstractSemSingle` should have at least observed, imply, loss and optimizer fields, and share their types (`{O, I, L, D}`) with the parametric abstract supertype. For example, the `SemFiniteDiff` type is implemented as +The abstract supertype for all models is `AbstractSem`, which has two subtypes, `AbstractSemSingle{O, I, L}` and `AbstractSemCollection`. Currently, there are 2 subtypes of `AbstractSemSingle`: `Sem`, `SemFiniteDiff`. 
All subtypes of `AbstractSemSingle` should have at least observed, implied, loss and optimizer fields, and share their types (`{O, I, L}`) with the parametric abstract supertype. For example, the `SemFiniteDiff` type is implemented as ```julia -struct SemFiniteDiff{ - O <: SemObserved, - I <: SemImply, - L <: SemLoss, - D <: SemOptimizer} <: AbstractSemSingle{O, I, L, D} +struct SemFiniteDiff{O <: SemObserved, I <: SemImplied, L <: SemLoss} <: + AbstractSemSingle{O, I, L} observed::O - imply::I + implied::I loss::L - optimizer::Dend +end ``` -Additionally, we need to define a method to compute at least the objective value, and if you want to use gradient based optimizers (which you most probably will), we need also to define a method to compute the gradient. For example, the respective fallback methods for all `AbstractSemSingle` models are defined as +Additionally, you can change how objective/gradient/hessian values are computed by providing methods for `evaluate!`, e.g. from `SemFiniteDiff`'s implementation: ```julia -function objective!(model::AbstractSemSingle, parameters) - objective!(imply(model), parameters, model) - return objective!(loss(model), parameters, model) -end - -function gradient!(gradient, model::AbstractSemSingle, parameters) - fill!(gradient, zero(eltype(gradient))) - gradient!(imply(model), parameters, model) - gradient!(gradient, loss(model), parameters, model) -end +evaluate!(objective, gradient, hessian, model::SemFiniteDiff, params) = ... ``` -Note that the `gradient!` method takes a pre-allocated array that should be filled with the gradient values. - Additionally, we can define constructors like the one in `"src/frontend/specification/Sem.jl"`. It is also possible to add new subtypes for `AbstractSemCollection`. 
\ No newline at end of file diff --git a/docs/src/index.md b/docs/src/index.md index 8b2d6999e..add69459e 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -32,7 +32,7 @@ For examples of how to use the package, see the Tutorials. Models you can fit out of the box include - Linear SEM that can be specified in RAM notation - ML, GLS and FIML estimation -- Ridge Regularization +- Ridge/Lasso/... Regularization - Multigroup SEM - Sums of arbitrary loss functions (everything the optimizer can handle) diff --git a/docs/src/internals/files.md b/docs/src/internals/files.md index 06c73444d..90ceceaaf 100644 --- a/docs/src/internals/files.md +++ b/docs/src/internals/files.md @@ -4,21 +4,24 @@ We briefly describe the file and folder structure of the package. ## Source code -All source code is in the `"src"` folder: +Source code is in the `"src"` folder: `"src"` - `"StructuralEquationModels.jl"` defines the module and the exported objects - `"types.jl"` defines all abstract types and the basic type hierarchy - `"objective_gradient_hessian.jl"` contains methods for computing objective, gradient and hessian values for different model types as well as generic fallback methods -- The four folders `"observed"`, `"imply"`, `"loss"` and `"diff"` contain implementations of specific subtypes (for example, the `"loss"` folder contains a file `"ML.jl"` that implements the `SemML` loss function). -- `"optimizer"` contains connections to different optimization backends (aka methods for `sem_fit`) +- The four folders `"observed"`, `"implied"`, `"loss"` and `"diff"` contain implementations of specific subtypes (for example, the `"loss"` folder contains a file `"ML.jl"` that implements the `SemML` loss function). 
+- `"optimizer"` contains connections to different optimization backends (aka methods for `fit`) - `"optim.jl"`: connection to the `Optim.jl` package - - `"NLopt.jl"`: connection to the `NLopt.jl` package - `"frontend"` contains user-facing functions - `"specification"` contains functionality for model specification - `"fit"` contains functionality for model assessment, like fit measures and standard errors - `"additional_functions"` contains helper functions for simulations, loading artifacts (example data) and various other things +Code for the package extensions can be found in the `"ext"` folder: +- `"SEMNLOptExt"` for connection to `NLopt.jl`. +- `"SEMProximalOptExt"` for connection to `ProximalAlgorithms.jl`. + ## Tests and Documentation Tests are in the `"test"` folder, documentation in the `"docs"` folder. \ No newline at end of file diff --git a/docs/src/internals/types.md b/docs/src/internals/types.md index 488127b29..e70a52ca4 100644 --- a/docs/src/internals/types.md +++ b/docs/src/internals/types.md @@ -3,11 +3,11 @@ The type hierarchy is implemented in `"src/types.jl"`. `AbstractSem`: the most abstract type in our package -- `AbstractSemSingle{O, I, L, D} <: AbstractSem` is an abstract parametric type that is a supertype of all single models +- `AbstractSemSingle{O, I, L} <: AbstractSem` is an abstract parametric type that is a supertype of all single models - `Sem`: models that do not need automatic differentiation or finite difference approximation - `SemFiniteDiff`: models whose gradients and/or hessians should be computed via finite difference approximation - `AbstractSemCollection <: AbstractSem` is an abstract supertype of all models that contain multiple `AbstractSem` submodels -Every `AbstractSemSingle` has to have `SemObserved`, `SemImply`, `SemLoss` and `SemOptimizer` fields (and can have additional fields). +Every `AbstractSemSingle` has to have `SemObserved`, `SemImplied`, and `SemLoss` fields (and can have additional fields).
`SemLoss` is a container for multiple `SemLossFunctions`. \ No newline at end of file diff --git a/docs/src/performance/mixed_differentiation.md b/docs/src/performance/mixed_differentiation.md index 2ac937077..b7ae333b5 100644 --- a/docs/src/performance/mixed_differentiation.md +++ b/docs/src/performance/mixed_differentiation.md @@ -19,7 +19,7 @@ model_ridge = SemFiniteDiff( model_ml_ridge = SemEnsemble(model_ml, model_ridge) -model_ml_ridge_fit = sem_fit(model_ml_ridge) +model_ml_ridge_fit = fit(model_ml_ridge) ``` The results of both methods will be the same, but we can verify that the computation costs differ (the package `BenchmarkTools` has to be installed for this): @@ -27,7 +27,7 @@ The results of both methods will be the same, but we can verify that the computa ```julia using BenchmarkTools -@benchmark sem_fit(model) +@benchmark fit(model) -@benchmark sem_fit(model_ml_ridge) +@benchmark fit(model_ml_ridge) ``` \ No newline at end of file diff --git a/docs/src/performance/mkl.md b/docs/src/performance/mkl.md index 0d5467658..4361ab445 100644 --- a/docs/src/performance/mkl.md +++ b/docs/src/performance/mkl.md @@ -27,9 +27,9 @@ To check the performance implications for fitting a SEM, you can use the [`Bench ```julia using BenchmarkTools -@benchmark sem_fit($your_model) +@benchmark fit($your_model) using MKL -@benchmark sem_fit($your_model) +@benchmark fit($your_model) ``` \ No newline at end of file diff --git a/docs/src/performance/simulation.md b/docs/src/performance/simulation.md index 4b00df6a4..0cb2ea25d 100644 --- a/docs/src/performance/simulation.md +++ b/docs/src/performance/simulation.md @@ -4,15 +4,15 @@ We are currently working on an interface for simulation studies. Until we are finished with this, this page is just a collection of tips. -## Swap observed data +## Replace observed data In simulation studies, a common task is fitting the same model to many different datasets. 
It would be a waste of resources to reconstruct the complete model for each dataset. -We therefore provide the function `swap_observed` to change the `observed` part of a model, +We therefore provide the function `replace_observed` to change the `observed` part of a model, without necessarily reconstructing the other parts. For the [A first model](@ref), you would use it as -```@setup swap_observed +```@setup replace_observed using StructuralEquationModels observed_vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8] @@ -43,12 +43,13 @@ graph = @StenoGraph begin end partable = ParameterTable( + graph, latent_vars = latent_vars, - observed_vars = observed_vars, - graph = graph) + observed_vars = observed_vars +) ``` -```@example swap_observed +```@example replace_observed data = example_data("political_democracy") data_1 = data[1:30, :] @@ -60,33 +61,52 @@ model = Sem( data = data_1 ) -model_updated = swap_observed(model; data = data_2, specification = partable) +model_updated = replace_observed(model; data = data_2, specification = partable) ``` +If you are building your models by parts, you can also update each part separately with the function `update_observed`. +For example, + +```@example replace_observed + +new_observed = SemObservedData(;data = data_2, specification = partable) + +my_optimizer = SemOptimizerOptim() + +new_optimizer = update_observed(my_optimizer, new_observed) +``` + +## Multithreading !!! danger "Thread safety" *This is only relevant when you are planning to fit updated models in parallel* Models generated by `replace_observed` may share the same objects in memory (e.g. some parts of `model` and `model_updated` are the same objects in memory.) Therefore, fitting both of these models in parallel will lead to **race conditions**, possibly crashing your computer. To avoid these problems, you should copy `model` before updating it.
-If you are building your models by parts, you can also update each part seperately with the function `update_observed`. -For example, +Taking into account the warning above, fitting multiple models in parallel becomes as easy as: -```@example swap_observed +```julia +model1 = Sem( + specification = partable, + data = data_1 +) -new_observed = SemObservedData(;data = data_2, specification = partable) +model2 = deepcopy(replace_observed(model; data = data_2, specification = partable)) -my_optimizer = SemOptimizerOptim() +models = [model1, model2] +fits = Vector{SemFit}(undef, 2) -new_optimizer = update_observed(my_optimizer, new_observed) +Threads.@threads for i in 1:2 + fits[i] = fit(models[i]) +end ``` ## API ```@docs -swap_observed +replace_observed update_observed ``` \ No newline at end of file diff --git a/docs/src/performance/sorting.md b/docs/src/performance/sorting.md index 802720099..78fd09411 100644 --- a/docs/src/performance/sorting.md +++ b/docs/src/performance/sorting.md @@ -13,7 +13,7 @@ To automatically reorder your variables in a way that makes this optimization po We use it as ```julia -sort!(parameter_table) +sort_vars!(parameter_table) model = Sem( specification = parameter_table, diff --git a/docs/src/performance/starting_values.md b/docs/src/performance/starting_values.md index ba7b4f41d..2df8d94d4 100644 --- a/docs/src/performance/starting_values.md +++ b/docs/src/performance/starting_values.md @@ -1,9 +1,9 @@ # Starting values -The `sem_fit` function has a keyword argument that takes either a vector of starting values or a function that takes a model as input to compute starting values. Current options are `start_fabin3` for fabin 3 starting values [^Hägglund82] or `start_simple` for simple starting values. Additional keyword arguments to `sem_fit` are passed to the starting value function. 
For example, +The `fit` function has a keyword argument that takes either a vector of starting values or a function that takes a model as input to compute starting values. Current options are `start_fabin3` for fabin 3 starting values [^Hägglund82] or `start_simple` for simple starting values. Additional keyword arguments to `fit` are passed to the starting value function. For example, ```julia - sem_fit( + fit( model; start_val = start_simple, start_covariances_latent = 0.5 diff --git a/docs/src/performance/symbolic.md b/docs/src/performance/symbolic.md index 597d2c484..05729526e 100644 --- a/docs/src/performance/symbolic.md +++ b/docs/src/performance/symbolic.md @@ -13,6 +13,6 @@ If the model is acyclic, we can compute ``` for some ``n < \infty``. -Typically, the ``S`` and ``A`` matrices are sparse. In our package, we offer symbolic precomputation of ``\Sigma``, ``\nabla\Sigma`` and even ``\nabla^2\Sigma`` for acyclic models to optimally exploit this sparsity. To use this feature, simply use the `RAMSymbolic` imply type for your model. +Typically, the ``S`` and ``A`` matrices are sparse. In our package, we offer symbolic precomputation of ``\Sigma``, ``\nabla\Sigma`` and even ``\nabla^2\Sigma`` for acyclic models to optimally exploit this sparsity. To use this feature, simply use the `RAMSymbolic` implied type for your model. This can decrase model fitting time, but will also increase model building time (as we have to carry out the symbolic computations and compile specialised functions). As a result, this is probably not beneficial to use if you only fit a single model, but can lead to great improvements if you fit the same modle to multiple datasets (e.g. to compute bootstrap standard errors). 
\ No newline at end of file diff --git a/docs/src/tutorials/backends/nlopt.md b/docs/src/tutorials/backends/nlopt.md index d4c5fdf8f..2afa5e547 100644 --- a/docs/src/tutorials/backends/nlopt.md +++ b/docs/src/tutorials/backends/nlopt.md @@ -1,6 +1,7 @@ # Using NLopt.jl [`SemOptimizerNLopt`](@ref) implements the connection to `NLopt.jl`. +It is only available if the `NLopt` package is loaded alongside `StructuralEquationModels.jl` in the running Julia session. It takes a bunch of arguments: ```julia @@ -22,6 +23,8 @@ The defaults are LBFGS as the optimization algorithm and the standard options fr We can choose something different: ```julia +using NLopt + my_optimizer = SemOptimizerNLopt(; algorithm = :AUGLAG, options = Dict(:maxeval => 200), @@ -32,6 +35,8 @@ my_optimizer = SemOptimizerNLopt(; This uses an augmented lagrangian method with LBFGS as the local optimization algorithm, stops at a maximum of 200 evaluations and uses a relative tolerance of the objective value of `1e-6` as the stopping criterion for the local algorithm. +To see how to use the optimizer to actually fit a model now, check out the [Model fitting](@ref) section. + In the NLopt docs, you can find explanations about the different [algorithms](https://nlopt.readthedocs.io/en/latest/NLopt_Algorithms/) and a [tutorial](https://nlopt.readthedocs.io/en/latest/NLopt_Introduction/) that also explains the different options. To choose an algorithm, just pass its name without the 'NLOPT\_' prefix (for example, 'NLOPT\_LD\_SLSQP' can be used by passing `algorithm = :LD_SLSQP`). diff --git a/docs/src/tutorials/backends/optim.md b/docs/src/tutorials/backends/optim.md index aaaf4ac9b..cf287e773 100644 --- a/docs/src/tutorials/backends/optim.md +++ b/docs/src/tutorials/backends/optim.md @@ -17,6 +17,8 @@ my_optimizer = SemOptimizerOptim( ) ``` -A model with this optimizer object will use BFGS (!not L-BFGS) with a back tracking linesearch and a certain initial step length guess.
Also, the trace of the optimization will be printed to the console. +This optimizer will use BFGS (!not L-BFGS) with a back tracking linesearch and a certain initial step length guess. Also, the trace of the optimization will be printed to the console. + +To see how to use the optimizer to actually fit a model now, check out the [Model fitting](@ref) section. For a list of all available algorithms and options, we refer to [this page](https://julianlsolvers.github.io/Optim.jl/stable/#user/config/) of the `Optim.jl` manual. \ No newline at end of file diff --git a/docs/src/tutorials/collection/collection.md b/docs/src/tutorials/collection/collection.md index 84fa00500..f60b7312c 100644 --- a/docs/src/tutorials/collection/collection.md +++ b/docs/src/tutorials/collection/collection.md @@ -15,11 +15,10 @@ model_2 = SemFiniteDiff(...) model_3 = Sem(...) -model_ensemble = SemEnsemble(model_1, model_2, model_3; optimizer = ...) +model_ensemble = SemEnsemble(model_1, model_2, model_3) ``` So you just construct the individual models (however you like) and pass them to `SemEnsemble`. -One important thing to note is that the individual optimizer entries of each model do not matter (as you can optimize your ensemble model only with one algorithmn from one optimization suite). Instead, `SemEnsemble` has its own optimizer part that specifies the backend for the whole ensemble model. You may also pass a vector of weigths to `SemEnsemble`. By default, those are set to ``N_{model}/N_{total}``, i.e. each model is weighted by the number of observations in it's data (which matches the formula for multigroup models). Multigroup models can also be specified via the graph interface; for an example, see [Multigroup models](@ref). 
diff --git a/docs/src/tutorials/collection/multigroup.md b/docs/src/tutorials/collection/multigroup.md index 4e6105128..1007f4563 100644 --- a/docs/src/tutorials/collection/multigroup.md +++ b/docs/src/tutorials/collection/multigroup.md @@ -6,20 +6,17 @@ using StructuralEquationModels As an example, we will fit the model from [the `lavaan` tutorial](https://lavaan.ugent.be/tutorial/groups.html) with loadings constrained to equality across groups. -We first load the example data and split it between groups: +We first load the example data. +We have to make sure that the column indicating the group (here called `school`) is a vector of `Symbol`s, not strings - so we convert it. ```@setup mg dat = example_data("holzinger_swineford") - -dat_g1 = dat[dat.school .== "Pasteur", :] -dat_g2 = dat[dat.school .== "Grant-White", :] +dat.school = ifelse.(dat.school .== "Pasteur", :Pasteur, :Grant_White) ``` ```julia dat = example_data("holzinger_swineford") - -dat_g1 = dat[dat.school .== "Pasteur", :] -dat_g2 = dat[dat.school .== "Grant-White", :] +dat.school = ifelse.(dat.school .== "Pasteur", :Pasteur, :Grant_White) ``` We then specify our model via the graph interface: @@ -61,46 +58,32 @@ You can then use the resulting graph to specify an `EnsembleParameterTable` ```@example mg; ansicolor = true groups = [:Pasteur, :Grant_White] -partable = EnsembleParameterTable(; - graph = graph, +partable = EnsembleParameterTable( + graph, observed_vars = observed_vars, latent_vars = latent_vars, groups = groups) ``` -The parameter table can be used to create a `Dict` of RAMMatrices with keys equal to the group names and parameter tables as values: +The parameter table can be used to create a `SemEnsemble` model: ```@example mg; ansicolor = true -specification = RAMMatrices(partable) +model_ml_multigroup = SemEnsemble( + specification = partable, + data = dat, + column = :school, + groups = groups) ``` -That is, you can asses the group-specific `RAMMatrices` as 
`specification[:group_name]`. - !!! note "A different way to specify" - Instead of choosing the workflow "Graph -> EnsembleParameterTable -> RAMMatrices", you may also directly specify RAMMatrices for each group (for an example see [this test](https://github.com/StructuralEquationModels/StructuralEquationModels.jl/blob/main/test/examples/multigroup/multigroup.jl)). - -The next step is to construct the model: - -```@example mg; ansicolor = true -model_g1 = Sem( - specification = specification[:Pasteur], - data = dat_g1 -) - -model_g2 = Sem( - specification = specification[:Grant_White], - data = dat_g2 -) - -model_ml_multigroup = SemEnsemble(model_g1, model_g2) -``` + Instead of choosing the workflow "Graph -> EnsembleParameterTable -> model", you may also directly specify RAMMatrices for each group (for an example see [this test](https://github.com/StructuralEquationModels/StructuralEquationModels.jl/blob/main/test/examples/multigroup/multigroup.jl)). We now fit the model and inspect the parameter estimates: ```@example mg; ansicolor = true -solution = sem_fit(model_ml_multigroup) -update_estimate!(partable, solution) -sem_summary(partable) +fit_mg = fit(model_ml_multigroup) +update_estimate!(partable, fit_mg) +details(partable) ``` Other things you can query about your fitted model (fit measures, standard errors, etc.) are described in the section [Model inspection](@ref) and work the same way for multigroup models.
\ No newline at end of file diff --git a/docs/src/tutorials/concept.md b/docs/src/tutorials/concept.md index c63c15941..035144d62 100644 --- a/docs/src/tutorials/concept.md +++ b/docs/src/tutorials/concept.md @@ -1,12 +1,13 @@ # Our Concept of a Structural Equation Model -In our package, every Structural Equation Model (`Sem`) consists of four parts: +In our package, every Structural Equation Model (`Sem`) consists of three parts (four, if you count the optimizer): ![SEM concept](../assets/concept.svg) -Those parts are interchangable building blocks (like 'Legos'), i.e. there are different pieces available you can choose as the 'observed' slot of the model, and stick them together with other pieces that can serve as the 'imply' part. +Those parts are interchangeable building blocks (like 'Legos'), i.e. there are different pieces available you can choose as the `observed` slot of the model, and stick them together with other pieces that can serve as the `implied` part. -The 'observed' part is for observed data, the imply part is what the model implies about your data (e.g. the model implied covariance matrix), the loss part compares the observed data and implied properties (e.g. weighted least squares difference between the observed and implied covariance matrix) and the optimizer part connects to the optimization backend (e.g. the type of optimization algorithm used). +The `observed` part is for observed data, the `implied` part is what the model implies about your data (e.g. the model implied covariance matrix), and the loss part compares the observed data and implied properties (e.g. weighted least squares difference between the observed and implied covariance matrix). +The optimizer part is not part of the model itself, but it is needed to fit the model as it connects to the optimization backend (e.g. the type of optimization algorithm used).
For example, to build a model for maximum likelihood estimation with the NLopt optimization suite as a backend you would choose `SemML` as a loss function and `SemOptimizerNLopt` as the optimizer. @@ -20,24 +21,24 @@ So everything that can be used as the 'observed' part has to be of type `SemObse Here is an overview on the available building blocks: -|[`SemObserved`](@ref) | [`SemImply`](@ref) | [`SemLossFunction`](@ref) | [`SemOptimizer`](@ref) | +|[`SemObserved`](@ref) | [`SemImplied`](@ref) | [`SemLossFunction`](@ref) | [`SemOptimizer`](@ref) | |---------------------------------|-----------------------|---------------------------|-------------------------------| | [`SemObservedData`](@ref) | [`RAM`](@ref) | [`SemML`](@ref) | [`SemOptimizerOptim`](@ref) | | [`SemObservedCovariance`](@ref) | [`RAMSymbolic`](@ref) | [`SemWLS`](@ref) | [`SemOptimizerNLopt`](@ref) | -| [`SemObservedMissing`](@ref) | [`ImplyEmpty`](@ref) | [`SemFIML`](@ref) | | -| | | [`SemRidge`](@ref) | | -| | | [`SemConstant`](@ref) | | +| [`SemObservedMissing`](@ref) | [`ImpliedEmpty`](@ref)| [`SemFIML`](@ref) | | +| | | [`SemRidge`](@ref) | | +| | | [`SemConstant`](@ref) | | The rest of this page explains the building blocks for each part. First, we explain every part and give an overview on the different options that are available. After that, the [API - model parts](@ref) section serves as a reference for detailed explanations about the different options. (How to stick them together to a final model is explained in the section on [Model Construction](@ref).) ## The observed part aka [`SemObserved`](@ref) -The 'observed' part contains all necessary information about the observed data. Currently, we have three options: [`SemObservedData`](@ref) for fully observed datasets, [`SemObservedCovariance`](@ref) for observed covariances (and means) and [`SemObservedMissing`](@ref) for data that contains missing values. +The *observed* part contains all necessary information about the observed data. 
Currently, we have three options: [`SemObservedData`](@ref) for fully observed datasets, [`SemObservedCovariance`](@ref) for observed covariances (and means) and [`SemObservedMissing`](@ref) for data that contains missing values. -## The imply part aka [`SemImply`](@ref) -The imply part is what your model implies about the data, for example, the model-implied covariance matrix. -There are two options at the moment: [`RAM`](@ref), which uses the reticular action model to compute the model implied covariance matrix, and [`RAMSymbolic`](@ref) which does the same but symbolically pre-computes part of the model, which increases subsequent performance in model fitting (see [Symbolic precomputation](@ref)). There is also a third option, [`ImplyEmpty`](@ref) that can serve as a 'placeholder' for models that do not need an imply part. +## The implied part aka [`SemImplied`](@ref) +The *implied* part is what your model implies about the data, for example, the model-implied covariance matrix. +There are two options at the moment: [`RAM`](@ref), which uses the reticular action model to compute the model implied covariance matrix, and [`RAMSymbolic`](@ref) which does the same but symbolically pre-computes part of the model, which increases subsequent performance in model fitting (see [Symbolic precomputation](@ref)). There is also a third option, [`ImpliedEmpty`](@ref) that can serve as a 'placeholder' for models that do not need an implied part. ## The loss part aka `SemLoss` The loss part specifies the objective that is optimized to find the parameter estimates. @@ -51,12 +52,12 @@ Available loss functions are ## The optimizer part aka `SemOptimizer` The optimizer part of a model connects to the numerical optimization backend used to fit the model. It can be used to control options like the optimization algorithm, linesearch, stopping criteria, etc. 
-There are currently two available backends, [`SemOptimizerOptim`](@ref) connecting to the [Optim.jl](https://github.com/JuliaNLSolvers/Optim.jl) backend, and [`SemOptimizerNLopt`](@ref) connecting to the [NLopt.jl](https://github.com/JuliaOpt/NLopt.jl) backend. -For more information about the available options see also the tutorials about [Using Optim.jl](@ref) and [Using NLopt.jl](@ref), as well as [Constrained optimization](@ref). +There are currently three available backends, [`SemOptimizerOptim`](@ref) connecting to the [Optim.jl](https://github.com/JuliaNLSolvers/Optim.jl) backend, [`SemOptimizerNLopt`](@ref) connecting to the [NLopt.jl](https://github.com/JuliaOpt/NLopt.jl) backend and [`SemOptimizerProximal`](@ref) connecting to [ProximalAlgorithms.jl](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl). +For more information about the available options see also the tutorials about [Using Optim.jl](@ref) and [Using NLopt.jl](@ref), as well as [Constrained optimization](@ref) and [Regularization](@ref) . # What to do next -You now have an understanding about our representation of structural equation models. +You now have an understanding of our representation of structural equation models. To learn more about how to use the package, you may visit the remaining tutorials. 
@@ -71,15 +72,18 @@ SemObserved SemObservedData SemObservedCovariance SemObservedMissing +samples +observed_vars +SemSpecification ``` -## imply +## implied ```@docs -SemImply +SemImplied RAM RAMSymbolic -ImplyEmpty +ImpliedEmpty ``` ## loss functions @@ -100,4 +104,5 @@ SemConstant SemOptimizer SemOptimizerOptim SemOptimizerNLopt +SemOptimizerProximal ``` \ No newline at end of file diff --git a/docs/src/tutorials/constraints/constraints.md b/docs/src/tutorials/constraints/constraints.md index 7e2ec53e1..338803cb3 100644 --- a/docs/src/tutorials/constraints/constraints.md +++ b/docs/src/tutorials/constraints/constraints.md @@ -16,13 +16,13 @@ graph = @StenoGraph begin # loadings ind60 → fixed(1)*x1 + x2 + x3 - dem60 → fixed(1)*y1 + y2 + y3 + y4 + dem60 → fixed(1)*y1 + label(:λ₂)*y2 + label(:λ₃)*y3 + y4 dem65 → fixed(1)*y5 + y6 + y7 + y8 # latent regressions ind60 → dem60 dem60 → dem65 - ind60 → dem65 + ind60 → label(:λₗ)*dem65 # variances _(observed_vars) ↔ _(observed_vars) @@ -31,15 +31,15 @@ graph = @StenoGraph begin # covariances y1 ↔ y5 y2 ↔ y4 + y6 - y3 ↔ y7 - y8 ↔ y4 + y6 + y3 ↔ label(:y3y7)*y7 + y8 ↔ label(:y8y4)*y4 + y6 end partable = ParameterTable( + graph, latent_vars = latent_vars, - observed_vars = observed_vars, - graph = graph) + observed_vars = observed_vars) data = example_data("political_democracy") @@ -48,11 +48,11 @@ model = Sem( data = data ) -model_fit = sem_fit(model) +model_fit = fit(model) update_estimate!(partable, model_fit) -sem_summary(partable) +details(partable) ``` ### Define the constraints @@ -64,17 +64,19 @@ Let's introduce some constraints: (Of course those constaints only serve an illustratory purpose.) -We first need to get the indices of the respective parameters that are invoved in the constraints. We can look up their labels in the output above, and retrieve their indices as +We first need to get the indices of the respective parameters that are invoved in the constraints. 
+We can look up their labels in the output above, and retrieve their indices as ```@example constraints -parameter_indices = get_identifier_indices([:θ_29, :θ_30, :θ_3, :θ_4, :θ_11], model) +parind = param_indices(model) +parind[:y3y7] # 29 ``` -The bound constraint is easy to specify: Just give a vector of upper or lower bounds that contains the bound for each parameter. In our example, only parameter number 11 has an upper bound, and the number of total parameters is `n_par(model) = 31`, so we define +The bound constraint is easy to specify: Just give a vector of upper or lower bounds that contains the bound for each parameter. In our example, only the parameter labeled `:λₗ` has an upper bound, and the number of total parameters is `n_par(model) = 31`, so we define ```@example constraints upper_bounds = fill(Inf, 31) -upper_bounds[11] = 0.5 +upper_bounds[parind[:λₗ]] = 0.5 ``` The equailty and inequality constraints have to be reformulated to be of the form `x = 0` or `x ≤ 0`: @@ -84,6 +86,8 @@ The equailty and inequality constraints have to be reformulated to be of the for Now they can be defined as functions of the parameter vector: ```@example constraints +parind[:y3y7] # 29 +parind[:y8y4] # 30 # θ[29] + θ[30] - 1 = 0.0 function eq_constraint(θ, gradient) if length(gradient) > 0 @@ -94,6 +98,8 @@ function eq_constraint(θ, gradient) return θ[29] + θ[30] - 1 end +parind[:λ₂] # 3 +parind[:λ₃] # 4 # θ[3] - θ[4] - 0.1 ≤ 0 function ineq_constraint(θ, gradient) if length(gradient) > 0 @@ -109,13 +115,15 @@ If the algorithm needs gradients at an iteration, it will pass the vector `gradi With `if length(gradient) > 0` we check if the algorithm needs gradients, and if it does, we fill the `gradient` vector with the gradients of the constraint w.r.t. the parameters. -In NLopt, vector-valued constraints are also possible, but we refer to the documentation fot that. +In NLopt, vector-valued constraints are also possible, but we refer to the documentation for that. 
### Fit the model We now have everything together to specify and fit our model. First, we specify our optimizer backend as ```@example constraints +using NLopt + constrained_optimizer = SemOptimizerNLopt( algorithm = :AUGLAG, options = Dict(:upper_bounds => upper_bounds, :xtol_abs => 1e-4), @@ -142,23 +150,23 @@ In this example, we set both tolerances to `1e-8`. ```@example constraints model_constrained = Sem( specification = partable, - data = data, - optimizer = constrained_optimizer + data = data ) -model_fit_constrained = sem_fit(model_constrained) +model_fit_constrained = fit(constrained_optimizer, model_constrained) ``` As you can see, the optimizer converged (`:XTOL_REACHED`) and investigating the solution yields ```@example constraints update_partable!( - partable, - model_fit_constrained, + partable, + :estimate_constr, + param_labels(model_fit_constrained), solution(model_fit_constrained), - :estimate_constr) + ) -sem_summary(partable) +details(partable) ``` As we can see, the constrained solution is very close to the original solution (compare the columns estimate and estimate_constr), with the difference that the constrained parameters fulfill their constraints. diff --git a/docs/src/tutorials/construction/build_by_parts.md b/docs/src/tutorials/construction/build_by_parts.md index 5a56f1ccf..45d2a2ea1 100644 --- a/docs/src/tutorials/construction/build_by_parts.md +++ b/docs/src/tutorials/construction/build_by_parts.md @@ -1,6 +1,6 @@ # Build by parts -You can always build a model by parts - that is, you construct the observed, imply, loss and optimizer part seperately. +You can always build a model by parts - that is, you construct the observed, implied, loss and optimizer part separately. As an example on how this works, we will build [A first model](@ref) in parts.
@@ -11,8 +11,8 @@ using StructuralEquationModels data = example_data("political_democracy") -observed_vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8] -latent_vars = [:ind60, :dem60, :dem65] +obs_vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8] +lat_vars = [:ind60, :dem60, :dem65] graph = @StenoGraph begin @@ -27,8 +27,8 @@ graph = @StenoGraph begin ind60 → dem65 # variances - _(observed_vars) ↔ _(observed_vars) - _(latent_vars) ↔ _(latent_vars) + _(obs_vars) ↔ _(obs_vars) + _(lat_vars) ↔ _(lat_vars) # covariances y1 ↔ y5 @@ -39,9 +39,9 @@ graph = @StenoGraph begin end partable = ParameterTable( - latent_vars = latent_vars, - observed_vars = observed_vars, - graph = graph) + graph, + latent_vars = lat_vars, + observed_vars = obs_vars) ``` Now, we construct the different parts: @@ -50,8 +50,8 @@ Now, we construct the different parts: # observed --------------------------------------------------------------------------------- observed = SemObservedData(specification = partable, data = data) -# imply ------------------------------------------------------------------------------------ -imply_ram = RAM(specification = partable) +# implied ------------------------------------------------------------------------------------ +implied_ram = RAM(specification = partable) # loss ------------------------------------------------------------------------------------- ml = SemML(observed = observed) @@ -63,5 +63,7 @@ optimizer = SemOptimizerOptim() # model ------------------------------------------------------------------------------------ -model_ml = Sem(observed, imply_ram, loss_ml, optimizer) +model_ml = Sem(observed, implied_ram, loss_ml) + +fit(optimizer, model_ml) ``` \ No newline at end of file diff --git a/docs/src/tutorials/construction/outer_constructor.md b/docs/src/tutorials/construction/outer_constructor.md index 21f6bfd3f..a1c0b8ad3 100644 --- a/docs/src/tutorials/construction/outer_constructor.md +++ 
b/docs/src/tutorials/construction/outer_constructor.md
@@ -15,13 +15,13 @@ Structural Equation Model
    SemML
 
 - Fields
    observed: SemObservedCommon
-   imply: RAM
+   implied: RAM
    optimizer: SemOptimizerOptim
 ```
 
-The output of this call tells you exactly what model you just constructed (i.e. what the loss functions, observed, imply and optimizer parts are).
+The output of this call tells you exactly what model you just constructed (i.e. what the loss functions, observed, implied and optimizer parts are).
 
-As you can see, by default, we use maximum likelihood estimation, the RAM imply type and the `Optim.jl` optimization backend.
+As you can see, by default, we use maximum likelihood estimation and the RAM implied type.
 To choose something different, you can provide it as a keyword argument:
 
 ```julia
@@ -29,21 +29,20 @@ model = Sem(
     specification = partable,
     data = data,
     observed = ...,
-    imply = ...,
+    implied = ...,
     loss = ...,
-    optimizer = ...
 )
 ```
 
-For example, to construct a model for weighted least squares estimation that uses symbolic precomputation and the NLopt backend, write
+For example, to construct a model for weighted least squares estimation that uses symbolic precomputation, write
 
 ```julia
 model = Sem(
     specification = partable,
     data = data,
-    imply = RAMSymbolic,
+    implied = RAMSymbolic,
     loss = SemWLS,
-    optimizer = SemOptimizerNLopt
+    optimizer = SemOptimizerOptim
 )
 ```
 
@@ -73,8 +72,8 @@ W = ...
 
 model = Sem(
     specification = partable,
     data = data,
-    imply = RAMSymbolic,
-    loss = SemWLS
+    implied = RAMSymbolic,
+    loss = SemWLS,
     wls_weight_matrix = W
 )
 
@@ -92,25 +91,29 @@ help>SemObservedMissing
 
   For observed data with missing values.
 
  Constructor
-  ≡≡≡≡≡≡≡≡≡≡≡≡≡
+  ≡≡≡≡≡≡≡≡≡≡≡
 
   SemObservedMissing(;
-      specification,
       data,
-      obs_colnames = nothing,
+      observed_vars = nothing,
+      specification = nothing,
       kwargs...)
Arguments - ≡≡≡≡≡≡≡≡≡≡≡ + ≡≡≡≡≡≡≡≡≡ - • specification: either a RAMMatrices or ParameterTable object (1) + • specification: optional SEM model specification + (SemSpecification) • data: observed data - • obs_colnames::Vector{Symbol}: column names of the data (if the object passed as data does not have column names, i.e. is not a data frame) + • observed_vars::Vector{Symbol}: column names of the data (if + the object passed as data does not have column names, i.e. is + not a data frame) + + ──────────────────────────────────────────────────────────────────────── - ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── -Extended help is available with `??` +Extended help is available with `??SemObservedMissing` ``` ## Optimize loss functions without analytic gradient @@ -118,7 +121,6 @@ Extended help is available with `??` For loss functions without analytic gradients, it is possible to use finite difference approximation or automatic differentiation. All loss functions provided in the package do have analytic gradients (and some even hessians or approximations thereof), so there is no need do use this feature if you are only working with them. However, if you implement your own loss function, you do not have to provide analytic gradients. -This page is a about finite difference approximation. For information about how to use automatic differentiation, see the documentation of the [AutoDiffSEM](https://github.com/StructuralEquationModels/AutoDiffSEM) package. To use finite difference approximation, you may construct your model just as before, but swap the `Sem` constructor for `SemFiniteDiff`. For example @@ -129,4 +131,4 @@ model = SemFiniteDiff( ) ``` -constructs a model that will use finite difference approximation if you estimate the parameters via `sem_fit(model)`. 
\ No newline at end of file +constructs a model that will use finite difference approximation if you estimate the parameters via `fit(model)`. \ No newline at end of file diff --git a/docs/src/tutorials/first_model.md b/docs/src/tutorials/first_model.md index b19a22200..e8048966c 100644 --- a/docs/src/tutorials/first_model.md +++ b/docs/src/tutorials/first_model.md @@ -15,8 +15,8 @@ using StructuralEquationModels We then first define the graph of our model in a syntax which is similar to the R-package `lavaan`: ```@setup high_level -observed_vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8] -latent_vars = [:ind60, :dem60, :dem65] +obs_vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8] +lat_vars = [:ind60, :dem60, :dem65] graph = @StenoGraph begin @@ -31,8 +31,8 @@ graph = @StenoGraph begin ind60 → dem65 # variances - _(observed_vars) ↔ _(observed_vars) - _(latent_vars) ↔ _(latent_vars) + _(obs_vars) ↔ _(obs_vars) + _(lat_vars) ↔ _(lat_vars) # covariances y1 ↔ y5 @@ -44,8 +44,8 @@ end ``` ```julia -observed_vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8] -latent_vars = [:ind60, :dem60, :dem65] +obs_vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8] +lat_vars = [:ind60, :dem60, :dem65] graph = @StenoGraph begin @@ -60,8 +60,8 @@ graph = @StenoGraph begin ind60 → dem65 # variances - _(observed_vars) ↔ _(observed_vars) - _(latent_vars) ↔ _(latent_vars) + _(obs_vars) ↔ _(obs_vars) + _(lat_vars) ↔ _(lat_vars) # covariances y1 ↔ y5 @@ -83,9 +83,9 @@ We then use this graph to define a `ParameterTable` object ```@example high_level; ansicolor = true partable = ParameterTable( - latent_vars = latent_vars, - observed_vars = observed_vars, - graph = graph) + graph, + latent_vars = lat_vars, + observed_vars = obs_vars) ``` load the example data @@ -110,7 +110,7 @@ model = Sem( We can now fit the model via ```@example high_level; ansicolor = true -model_fit = sem_fit(model) +model_fit = fit(model) ``` and compute fit measures 
as @@ -119,10 +119,10 @@ and compute fit measures as fit_measures(model_fit) ``` -We can also get a bit more information about the fitted model via the `sem_summary()` function: +We can also get a bit more information about the fitted model via the `details()` function: ```@example high_level; ansicolor = true -sem_summary(model_fit) +details(model_fit) ``` To investigate the parameter estimates, we can update our `partable` object to contain the new estimates: @@ -134,7 +134,7 @@ update_estimate!(partable, model_fit) and investigate the solution with ```@example high_level; ansicolor = true -sem_summary(partable) +details(partable) ``` Congratulations, you fitted and inspected your very first model! diff --git a/docs/src/tutorials/fitting/fitting.md b/docs/src/tutorials/fitting/fitting.md index f78a6c0db..fff06abaa 100644 --- a/docs/src/tutorials/fitting/fitting.md +++ b/docs/src/tutorials/fitting/fitting.md @@ -3,7 +3,7 @@ As we saw in [A first model](@ref), after you have build a model, you can fit it via ```julia -model_fit = sem_fit(model) +model_fit = fit(model) # output @@ -16,7 +16,7 @@ Structural Equation Model SemML - Fields observed: SemObservedData - imply: RAM + implied: RAM optimizer: SemOptimizerOptim ------------- Optimization result ------------- @@ -43,10 +43,32 @@ Structural Equation Model ∇f(x) calls: 524 ``` -You may optionally specify [Starting values](@ref). +## Choosing an optimizer + +To choose a different optimizer, you can call `fit` with the keyword argument `engine = ...`, and pass additional keyword arguments: + +```julia +using Optim + +model_fit = fit(model; engine = :Optim, algorithm = BFGS()) +``` + +Available options for engine are `:Optim`, `:NLopt` and `:Proximal`, where `:NLopt` and `:Proximal` are only available if the `NLopt.jl` and `ProximalAlgorithms.jl` packages are loaded respectively. + +The available keyword arguments are listed in the sections [Using Optim.jl](@ref), [Using NLopt.jl](@ref) and [Regularization](@ref). 
+
+Alternatively, you can also explicitly define a `SemOptimizer` and pass it as the first argument to `fit`:
+
+```julia
+my_optimizer = SemOptimizerOptim(algorithm = BFGS())
+
+fit(my_optimizer, model)
+```
+
+You may also optionally specify [Starting values](@ref).
 
 # API - model fitting
 
 ```@docs
-sem_fit
+fit
 ```
\ No newline at end of file
diff --git a/docs/src/tutorials/inspection/inspection.md b/docs/src/tutorials/inspection/inspection.md
index 5bc7946ba..abd416c1c 100644
--- a/docs/src/tutorials/inspection/inspection.md
+++ b/docs/src/tutorials/inspection/inspection.md
@@ -1,7 +1,7 @@
 # Model inspection
 
 ```@setup colored
-using StructuralEquationModels 
+using StructuralEquationModels
 
 observed_vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8]
 latent_vars = [:ind60, :dem60, :dem65]
@@ -31,9 +31,9 @@ graph = @StenoGraph begin
 end
 
 partable = ParameterTable(
-    latent_vars = latent_vars,
-    observed_vars = observed_vars,
-    graph = graph)
+    graph,
+    latent_vars = latent_vars,
+    observed_vars = observed_vars)
 
 data = example_data("political_democracy")
 
@@ -42,21 +42,21 @@ model = Sem(
     specification = partable,
     data = data
 )
 
-model_fit = sem_fit(model)
+model_fit = fit(model)
 ```
 
 After you fitted a model,
 
 ```julia
-model_fit = sem_fit(model)
+model_fit = fit(model)
 ```
 
 you end up with an object of type [`SemFit`](@ref).
 
-You can get some more information about it by using the `sem_summary` function:
+You can get some more information about it by using the `details` function:
 
 ```@example colored; ansicolor = true
-sem_summary(model_fit)
+details(model_fit)
 ```
 
 To compute fit measures, we use
@@ -73,12 +73,12 @@ AIC(model_fit)
 
 A list of available [Fit measures](@ref) is at the end of this page.
-To inspect the parameter estimates, we can update a `ParameterTable` object and call `sem_summary` on it: +To inspect the parameter estimates, we can update a `ParameterTable` object and call `details` on it: ```@example colored; ansicolor = true; output = false update_estimate!(partable, model_fit) -sem_summary(partable) +details(partable) ``` We can also update the `ParameterTable` object with other information via [`update_partable!`](@ref). For example, if we want to compare hessian-based and bootstrap-based standard errors, we may write @@ -87,10 +87,10 @@ We can also update the `ParameterTable` object with other information via [`upda se_bs = se_bootstrap(model_fit; n_boot = 20) se_he = se_hessian(model_fit) -update_partable!(partable, model_fit, se_he, :se_hessian) -update_partable!(partable, model_fit, se_bs, :se_bootstrap) +update_partable!(partable, :se_hessian, param_labels(model_fit), se_he) +update_partable!(partable, :se_bootstrap, param_labels(model_fit), se_bs) -sem_summary(partable) +details(partable) ``` ## Export results @@ -106,7 +106,7 @@ parameters_df = DataFrame(partable) # API - model inspection ```@docs -sem_summary +details update_estimate! update_partable! 
``` @@ -126,11 +126,12 @@ fit_measures AIC BIC χ² -df +dof minus2ll -n_man -n_obs -n_par +nobserved_vars +nsamples +param_labels +nparams p_value RMSEA ``` diff --git a/docs/src/tutorials/meanstructure.md b/docs/src/tutorials/meanstructure.md index 9f2c167df..b2da5029a 100644 --- a/docs/src/tutorials/meanstructure.md +++ b/docs/src/tutorials/meanstructure.md @@ -35,13 +35,13 @@ graph = @StenoGraph begin y8 ↔ y4 + y6 # means - Symbol("1") → _(observed_vars) + Symbol(1) → _(observed_vars) end partable = ParameterTable( + graph, latent_vars = latent_vars, - observed_vars = observed_vars, - graph = graph) + observed_vars = observed_vars) ``` ```julia @@ -73,13 +73,13 @@ graph = @StenoGraph begin y8 ↔ y4 + y6 # means - Symbol("1") → _(observed_vars) + Symbol(1) → _(observed_vars) end partable = ParameterTable( + graph, latent_vars = latent_vars, - observed_vars = observed_vars, - graph = graph) + observed_vars = observed_vars) ``` that is, all observed variable means are estimated freely. @@ -96,21 +96,21 @@ model = Sem( meanstructure = true ) -sem_fit(model) +fit(model) ``` -If we build the model by parts, we have to pass the `meanstructure = true` argument to every part that requires it (when in doubt, simply comsult the documentation for the respective part). +If we build the model by parts, we have to pass the `meanstructure = true` argument to every part that requires it (when in doubt, simply consult the documentation for the respective part). 
For our example, ```@example meanstructure observed = SemObservedData(specification = partable, data = data, meanstructure = true) -imply_ram = RAM(specification = partable, meanstructure = true) +implied_ram = RAM(specification = partable, meanstructure = true) ml = SemML(observed = observed, meanstructure = true) -model = Sem(observed, imply_ram, SemLoss(ml), SemOptimizerOptim()) +model = Sem(observed, implied_ram, SemLoss(ml)) -sem_fit(model) +fit(model) ``` \ No newline at end of file diff --git a/docs/src/tutorials/regularization/regularization.md b/docs/src/tutorials/regularization/regularization.md index b7d9affab..3d82fcfba 100644 --- a/docs/src/tutorials/regularization/regularization.md +++ b/docs/src/tutorials/regularization/regularization.md @@ -5,40 +5,23 @@ For ridge regularization, you can simply use `SemRidge` as an additional loss function (for example, a model with the loss functions `SemML` and `SemRidge` corresponds to ridge-regularized maximum likelihood estimation). -For lasso, elastic net and (far) beyond, we provide the `ProximalSEM` package. You can install it and load it alongside `StructuralEquationModels`: +For lasso, elastic net and (far) beyond, you can load the `ProximalAlgorithms.jl` and `ProximalOperators.jl` packages alongside `StructuralEquationModels`: ```@setup reg -import Pkg -Pkg.add(url = "https://github.com/StructuralEquationModels/ProximalSEM.jl") - -using StructuralEquationModels, ProximalSEM -``` - -```julia -import Pkg -Pkg.add(url = "https://github.com/StructuralEquationModels/ProximalSEM.jl") - -using StructuralEquationModels, ProximalSEM -``` - -!!! warning "ProximalSEM is still WIP" - The ProximalSEM package does not have any releases yet, and is not well tested - until the first release, use at your own risk and expect interfaces to change without prior notice. 
- -Additionally, you need to install and load `ProximalOperators.jl`: - -```@setup reg -using ProximalOperators +using StructuralEquationModels, ProximalAlgorithms, ProximalOperators ``` ```julia +using Pkg +Pkg.add("ProximalAlgorithms") Pkg.add("ProximalOperators") -using ProximalOperators +using StructuralEquationModels, ProximalAlgorithms, ProximalOperators ``` ## `SemOptimizerProximal` -`ProximalSEM` provides a new "building block" for the optimizer part of a model, called `SemOptimizerProximal`. +To estimate regularized models, we provide a "building block" for the optimizer part, called `SemOptimizerProximal`. It connects our package to the [`ProximalAlgorithms.jl`](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl) optimization backend, providing so-called proximal optimization algorithms. Those can handle, amongst other things, various forms of regularization. @@ -86,9 +69,10 @@ graph = @StenoGraph begin end partable = ParameterTable( + graph, latent_vars = latent_vars, - observed_vars = observed_vars, - graph = graph) + observed_vars = observed_vars +) data = example_data("political_democracy") @@ -101,7 +85,9 @@ model = Sem( We labeled the covariances between the items because we want to regularize those: ```@example reg -ind = get_identifier_indices([:cov_15, :cov_24, :cov_26, :cov_37, :cov_48, :cov_68], model) +ind = getindex.( + [param_indices(model)], + [:cov_15, :cov_24, :cov_26, :cov_37, :cov_48, :cov_68]) ``` In the following, we fit the same model with lasso regularization of those covariances. 
@@ -126,8 +112,7 @@ optimizer_lasso = SemOptimizerProximal(
 
 model_lasso = Sem(
     specification = partable,
-    data = data,
-    optimizer = optimizer_lasso
+    data = data
 )
 ```
 
@@ -135,19 +120,25 @@
 Let's fit the regularized model
 
 ```@example reg
-fit_lasso = sem_fit(model_lasso)
+fit_lasso = fit(optimizer_lasso, model_lasso)
 ```
 
 and compare the solution to unregularizted estimates:
 
 ```@example reg
-fit = sem_fit(model)
+fit = fit(model)
 
 update_estimate!(partable, fit)
 
-update_partable!(partable, fit_lasso, solution(fit_lasso), :estimate_lasso)
+update_partable!(partable, :estimate_lasso, param_labels(fit_lasso), solution(fit_lasso))
 
-sem_summary(partable)
+details(partable)
+```
+
+Instead of explicitly defining a `SemOptimizerProximal` object, you can also pass `engine = :Proximal` and additional keyword arguments to `fit`:
+
+```@example reg
+fit = fit(model; engine = :Proximal, operator_g = NormL1(λ))
 ```
 
 ## Second example - mixed l1 and l0 regularization
 
@@ -164,22 +155,20 @@ To define a sup of separable proximal operators (i.e.
no parameter is penalized we can use [`SlicedSeparableSum`](https://juliafirstorder.github.io/ProximalOperators.jl/stable/calculus/#ProximalOperators.SlicedSeparableSum) from the `ProximalOperators` package: ```@example reg -prox_operator = SlicedSeparableSum((NormL1(0.02), NormL0(20.0), NormL0(0.0)), ([ind], [12:22], [vcat(1:11, 23:25)])) +prox_operator = SlicedSeparableSum((NormL0(20.0), NormL1(0.02), NormL0(0.0)), ([ind], [9:11], [vcat(1:8, 12:25)])) model_mixed = Sem( specification = partable, - data = data, - optimizer = SemOptimizerProximal, - operator_g = prox_operator + data = data, ) -fit_mixed = sem_fit(model_mixed) +fit_mixed = fit(model_mixed; engine = :Proximal, operator_g = prox_operator) ``` Let's again compare the different results: ```@example reg -update_partable!(partable, fit_mixed, solution(fit_mixed), :estimate_mixed) +update_partable!(partable, :estimate_mixed, param_labels(fit_mixed), solution(fit_mixed)) -sem_summary(partable) +details(partable) ``` \ No newline at end of file diff --git a/docs/src/tutorials/specification/graph_interface.md b/docs/src/tutorials/specification/graph_interface.md index 7a03083c7..75e1d1b6d 100644 --- a/docs/src/tutorials/specification/graph_interface.md +++ b/docs/src/tutorials/specification/graph_interface.md @@ -12,13 +12,13 @@ end and convert it to a ParameterTable to construct your models: ```julia -observed_vars = ... -latent_vars = ... +obs_vars = ... +lat_vars = ... 
partable = ParameterTable( - latent_vars = latent_vars, - observed_vars = observed_vars, - graph = graph) + graph, + latent_vars = lat_vars, + observed_vars = obs_vars) model = Sem( specification = partable, @@ -65,24 +65,24 @@ As you saw above and in the [A first model](@ref) example, the graph object need ```julia partable = ParameterTable( - latent_vars = latent_vars, - observed_vars = observed_vars, - graph = graph) + graph, + latent_vars = lat_vars, + observed_vars = obs_vars) ``` The `ParameterTable` constructor also needs you to specify a vector of observed and latent variables, in the example above this would correspond to ```julia -observed_vars = [:x1 :x2 :x3 :x4 :x5 :x6 :x7 :x8 :x9] -latent_vars = [:ξ₁ :ξ₂ :ξ₃] +obs_vars = [:x1 :x2 :x3 :x4 :x5 :x6 :x7 :x8 :x9] +lat_vars = [:ξ₁ :ξ₂ :ξ₃] ``` The variable names (`:x1`) have to be symbols, the syntax `:something` creates an object of type `Symbol`. But you can also use vectors of symbols inside the graph specification, escaping them with `_(...)`. 
For example, this graph specification ```julia @StenoGraph begin - _(observed_vars) ↔ _(observed_vars) - _(latent_vars) ⇔ _(latent_vars) + _(obs_vars) ↔ _(obs_vars) + _(lat_vars) ⇔ _(lat_vars) end ``` creates undirected effects coresponding to @@ -95,7 +95,7 @@ Mean parameters are specified as a directed effect from `1` to the respective va ```julia @StenoGraph begin - Symbol("1") → _(observed_vars) + Symbol(1) → _(obs_vars) end ``` diff --git a/docs/src/tutorials/specification/ram_matrices.md b/docs/src/tutorials/specification/ram_matrices.md index 8eea6967c..abe76ea6f 100644 --- a/docs/src/tutorials/specification/ram_matrices.md +++ b/docs/src/tutorials/specification/ram_matrices.md @@ -59,8 +59,8 @@ spec = RAMMatrices(; A = A, S = S, F = F, - parameters = θ, - colnames = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8, :ind60, :dem60, :dem65] + param_labels = θ, + vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8, :ind60, :dem60, :dem65] ) model = Sem( @@ -90,8 +90,8 @@ spec = RAMMatrices(; A = A, S = S, F = F, - parameters = θ, - colnames = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8, :ind60, :dem60, :dem65] + param_labels = θ, + vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8, :ind60, :dem60, :dem65] ) ``` diff --git a/docs/src/tutorials/specification/specification.md b/docs/src/tutorials/specification/specification.md index 88f19ce3d..85bb37c00 100644 --- a/docs/src/tutorials/specification/specification.md +++ b/docs/src/tutorials/specification/specification.md @@ -10,17 +10,17 @@ This leads to the following chart: You can enter model specification at each point, but in general (and especially if you come from `lavaan`), it is the easiest to follow the red arrows: specify a graph object, convert it to a prameter table, and use this parameter table to construct your models ( just like we did in [A first model](@ref)): ```julia -observed_vars = ... -latent_vars = ... +obs_vars = ... +lat_vars = ... 
graph = @StenoGraph begin ... end partable = ParameterTable( - latent_vars = latent_vars, - observed_vars = observed_vars, - graph = graph) + graph, + latent_vars = lat_vars, + observed_vars = obs_vars) model = Sem( specification = partable, diff --git a/ext/SEMNLOptExt/NLopt.jl b/ext/SEMNLOptExt/NLopt.jl new file mode 100644 index 000000000..c5e0ad6cb --- /dev/null +++ b/ext/SEMNLOptExt/NLopt.jl @@ -0,0 +1,141 @@ +Base.convert( + ::Type{NLoptConstraint}, + tuple::NamedTuple{(:f, :tol), Tuple{F, T}}, +) where {F, T} = NLoptConstraint(tuple.f, tuple.tol) + +############################################################################################ +### Constructor +############################################################################################ + +function SemOptimizerNLopt(; + algorithm = :LD_LBFGS, + local_algorithm = nothing, + options = Dict{Symbol, Any}(), + local_options = Dict{Symbol, Any}(), + equality_constraints = Vector{NLoptConstraint}(), + inequality_constraints = Vector{NLoptConstraint}(), + kwargs..., +) + applicable(iterate, equality_constraints) && !isa(equality_constraints, NamedTuple) || + (equality_constraints = [equality_constraints]) + applicable(iterate, inequality_constraints) && + !isa(inequality_constraints, NamedTuple) || + (inequality_constraints = [inequality_constraints]) + return SemOptimizerNLopt( + algorithm, + local_algorithm, + options, + local_options, + convert.(NLoptConstraint, equality_constraints), + convert.(NLoptConstraint, inequality_constraints), + ) +end + +SEM.SemOptimizer{:NLopt}(args...; kwargs...) = SemOptimizerNLopt(args...; kwargs...) + +############################################################################################ +### Recommended methods +############################################################################################ + +SEM.update_observed(optimizer::SemOptimizerNLopt, observed::SemObserved; kwargs...) 
= + optimizer + +############################################################################################ +### additional methods +############################################################################################ + +SEM.algorithm(optimizer::SemOptimizerNLopt) = optimizer.algorithm +local_algorithm(optimizer::SemOptimizerNLopt) = optimizer.local_algorithm +SEM.options(optimizer::SemOptimizerNLopt) = optimizer.options +local_options(optimizer::SemOptimizerNLopt) = optimizer.local_options +equality_constraints(optimizer::SemOptimizerNLopt) = optimizer.equality_constraints +inequality_constraints(optimizer::SemOptimizerNLopt) = optimizer.inequality_constraints + +mutable struct NLoptResult + result::Any + problem::Any +end + +SEM.optimizer(res::NLoptResult) = res.problem.algorithm +SEM.n_iterations(res::NLoptResult) = res.problem.numevals +SEM.convergence(res::NLoptResult) = res.result[3] + +# construct SemFit from fitted NLopt object +function SemFit_NLopt(optimization_result, model::AbstractSem, start_val, opt) + return SemFit( + optimization_result[1], + optimization_result[2], + start_val, + model, + NLoptResult(optimization_result, opt), + ) +end + +# fit method +function SEM.fit( + optim::SemOptimizerNLopt, + model::AbstractSem, + start_params::AbstractVector; + kwargs..., +) + + # construct the NLopt problem + opt = construct_NLopt_problem(optim.algorithm, optim.options, length(start_params)) + set_NLopt_constraints!(opt, optim) + opt.min_objective = + (par, G) -> SEM.evaluate!( + zero(eltype(par)), + !isnothing(G) && !isempty(G) ? 
G : nothing, + nothing, + model, + par, + ) + + if !isnothing(optim.local_algorithm) + opt_local = construct_NLopt_problem( + optim.local_algorithm, + optim.local_options, + length(start_params), + ) + opt.local_optimizer = opt_local + end + + # fit + result = NLopt.optimize(opt, start_params) + + return SemFit_NLopt(result, model, start_params, opt) +end + +############################################################################################ +### additional functions +############################################################################################ + +function construct_NLopt_problem(algorithm, options, npar) + opt = Opt(algorithm, npar) + + for (key, val) in pairs(options) + setproperty!(opt, key, val) + end + + return opt +end + +function set_NLopt_constraints!(opt::Opt, optimizer::SemOptimizerNLopt) + for con in optimizer.inequality_constraints + inequality_constraint!(opt, con.f, con.tol) + end + for con in optimizer.equality_constraints + equality_constraint!(opt, con.f, con.tol) + end +end + +############################################################################################ +# pretty printing +############################################################################################ + +function Base.show(io::IO, result::NLoptResult) + print(io, "Optimizer status: $(result.result[3]) \n") + print(io, "Minimum: $(round(result.result[1]; digits = 2)) \n") + print(io, "Algorithm: $(result.problem.algorithm) \n") + print(io, "No. 
evaluations: $(result.problem.numevals) \n") +end diff --git a/ext/SEMNLOptExt/SEMNLOptExt.jl b/ext/SEMNLOptExt/SEMNLOptExt.jl new file mode 100644 index 000000000..c79fc2b86 --- /dev/null +++ b/ext/SEMNLOptExt/SEMNLOptExt.jl @@ -0,0 +1,10 @@ +module SEMNLOptExt + +using StructuralEquationModels, NLopt +using StructuralEquationModels: SemOptimizerNLopt, NLoptConstraint + +SEM = StructuralEquationModels + +include("NLopt.jl") + +end diff --git a/ext/SEMProximalOptExt/ProximalAlgorithms.jl b/ext/SEMProximalOptExt/ProximalAlgorithms.jl new file mode 100644 index 000000000..0d4748e3a --- /dev/null +++ b/ext/SEMProximalOptExt/ProximalAlgorithms.jl @@ -0,0 +1,93 @@ +SEM.SemOptimizer{:Proximal}(args...; kwargs...) = SemOptimizerProximal(args...; kwargs...) + +SemOptimizerProximal(; + algorithm = ProximalAlgorithms.PANOC(), + operator_g, + operator_h = nothing, + kwargs..., +) = SemOptimizerProximal(algorithm, operator_g, operator_h) + +############################################################################################ +### Recommended methods +############################################################################################ + +SEM.update_observed(optimizer::SemOptimizerProximal, observed::SemObserved; kwargs...) 
= + optimizer + +############################################################################################ +### additional methods +############################################################################################ + +SEM.algorithm(optimizer::SemOptimizerProximal) = optimizer.algorithm + +############################################################################ +### Pretty Printing +############################################################################ + +function Base.show(io::IO, struct_inst::SemOptimizerProximal) + print_type_name(io, struct_inst) + print_field_types(io, struct_inst) +end + +## connect to ProximalAlgorithms.jl +function ProximalAlgorithms.value_and_gradient(model::AbstractSem, params) + grad = similar(params) + obj = SEM.evaluate!(zero(eltype(params)), grad, nothing, model, params) + return obj, grad +end + +mutable struct ProximalResult + result::Any +end + +function SEM.fit( + optim::SemOptimizerProximal, + model::AbstractSem, + start_params::AbstractVector; + kwargs..., +) + if isnothing(optim.operator_h) + solution, iterations = + optim.algorithm(x0 = start_params, f = model, g = optim.operator_g) + else + solution, iterations = optim.algorithm( + x0 = start_params, + f = model, + g = optim.operator_g, + h = optim.operator_h, + ) + end + + minimum = objective!(model, solution) + + optimization_result = Dict( + :minimum => minimum, + :iterations => iterations, + :algorithm => optim.algorithm, + :operator_g => optim.operator_g, + ) + + isnothing(optim.operator_h) || + push!(optimization_result, :operator_h => optim.operator_h) + + return SemFit( + minimum, + solution, + start_params, + model, + ProximalResult(optimization_result), + ) +end + +############################################################################################ +# pretty printing +############################################################################################ + +function Base.show(io::IO, result::ProximalResult) + print(io, "Minimum: 
$(round(result.result[:minimum]; digits = 2)) \n") + print(io, "No. evaluations: $(result.result[:iterations]) \n") + print(io, "Operator: $(nameof(typeof(result.result[:operator_g]))) \n") + if haskey(result.result, :operator_h) + print(io, "Second Operator: $(nameof(typeof(result.result[:operator_h]))) \n") + end +end diff --git a/ext/SEMProximalOptExt/SEMProximalOptExt.jl b/ext/SEMProximalOptExt/SEMProximalOptExt.jl new file mode 100644 index 000000000..192944fef --- /dev/null +++ b/ext/SEMProximalOptExt/SEMProximalOptExt.jl @@ -0,0 +1,11 @@ +module SEMProximalOptExt + +using StructuralEquationModels +using ProximalAlgorithms +using StructuralEquationModels: SemOptimizerProximal, print_type_name, print_field_types + +SEM = StructuralEquationModels + +include("ProximalAlgorithms.jl") + +end diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl index 048b7181c..f6068dc50 100644 --- a/src/StructuralEquationModels.jl +++ b/src/StructuralEquationModels.jl @@ -4,10 +4,10 @@ using LinearAlgebra, Optim, NLSolversBase, Statistics, + StatsAPI, StatsBase, SparseArrays, Symbolics, - NLopt, FiniteDiff, PrettyTables, Distributions, @@ -16,7 +16,8 @@ using LinearAlgebra, DelimitedFiles, DataFrames -import DataFrames: DataFrame +import StatsAPI: params, coef, coefnames, dof, fit, nobs, coeftable + export StenoGraphs, @StenoGraph, meld const SEM = StructuralEquationModels @@ -24,29 +25,39 @@ const SEM = StructuralEquationModels # type hierarchy include("types.jl") include("objective_gradient_hessian.jl") + +# helper objects and functions +include("additional_functions/commutation_matrix.jl") +include("additional_functions/params_array.jl") + # fitted objects include("frontend/fit/SemFit.jl") # specification of models +include("frontend/common.jl") +include("frontend/specification/checks.jl") include("frontend/specification/ParameterTable.jl") -include("frontend/specification/EnsembleParameterTable.jl") include("frontend/specification/RAMMatrices.jl") 
+include("frontend/specification/EnsembleParameterTable.jl") include("frontend/specification/StenoGraphs.jl") include("frontend/fit/summary.jl") +include("frontend/StatsAPI.jl") # pretty printing include("frontend/pretty_printing.jl") # observed -include("observed/get_colnames.jl") -include("observed/covariance.jl") +include("observed/abstract.jl") include("observed/data.jl") +include("observed/covariance.jl") +include("observed/missing_pattern.jl") include("observed/missing.jl") include("observed/EM.jl") # constructor include("frontend/specification/Sem.jl") include("frontend/specification/documentation.jl") -# imply -include("imply/RAM/symbolic.jl") -include("imply/RAM/generic.jl") -include("imply/empty.jl") +# implied +include("implied/abstract.jl") +include("implied/RAM/symbolic.jl") +include("implied/RAM/generic.jl") +include("implied/empty.jl") # loss include("loss/ML/ML.jl") include("loss/ML/FIML.jl") @@ -54,55 +65,54 @@ include("loss/regularization/ridge.jl") include("loss/WLS/WLS.jl") include("loss/constant/constant.jl") # optimizer -include("diff/optim.jl") -include("diff/NLopt.jl") -include("diff/Empty.jl") -# optimizer -include("optimizer/documentation.jl") +include("optimizer/abstract.jl") +include("optimizer/Empty.jl") include("optimizer/optim.jl") -include("optimizer/NLopt.jl") # helper functions include("additional_functions/helper.jl") -include("additional_functions/parameters.jl") -include("additional_functions/start_val/start_val.jl") include("additional_functions/start_val/start_fabin3.jl") -include("additional_functions/start_val/start_partable.jl") include("additional_functions/start_val/start_simple.jl") include("additional_functions/artifacts.jl") include("additional_functions/simulation.jl") -# identifier -include("additional_functions/identifier.jl") # fit measures include("frontend/fit/fitmeasures/AIC.jl") include("frontend/fit/fitmeasures/BIC.jl") include("frontend/fit/fitmeasures/chi2.jl") -include("frontend/fit/fitmeasures/df.jl") 
+include("frontend/fit/fitmeasures/dof.jl") include("frontend/fit/fitmeasures/minus2ll.jl") -include("frontend/fit/fitmeasures/n_par.jl") -include("frontend/fit/fitmeasures/n_obs.jl") include("frontend/fit/fitmeasures/p.jl") include("frontend/fit/fitmeasures/RMSEA.jl") -include("frontend/fit/fitmeasures/n_man.jl") include("frontend/fit/fitmeasures/fit_measures.jl") # standard errors include("frontend/fit/standard_errors/hessian.jl") include("frontend/fit/standard_errors/bootstrap.jl") +# extensions +include("package_extensions/SEMNLOptExt.jl") +include("package_extensions/SEMProximalOptExt.jl") export AbstractSem, AbstractSemSingle, AbstractSemCollection, + coef, + coefnames, + coeftable, Sem, SemFiniteDiff, SemEnsemble, - SemImply, + MeanStruct, + NoMeanStruct, + HasMeanStruct, + HessianEval, + ExactHessian, + ApproxHessian, + SemImplied, RAMSymbolic, RAM, - ImplyEmpty, - imply, + ImpliedEmpty, + implied, start_val, start_fabin3, start_simple, - start_parameter_table, SemLoss, SemLossFunction, SemML, @@ -115,8 +125,6 @@ export AbstractSem, SemOptimizer, SemOptimizerEmpty, SemOptimizerOptim, - SemOptimizerNLopt, - NLoptConstraint, optimizer, n_iterations, convergence, @@ -125,11 +133,16 @@ export AbstractSem, SemObservedCovariance, SemObservedMissing, observed, - sem_fit, + obs_cov, + obs_mean, + nsamples, + nobs, + samples, + fit, SemFit, minimum, solution, - sem_summary, + details, objective!, gradient!, hessian!, @@ -137,6 +150,8 @@ export AbstractSem, objective_hessian!, gradient_hessian!, objective_gradient_hessian!, + SemSpecification, + RAMMatrices, ParameterTable, EnsembleParameterTable, update_partable!, @@ -149,30 +164,40 @@ export AbstractSem, start, Label, label, - get_identifier_indices, - RAMMatrices, - identifier, + nvars, + vars, + nlatent_vars, + latent_vars, + nobserved_vars, + observed_vars, + sort_vars!, + sort_vars, + params, + params!, + nparams, + param_indices, + param_labels, fit_measures, AIC, BIC, χ², - df, + dof, fit_measures, minus2ll, 
- n_par, - n_obs, p_value, RMSEA, - n_man, EmMVNModel, se_hessian, se_bootstrap, example_data, - swap_observed, + replace_observed, update_observed, @StenoGraph, →, ←, ↔, - ⇔ + ⇔, + SemOptimizerNLopt, + NLoptConstraint, + SemOptimizerProximal end diff --git a/src/additional_functions/commutation_matrix.jl b/src/additional_functions/commutation_matrix.jl new file mode 100644 index 000000000..345f809e0 --- /dev/null +++ b/src/additional_functions/commutation_matrix.jl @@ -0,0 +1,75 @@ +""" + + transpose_linear_indices(n, [m]) + +Put each linear index of the *n×m* matrix to the position of the +corresponding element in the transposed matrix. + +## Example +` +1 4 +2 5 => 1 2 3 +3 6 4 5 6 +` +""" +transpose_linear_indices(n::Integer, m::Integer = n) = + repeat(1:n, inner = m) .+ repeat((0:(m-1)) * n, outer = n) + +""" + CommutationMatrix(n::Integer) <: AbstractMatrix{Int} + +A *commutation matrix* *C* is a n²×n² matrix of 0s and 1s. +If *vec(A)* is a vectorized form of a n×n matrix *A*, +then ``C * vec(A) = vec(Aᵀ)``. +""" +struct CommutationMatrix <: AbstractMatrix{Int} + n::Int + n²::Int + transpose_inds::Vector{Int} # maps the linear indices of n×n matrix *B* to the indices of matrix *B'* + + CommutationMatrix(n::Integer) = new(n, n^2, transpose_linear_indices(n)) +end + +Base.size(A::CommutationMatrix) = (A.n², A.n²) +Base.size(A::CommutationMatrix, dim::Integer) = + 1 <= dim <= 2 ? A.n² : throw(ArgumentError("invalid matrix dimension $dim")) +Base.length(A::CommutationMatrix) = A.n²^2 +Base.getindex(A::CommutationMatrix, i::Int, j::Int) = j == A.transpose_inds[i] ? 
1 : 0 + +function Base.:(*)(A::CommutationMatrix, B::AbstractVector) + size(A, 2) == size(B, 1) || throw( + DimensionMismatch("A has $(size(A, 2)) columns, but B has $(size(B, 1)) elements"), + ) + return B[A.transpose_inds] +end + +function Base.:(*)(A::CommutationMatrix, B::AbstractMatrix) + size(A, 2) == size(B, 1) || throw( + DimensionMismatch("A has $(size(A, 2)) columns, but B has $(size(B, 1)) rows"), + ) + return B[A.transpose_inds, :] +end + +function Base.:(*)(A::CommutationMatrix, B::SparseMatrixCSC) + size(A, 2) == size(B, 1) || throw( + DimensionMismatch("A has $(size(A, 2)) columns, but B has $(size(B, 1)) rows"), + ) + return SparseMatrixCSC( + size(B, 1), + size(B, 2), + copy(B.colptr), + A.transpose_inds[B.rowval], + copy(B.nzval), + ) +end + +function LinearAlgebra.lmul!(A::CommutationMatrix, B::SparseMatrixCSC) + size(A, 2) == size(B, 1) || throw( + DimensionMismatch("A has $(size(A, 2)) columns, but B has $(size(B, 1)) rows"), + ) + + @inbounds for (i, rowind) in enumerate(B.rowval) + B.rowval[i] = A.transpose_inds[rowind] + end + return B +end diff --git a/src/additional_functions/helper.jl b/src/additional_functions/helper.jl index abc37207c..5559034e0 100644 --- a/src/additional_functions/helper.jl +++ b/src/additional_functions/helper.jl @@ -21,27 +21,19 @@ function make_onelement_array(A) end =# -function semvec(observed, imply, loss, optimizer) +function semvec(observed, implied, loss, optimizer) observed = make_onelement_array(observed) - imply = make_onelement_array(imply) + implied = make_onelement_array(implied) loss = make_onelement_array(loss) optimizer = make_onelement_array(optimizer) - #sem_vec = Array{AbstractSem}(undef, maximum(length.([observed, imply, loss, optimizer]))) - sem_vec = Sem.(observed, imply, loss, optimizer) + #sem_vec = Array{AbstractSem}(undef, maximum(length.([observed, implied, loss, optimizer]))) + sem_vec = Sem.(observed, implied, loss, optimizer) return sem_vec end -function get_observed(rowind, data, 
semobserved; args = (), kwargs = NamedTuple()) - observed_vec = Vector{semobserved}(undef, length(rowind)) - for i in 1:length(rowind) - observed_vec[i] = semobserved(args...; data = Matrix(data[rowind[i], :]), kwargs...) - end - return observed_vec -end - -skipmissing_mean(mat::AbstractMatrix) = +skipmissing_mean(mat::AbstractMatrix) = [mean(skipmissing(coldata)) for coldata in eachcol(mat)] function F_one_person(imp_mean, meandiff, inverse, data, logdet) @@ -106,148 +98,49 @@ function sparse_outer_mul!(C, A, B::Vector, ind) #computes A*S*B -> C, where ind end end -function cov_and_mean(rows; corrected = false) - obs_mean, obs_cov = StatsBase.mean_and_cov(reduce(hcat, rows), 2, corrected = corrected) - return obs_cov, vec(obs_mean) -end - -function duplication_matrix(nobs) - nobs = Int(nobs) - n1 = Int(nobs * (nobs + 1) * 0.5) - n2 = Int(nobs^2) - Dt = zeros(n1, n2) - - for j in 1:nobs - for i in j:nobs - u = zeros(n1) - u[Int((j - 1) * nobs + i - 0.5 * j * (j - 1))] = 1 - T = zeros(nobs, nobs) - T[j, i] = 1 - T[i, j] = 1 - Dt += u * transpose(vec(T)) +# n²×(n(n+1)/2) matrix to transform a vector of lower +# triangular entries into a vectorized form of a n×n symmetric matrix, +# opposite of elimination_matrix() +function duplication_matrix(n::Integer) + ntri = div(n * (n + 1), 2) + D = zeros(n^2, ntri) + for j in 1:n + for i in j:n + tri_ix = (j - 1) * n + i - div(j * (j - 1), 2) + D[j+n*(i-1), tri_ix] = 1 + D[i+n*(j-1), tri_ix] = 1 end end - D = transpose(Dt) return D end -function elimination_matrix(nobs) - nobs = Int(nobs) - n1 = Int(nobs * (nobs + 1) * 0.5) - n2 = Int(nobs^2) - L = zeros(n1, n2) - - for j in 1:nobs - for i in j:nobs - u = zeros(n1) - u[Int((j - 1) * nobs + i - 0.5 * j * (j - 1))] = 1 - T = zeros(nobs, nobs) - T[i, j] = 1 - L += u * transpose(vec(T)) +# (n(n+1)/2)×n² matrix to transform a +# vectorized form of a n×n symmetric matrix +# into vector of its lower triangular entries, +# opposite of duplication_matrix() +function 
elimination_matrix(n::Integer) + ntri = div(n * (n + 1), 2) + L = zeros(ntri, n^2) + for j in 1:n + for i in j:n + tri_ix = (j - 1) * n + i - div(j * (j - 1), 2) + L[tri_ix, i+n*(j-1)] = 1 end end return L end -function commutation_matrix(n; tosparse = false) - M = zeros(n^2, n^2) - - for i in 1:n - for j in 1:n - M[i+n*(j-1), j+n*(i-1)] = 1.0 - end - end - - if tosparse - M = sparse(M) - end - - return M -end - -function commutation_matrix_pre_square(A) - n2 = size(A, 1) - n = Int(sqrt(n2)) - - ind = repeat(1:n, inner = n) - indadd = (0:(n-1)) * n - for i in 1:n - ind[((i-1)*n+1):i*n] .+= indadd - end - - A_post = A[ind, :] - - return A_post -end - -function commutation_matrix_pre_square_add!(B, A) # comuptes B + KₙA - n2 = size(A, 1) - n = Int(sqrt(n2)) - - ind = repeat(1:n, inner = n) - indadd = (0:(n-1)) * n - for i in 1:n - ind[((i-1)*n+1):i*n] .+= indadd - end - - @views @inbounds B .+= A[ind, :] - - return B -end - -function get_commutation_lookup(n2::Int64) - n = Int(sqrt(n2)) - ind = repeat(1:n, inner = n) - indadd = (0:(n-1)) * n - for i in 1:n - ind[((i-1)*n+1):i*n] .+= indadd - end - - lookup = Dict{Int64, Int64}() - - for i in 1:n2 - j = findall(x -> (x == i), ind)[1] - push!(lookup, i => j) - end - - return lookup -end - -function commutation_matrix_pre_square!(A::SparseMatrixCSC, lookup) # comuptes B + KₙA - for (i, rowind) in enumerate(A.rowval) - A.rowval[i] = lookup[rowind] - end -end - -function commutation_matrix_pre_square!(A::SparseMatrixCSC) # computes KₙA - lookup = get_commutation_lookup(size(A, 2)) - commutation_matrix_pre_square!(A, lookup) -end - -function commutation_matrix_pre_square(A::SparseMatrixCSC) - B = copy(A) - commutation_matrix_pre_square!(B) - return B -end - -function commutation_matrix_pre_square(A::SparseMatrixCSC, lookup) - B = copy(A) - commutation_matrix_pre_square!(B, lookup) - return B -end - -function commutation_matrix_pre_square_add_mt!(B, A) # comuptes B + KₙA # 0 allocations but slower - n2 = size(A, 1) - n = 
Int(sqrt(n2)) - - indadd = (0:(n-1)) * n - - Threads.@threads for i in 1:n - for j in 1:n - row = i + indadd[j] - @views @inbounds B[row, :] .+= A[row, :] +# returns the vector of non-unique values in the order of appearance +# each non-unique values is reported once +function nonunique(values::AbstractVector) + value_counts = Dict{eltype(values), Int}() + res = similar(values, 0) + for v in values + n = get!(value_counts, v, 0) + if n == 1 # second encounter + push!(res, v) end + value_counts[v] = n + 1 end - - return B + return res end diff --git a/src/additional_functions/identifier.jl b/src/additional_functions/identifier.jl deleted file mode 100644 index fefcc1be5..000000000 --- a/src/additional_functions/identifier.jl +++ /dev/null @@ -1,59 +0,0 @@ -############################################################################################ -# get parameter identifier -############################################################################################ - -identifier(sem_fit::SemFit) = identifier(sem_fit.model) -identifier(model::AbstractSemSingle) = identifier(model.imply) -identifier(model::SemEnsemble) = model.identifier - -############################################################################################ -# construct identifier -############################################################################################ - -identifier(ram_matrices::RAMMatrices) = - Dict{Symbol, Int64}(ram_matrices.parameters .=> 1:length(ram_matrices.parameters)) -function identifier(partable::ParameterTable) - _, _, identifier = get_par_npar_identifier(partable) - return identifier -end - -############################################################################################ -# get indices of a Vector of parameter labels -############################################################################################ - -get_identifier_indices(parameters, identifier::Dict{Symbol, Int}) = - [identifier[par] for par in parameters] - 
-get_identifier_indices( - parameters, - obj::Union{SemFit, AbstractSemSingle, SemEnsemble, SemImply}, -) = get_identifier_indices(parameters, identifier(obj)) - -function get_identifier_indices(parameters, obj::Union{ParameterTable, RAMMatrices}) - @warn "You are trying to find parameter indices from a ParameterTable or RAMMatrices object. \n - If your model contains user-defined types, this may lead to wrong results. \n - To be on the safe side, try to reference parameters by labels or query the indices from - the constructed model (`get_identifier_indices(parameters, model)`)." maxlog = 1 - return get_identifier_indices(parameters, identifier(obj)) -end - -############################################################################################ -# documentation -############################################################################################ -""" - get_identifier_indices(parameters, model) - -Returns the indices of `parameters`. - -# Arguments -- `parameters::Vector{Symbol}`: parameter labels -- `model`: either a SEM or a fitted SEM - -# Examples -```julia -parameter_indices = get_identifier_indices([:λ₁, λ₂], my_fitted_sem) - -values = solution(my_fitted_sem)[parameter_indices] -``` -""" -function get_identifier_indices end diff --git a/src/additional_functions/parameters.jl b/src/additional_functions/parameters.jl deleted file mode 100644 index 8d01b3747..000000000 --- a/src/additional_functions/parameters.jl +++ /dev/null @@ -1,137 +0,0 @@ -# fill A, S, and M matrices with the parameter values according to the parameters map -function fill_A_S_M!( - A::AbstractMatrix, - S::AbstractMatrix, - M::Union{AbstractVector, Nothing}, - A_indices::AbstractArrayParamsMap, - S_indices::AbstractArrayParamsMap, - M_indices::Union{AbstractArrayParamsMap, Nothing}, - parameters::AbstractVector, -) - @inbounds for (iA, iS, par) in zip(A_indices, S_indices, parameters) - for index_A in iA - A[index_A] = par - end - - for index_S in iS - S[index_S] = par - end - 
end - - if !isnothing(M) - @inbounds for (iM, par) in zip(M_indices, parameters) - for index_M in iM - M[index_M] = par - end - end - end -end - -# build the map from the index of the parameter to the linear indices -# of this parameter occurences in M -# returns ArrayParamsMap object -function array_parameters_map(parameters::AbstractVector, M::AbstractArray) - params_index = Dict(param => i for (i, param) in enumerate(parameters)) - T = Base.eltype(eachindex(M)) - res = [Vector{T}() for _ in eachindex(parameters)] - for (i, val) in enumerate(M) - par_ind = get(params_index, val, nothing) - if !isnothing(par_ind) - push!(res[par_ind], i) - end - end - return res -end - -function eachindex_lower(M; linear_indices = false, kwargs...) - indices = CartesianIndices(M) - indices = filter(x -> (x[1] >= x[2]), indices) - - if linear_indices - indices = cartesian2linear(indices, M) - end - - return indices -end - -function cartesian2linear(ind_cart, dims) - ind_lin = LinearIndices(dims)[ind_cart] - return ind_lin -end - -function linear2cartesian(ind_lin, dims) - ind_cart = CartesianIndices(dims)[ind_lin] - return ind_cart -end - -function set_constants!(M, M_pre) - for index in eachindex(M) - δ = tryparse(Float64, string(M[index])) - - if !iszero(M[index]) & (δ !== nothing) - M_pre[index] = δ - end - end -end - -function check_constants(M) - for index in eachindex(M) - δ = tryparse(Float64, string(M[index])) - - if !iszero(M[index]) & (δ !== nothing) - return true - end - end - - return false -end - -# construct length(M)×length(parameters) sparse matrix of 1s at the positions, -# where the corresponding parameter occurs in the M matrix -function matrix_gradient(M_indices::ArrayParamsMap, M_length::Integer) - rowval = reduce(vcat, M_indices) - colptr = - pushfirst!(accumulate((ptr, M_ind) -> ptr + length(M_ind), M_indices, init = 1), 1) - return SparseMatrixCSC( - M_length, - length(M_indices), - colptr, - rowval, - ones(length(rowval)), - ) -end - -# fill M with 
parameters -function fill_matrix!( - M::AbstractMatrix, - M_indices::AbstractArrayParamsMap, - parameters::AbstractVector, -) - for (iM, par) in zip(M_indices, parameters) - for index_M in iM - M[index_M] = par - end - end - return M -end - -# range of parameters that are referenced in the matrix -function param_range(mtx_indices::AbstractArrayParamsMap) - first_i = findfirst(!isempty, mtx_indices) - last_i = findlast(!isempty, mtx_indices) - - if !isnothing(first_i) && !isnothing(last_i) - for i in first_i:last_i - if isempty(mtx_indices[i]) - # TODO show which parameter is missing in which matrix - throw( - ErrorException( - "Your parameter vector is not partitioned into directed and undirected effects", - ), - ) - end - end - end - - return first_i:last_i -end diff --git a/src/additional_functions/params_array.jl b/src/additional_functions/params_array.jl new file mode 100644 index 000000000..1031e349e --- /dev/null +++ b/src/additional_functions/params_array.jl @@ -0,0 +1,271 @@ +""" +Array with partially parameterized elements. 
+""" +struct ParamsArray{T, N} <: AbstractArray{T, N} + linear_indices::Vector{Int} # linear indices of the parameter refs in the destination array + nz_indices::Vector{Int} # indices of the parameters refs in nonzero elements vector + # (including the constants) ordered by the linear index + param_ptr::Vector{Int} # i-th element marks the start of the range in linear/nonzero + # indices arrays that corresponds to the i-th parameter + # (nparams + 1 elements) + constants::Vector{Tuple{Int, Int, T}} # linear index, index in nonzero vector, value + size::NTuple{N, Int} # size of the destination array +end + +ParamsVector{T} = ParamsArray{T, 1} +ParamsMatrix{T} = ParamsArray{T, 2} + +function ParamsArray{T, N}( + params_map::AbstractVector{<:AbstractVector{Int}}, + constants::Vector{Pair{Int, T}}, + size::NTuple{N, Int}, +) where {T, N} + params_ptr = + pushfirst!(accumulate((ptr, inds) -> ptr + length(inds), params_map, init = 1), 1) + param_lin_inds = collect(Iterators.flatten(params_map)) + nz_lin_inds = unique!(sort!([param_lin_inds; first.(constants)])) + if length(nz_lin_inds) < length(param_lin_inds) + length(constants) + throw(ArgumentError("Duplicate linear indices in the parameterized array")) + end + return ParamsArray{T, N}( + param_lin_inds, + searchsortedfirst.(Ref(nz_lin_inds), param_lin_inds), + params_ptr, + [(c[1], searchsortedfirst(nz_lin_inds, c[1]), c[2]) for c in constants], + size, + ) +end + +function ParamsArray{T, N}( + arr::AbstractArray{<:Any, N}, + params::AbstractVector{Symbol}; + skip_zeros::Bool = true, +) where {T, N} + params_index = Dict(param => i for (i, param) in enumerate(params)) + constants = Vector{Pair{Int, T}}() + params_map = [Vector{Int}() for _ in eachindex(params)] + arr_ixs = CartesianIndices(arr) + for (i, val) in pairs(vec(arr)) + ismissing(val) && continue + if isa(val, Number) + (skip_zeros && iszero(val)) || push!(constants, i => val) + else + par_ind = get(params_index, val, nothing) + if !isnothing(par_ind) + 
push!(params_map[par_ind], i) + else + throw(KeyError("Unrecognized parameter $val at position $(arr_ixs[i])")) + end + end + end + return ParamsArray{T, N}(params_map, constants, size(arr)) +end + +ParamsArray{T}( + arr::AbstractArray{<:Any, N}, + params::AbstractVector{Symbol}; + kwargs..., +) where {T, N} = ParamsArray{T, N}(arr, params; kwargs...) + +nparams(arr::ParamsArray) = length(arr.param_ptr) - 1 +SparseArrays.nnz(arr::ParamsArray) = length(arr.linear_indices) + length(arr.constants) + +Base.size(arr::ParamsArray) = arr.size +Base.size(arr::ParamsArray, i::Integer) = arr.size[i] + +Base.:(==)(a::ParamsArray, b::ParamsArray) = return eltype(a) == eltype(b) && + size(a) == size(b) && + a.constants == b.constants && + a.param_ptr == b.param_ptr && + a.linear_indices == b.linear_indices + +Base.hash(a::ParamsArray, h::UInt) = hash( + typeof(a), + hash( + eltype(a), + hash(size(a), hash(a.constants, hash(a.param_ptr, hash(a.linear_indices, h)))), + ), +) + +# the range of arr.param_ptr indices that correspond to i-th parameter +param_occurences_range(arr::ParamsArray, i::Integer) = + arr.param_ptr[i]:(arr.param_ptr[i+1]-1) + +""" + param_occurences(arr::ParamsArray, i::Integer) + +Get the linear indices of the elements in `arr` that correspond to the +`i`-th parameter. +""" +param_occurences(arr::ParamsArray, i::Integer) = + view(arr.linear_indices, arr.param_ptr[i]:(arr.param_ptr[i+1]-1)) + +""" + materialize!(dest::AbstractArray{<:Any, N}, src::ParamsArray{<:Any, N}, + params::AbstractVector; + set_constants::Bool = true, + set_zeros::Bool = false) + +Materialize the parameterized array `src` into `dest` by substituting the parameter +references with the parameter values from `params`. 
+"""
+function materialize!(
+    dest::AbstractArray{<:Any, N},
+    src::ParamsArray{<:Any, N},
+    params::AbstractVector;
+    set_constants::Bool = true,
+    set_zeros::Bool = false,
+) where {N}
+    size(dest) == size(src) || throw(
+        DimensionMismatch(
+            "Parameters ($(size(src))) and destination ($(size(dest))) array sizes don't match",
+        ),
+    )
+    nparams(src) == length(params) || throw(
+        DimensionMismatch(
+            "Number of values ($(length(params))) does not match the number of parameters ($(nparams(src)))",
+        ),
+    )
+    Z = eltype(dest) <: Number ? eltype(dest) : eltype(src)
+    set_zeros && fill!(dest, zero(Z))
+    if set_constants
+        @inbounds for (i, _, val) in src.constants
+            dest[i] = val
+        end
+    end
+    @inbounds for (i, val) in enumerate(params)
+        for j in param_occurences_range(src, i)
+            dest[src.linear_indices[j]] = val
+        end
+    end
+    return dest
+end
+
+function materialize!(
+    dest::SparseMatrixCSC,
+    src::ParamsMatrix,
+    params::AbstractVector;
+    set_constants::Bool = true,
+    set_zeros::Bool = false,
+)
+    set_zeros && throw(ArgumentError("Cannot set zeros for sparse matrix"))
+    size(dest) == size(src) || throw(
+        DimensionMismatch(
+            "Parameters ($(size(src))) and destination ($(size(dest))) array sizes don't match",
+        ),
+    )
+    nparams(src) == length(params) || throw(
+        DimensionMismatch(
+            "Number of values ($(length(params))) does not match the number of parameters ($(nparams(src)))",
+        ),
+    )
+
+    nnz(dest) == nnz(src) || throw(
+        DimensionMismatch(
+            "Number of non-zero elements ($(nnz(dest))) does not match the number of parameter references and constants ($(nnz(src)))",
+        ),
+    )
+    if set_constants
+        @inbounds for (_, j, val) in src.constants
+            dest.nzval[j] = val
+        end
+    end
+    @inbounds for (i, val) in enumerate(params)
+        for j in param_occurences_range(src, i)
+            dest.nzval[src.nz_indices[j]] = val
+        end
+    end
+    return dest
+end
+
+"""
+    materialize([T], src::ParamsArray{<:Any, N},
+                params::AbstractVector{T}) where T
+
+Materialize the parameterized 
array `src` into a new array of type `T` +by substituting the parameter references with the parameter values from `params`. +""" +materialize(::Type{T}, arr::ParamsArray, params::AbstractVector) where {T} = + materialize!(similar(arr, T), arr, params, set_constants = true, set_zeros = true) + +materialize(arr::ParamsArray, params::AbstractVector{T}) where {T} = + materialize(Union{T, eltype(arr)}, arr, params) + +# the hack to update the structured matrix (should be fine since the structure is imposed by ParamsMatrix) +materialize!( + dest::Union{Symmetric, LowerTriangular, UpperTriangular}, + src::ParamsMatrix{<:Any}, + params::AbstractVector; + kwargs..., +) = materialize!(parent(dest), src, params; kwargs...) + +function sparse_materialize( + ::Type{T}, + arr::ParamsMatrix, + params::AbstractVector, +) where {T} + nparams(arr) == length(params) || throw( + DimensionMismatch( + "Number of values ($(length(params))) does not match the number of parameter ($(nparams(arr)))", + ), + ) + + nz_vals = Vector{T}(undef, nnz(arr)) + nz_lininds = Vector{Int}(undef, nnz(arr)) + # fill constants + @inbounds for (lin_ind, nz_ind, val) in arr.constants + nz_vals[nz_ind] = val + nz_lininds[nz_ind] = lin_ind + end + # fill parameters + @inbounds for (i, val) in enumerate(params) + for j in param_occurences_range(arr, i) + nz_ind = arr.nz_indices[j] + nz_vals[nz_ind] = val + nz_lininds[nz_ind] = arr.linear_indices[j] + end + end + arr_ixs = CartesianIndices(size(arr)) + return sparse( + [arr_ixs[i][1] for i in nz_lininds], + [arr_ixs[i][2] for i in nz_lininds], + nz_vals, + size(arr)..., + ) +end + +sparse_materialize(arr::ParamsArray, params::AbstractVector{T}) where {T} = + sparse_materialize(Union{T, eltype(arr)}, arr, params) + +# construct length(M)×length(params) sparse matrix of 1s at the positions, +# where the corresponding parameter occurs in the arr +sparse_gradient(::Type{T}, arr::ParamsArray) where {T} = SparseMatrixCSC( + length(arr), + nparams(arr), + 
arr.param_ptr,
+    arr.linear_indices,
+    ones(T, length(arr.linear_indices)),
+)
+
+sparse_gradient(arr::ParamsArray{T}) where {T} = sparse_gradient(T, arr)
+
+# range of parameters that are referenced in the matrix
+function params_range(arr::ParamsArray; allow_gaps::Bool = false)
+    first_i = findfirst(i -> arr.param_ptr[i+1] > arr.param_ptr[i], 1:nparams(arr))
+    last_i = findlast(i -> arr.param_ptr[i+1] > arr.param_ptr[i], 1:nparams(arr))
+
+    if !allow_gaps && !isnothing(first_i) && !isnothing(last_i)
+        for i in first_i:last_i
+            if isempty(param_occurences_range(arr, i))
+                # TODO show which parameter is missing in which matrix
+                throw(
+                    ErrorException(
+                        "Parameter vector is not partitioned into directed and undirected effects",
+                    ),
+                )
+            end
+        end
+    end
+
+    return first_i:last_i
+end
diff --git a/src/additional_functions/simulation.jl b/src/additional_functions/simulation.jl
index 58e8432e1..27d58f93f 100644
--- a/src/additional_functions/simulation.jl
+++ b/src/additional_functions/simulation.jl
@@ -1,27 +1,27 @@
 """
-    (1) swap_observed(model::AbstractSemSingle; kwargs...)
+    (1) replace_observed(model::AbstractSemSingle; kwargs...)
 
-    (2) swap_observed(model::AbstractSemSingle, observed; kwargs...)
+    (2) replace_observed(model::AbstractSemSingle, observed; kwargs...)
 
 Return a new model with swaped observed part.
 
 # Arguments
-- `model::AbstractSemSingle`: optimization algorithm.
+- `model::AbstractSemSingle`: model to swap the observed part of.
 - `kwargs`: additional keyword arguments; typically includes `data = ...`
 - `observed`: Either an object of subtype of `SemObserved` or a subtype of `SemObserved`
 
 # Examples
-See the online documentation on [Swap observed data](@ref).
+See the online documentation on [Replace observed data](@ref).
 """
-function swap_observed end
+function replace_observed end
 
 """
     update_observed(to_update, observed::SemObserved; kwargs...)
 
-Update a `SemImply`, `SemLossFunction` or `SemOptimizer` object to use a `SemObserved` object. 
+Update a `SemImplied`, `SemLossFunction` or `SemOptimizer` object to use a `SemObserved` object. # Examples -See the online documentation on [Swap observed data](@ref). +See the online documentation on [Replace observed data](@ref). # Implementation You can provide a method for this function when defining a new type, for more information @@ -34,30 +34,28 @@ function update_observed end ############################################################################################ # use the same observed type as before -swap_observed(model::AbstractSemSingle; kwargs...) = - swap_observed(model, typeof(observed(model)).name.wrapper; kwargs...) +replace_observed(model::AbstractSemSingle; kwargs...) = + replace_observed(model, typeof(observed(model)).name.wrapper; kwargs...) # construct a new observed type -swap_observed(model::AbstractSemSingle, observed_type; kwargs...) = - swap_observed(model, observed_type(; kwargs...); kwargs...) +replace_observed(model::AbstractSemSingle, observed_type; kwargs...) = + replace_observed(model, observed_type(; kwargs...); kwargs...) -swap_observed(model::AbstractSemSingle, new_observed::SemObserved; kwargs...) = - swap_observed( +replace_observed(model::AbstractSemSingle, new_observed::SemObserved; kwargs...) 
= + replace_observed( model, observed(model), - imply(model), + implied(model), loss(model), - optimizer(model), new_observed; kwargs..., ) -function swap_observed( +function replace_observed( model::AbstractSemSingle, old_observed, - imply, + implied, loss, - optimizer, new_observed::SemObserved; kwargs..., ) @@ -66,29 +64,24 @@ function swap_observed( # get field types kwargs[:observed_type] = typeof(new_observed) kwargs[:old_observed_type] = typeof(old_observed) - kwargs[:imply_type] = typeof(imply) + kwargs[:implied_type] = typeof(implied) kwargs[:loss_types] = [typeof(lossfun) for lossfun in loss.functions] - kwargs[:optimizer_type] = typeof(optimizer) - # update imply - imply = update_observed(imply, new_observed; kwargs...) - kwargs[:imply] = imply - kwargs[:n_par] = n_par(imply) + # update implied + implied = update_observed(implied, new_observed; kwargs...) + kwargs[:implied] = implied + kwargs[:nparams] = nparams(implied) # update loss loss = update_observed(loss, new_observed; kwargs...) kwargs[:loss] = loss - # update optimizer - optimizer = update_observed(optimizer, new_observed; kwargs...) - - #new_imply = update_observed(model.imply, new_observed; kwargs...) + #new_implied = update_observed(model.implied, new_observed; kwargs...) return Sem( new_observed, - update_observed(model.imply, new_observed; kwargs...), + update_observed(model.implied, new_observed; kwargs...), update_observed(model.loss, new_observed; kwargs...), - update_observed(model.optimizer, new_observed; kwargs...), ) end @@ -98,3 +91,44 @@ function update_observed(loss::SemLoss, new_observed; kwargs...) 
) return SemLoss(new_functions, loss.weights) end + +############################################################################################ +# simulate data +############################################################################################ +""" + (1) rand(model::AbstractSemSingle, params, n) + + (2) rand(model::AbstractSemSingle, n) + +Sample normally distributed data from the model-implied covariance matrix and mean vector. + +# Arguments +- `model::AbstractSemSingle`: model to simulate from. +- `params`: parameter values to simulate from. +- `n::Integer`: Number of samples. + +# Examples +```julia +rand(model, start_simple(model), 100) +``` +""" +function Distributions.rand( + model::AbstractSemSingle{O, I, L}, + params, + n::Integer, +) where {O, I <: Union{RAM, RAMSymbolic}, L} + update!(EvaluationTargets{true, false, false}(), model.implied, model, params) + return rand(model, n) +end + +function Distributions.rand( + model::AbstractSemSingle{O, I, L}, + n::Integer, +) where {O, I <: Union{RAM, RAMSymbolic}, L} + if MeanStruct(model.implied) === NoMeanStruct + data = permutedims(rand(MvNormal(Symmetric(model.implied.Σ)), n)) + elseif MeanStruct(model.implied) === HasMeanStruct + data = permutedims(rand(MvNormal(model.implied.μ, Symmetric(model.implied.Σ)), n)) + end + return data +end diff --git a/src/additional_functions/start_val/start_fabin3.jl b/src/additional_functions/start_val/start_fabin3.jl index ee7dcb8cf..bd55f21d7 100644 --- a/src/additional_functions/start_val/start_fabin3.jl +++ b/src/additional_functions/start_val/start_fabin3.jl @@ -8,49 +8,48 @@ function start_fabin3 end # splice model and loss functions function start_fabin3(model::AbstractSemSingle; kwargs...) - return start_fabin3( - model.observed, - model.imply, - model.optimizer, - model.loss.functions..., - kwargs..., - ) + return start_fabin3(model.observed, model.implied, model.loss.functions..., kwargs...) 
end -function start_fabin3(observed, imply, optimizer, args...; kwargs...) - return start_fabin3(imply.ram_matrices, obs_cov(observed), obs_mean(observed)) +function start_fabin3(observed, implied, args...; kwargs...) + return start_fabin3(implied.ram_matrices, obs_cov(observed), obs_mean(observed)) end # SemObservedMissing -function start_fabin3(observed::SemObservedMissing, imply, optimizer, args...; kwargs...) +function start_fabin3(observed::SemObservedMissing, implied, args...; kwargs...) if !observed.em_model.fitted em_mvn(observed; kwargs...) end - return start_fabin3(imply.ram_matrices, observed.em_model.Σ, observed.em_model.μ) + return start_fabin3(implied.ram_matrices, observed.em_model.Σ, observed.em_model.μ) end -function start_fabin3(ram_matrices::RAMMatrices, Σ, μ) - A_ind, S_ind, F_ind, M_ind, parameters = ram_matrices.A_ind, - ram_matrices.S_ind, - ram_matrices.F_ind, - ram_matrices.M_ind, - ram_matrices.parameters +function start_fabin3( + ram_matrices::RAMMatrices, + Σ::AbstractMatrix, + μ::Union{AbstractVector, Nothing}, +) + A, S, F, M, n_par = ram_matrices.A, + ram_matrices.S, + ram_matrices.F, + ram_matrices.M, + nparams(ram_matrices) + + if !isnothing(M) && isnothing(μ) + throw(ArgumentError("RAM has meanstructure, but no observed means provided.")) + end - n_par = length(parameters) start_val = zeros(n_par) - n_var, n_nod = ram_matrices.size_F - n_latent = n_nod - n_var - - C_indices = CartesianIndices((n_nod, n_nod)) + F_var2obs = Dict( + i => F.rowval[F.colptr[i]] for i in axes(F, 2) if isobserved_var(ram_matrices, i) + ) + @assert length(F_var2obs) == size(F, 1) # check in which matrix each parameter appears - indices = Vector{CartesianIndex{2}}(undef, n_par) - #= in_S = length.(S_ind) .!= 0 in_A = length.(A_ind) .!= 0 - A_ind_c = [linear2cartesian(ind, (n_nod, n_nod)) for ind in A_ind] + A_ind_c = [linear2cartesian(ind, (n_var, n_var)) for ind in A_ind] in_Λ = [any(ind[2] .∈ F_ind) for ind in A_ind_c] if !isnothing(M) @@ -65,26 +64,53 @@ 
function start_fabin3(ram_matrices::RAMMatrices, Σ, μ) end =# # set undirected parameters in S - for (i, S_ind) in enumerate(S_ind) - for c_ind in C_indices[S_ind] - (c_ind[1] == c_ind[2]) || continue # covariances stay 0 - pos = searchsortedfirst(F_ind, c_ind[1]) - start_val[i] = - (pos <= length(F_ind)) && (F_ind[pos] == c_ind[1]) ? Σ[pos, pos] / 2 : 0.05 - break # i-th parameter initialized + S_indices = CartesianIndices(S) + for j in 1:nparams(S) + for lin_ind in param_occurences(S, j) + to, from = Tuple(S_indices[lin_ind]) + if (to == from) # covariances start with 0 + # half of observed variance for observed, 0.05 for latent + obs = get(F_var2obs, to, nothing) + start_val[j] = !isnothing(obs) ? Σ[obs, obs] / 2 : 0.05 + break # j-th parameter initialized + end end end # set loadings - constants = ram_matrices.constants - A_ind_c = [linear2cartesian(ind, (n_nod, n_nod)) for ind in A_ind] + A_indices = CartesianIndices(A) # ind_Λ = findall([is_in_Λ(ind_vec, F_ind) for ind_vec in A_ind_c]) - function calculate_lambda( - ref::Integer, - indicator::Integer, - indicators::AbstractVector{<:Integer}, - ) + # collect latent variable indicators in A + # maps latent parameter to the vector of dependent vars + # the 2nd index in the pair specified the parameter index, + # 0 if no parameter (constant), -1 if constant=1 + var2indicators = Dict{Int, Vector{Pair{Int, Int}}}() + for j in 1:nparams(A) + for lin_ind in param_occurences(A, j) + to, from = Tuple(A_indices[lin_ind]) + haskey(F_var2obs, from) && continue # skip observed + obs = get(F_var2obs, to, nothing) + if !isnothing(obs) + indicators = get!(() -> Vector{Pair{Int, Int}}(), var2indicators, from) + push!(indicators, obs => j) + end + end + end + + for (lin_ind, val) in A.constants + iszero(val) && continue # only non-zero loadings + to, from = Tuple(A_indices[lin_ind]) + haskey(F_var2obs, from) && continue # skip observed + obs = get(F_var2obs, to, nothing) + if !isnothing(obs) + indicators = get!(() -> 
Vector{Pair{Int, Int}}(), var2indicators, from) + push!(indicators, obs => ifelse(isone(val), -1, 0)) # no parameter associated, -1 = reference, 0 = indicator + end + end + + # calculate starting values for parameters of latent regression vars + function calculate_lambda(ref::Integer, indicator::Integer, indicators::AbstractVector) instruments = filter(i -> (i != ref) && (i != indicator), indicators) if length(instruments) == 1 s13 = Σ[ref, instruments[1]] @@ -99,61 +125,33 @@ function start_fabin3(ram_matrices::RAMMatrices, Σ, μ) end end - for i in setdiff(1:n_nod, F_ind) - reference = Int64[] - indicators = Int64[] - indicator2parampos = Dict{Int, Int}() - - for (j, Aj_ind_c) in enumerate(A_ind_c) - for ind_c in Aj_ind_c - (ind_c[2] == i) || continue - ind_pos = searchsortedfirst(F_ind, ind_c[1]) - if (ind_pos <= length(F_ind)) && (F_ind[ind_pos] == ind_c[1]) - push!(indicators, ind_pos) - indicator2parampos[ind_pos] = j - end - end - end - - for ram_const in constants - if (ram_const.matrix == :A) && (ram_const.index[2] == i) - ind_pos = searchsortedfirst(F_ind, ram_const.index[1]) - if (ind_pos <= length(F_ind)) && (F_ind[ind_pos] == ram_const.index[1]) - if isone(ram_const.value) - push!(reference, ind_pos) - else - push!(indicators, ind_pos) - # no parameter associated - end - end - end - end - + for (i, indicators) in pairs(var2indicators) + reference = [obs for (obs, param) in indicators if param == -1] + indicator_obs = first.(indicators) # is there at least one reference indicator? 
if length(reference) > 0 - if (length(reference) > 1) && isempty(indicator2parampos) # don't warn if entire column is fixed - @warn "You have more than 1 scaling indicator for $(ram_matrices.colnames[i])" + if (length(reference) > 1) && any(((obs, param),) -> param > 0, indicators) # don't warn if entire column is fixed + @warn "You have more than 1 scaling indicator for $(ram_matrices.vars[i])" end ref = reference[1] - for (j, indicator) in enumerate(indicators) - if (indicator != ref) && - (parampos = get(indicator2parampos, indicator, 0)) != 0 - start_val[parampos] = calculate_lambda(ref, indicator, indicators) + for (indicator, param) in indicators + if (indicator != ref) && (param > 0) + start_val[param] = calculate_lambda(ref, indicator, indicator_obs) end end # no reference indicator: - elseif length(indicators) > 0 - ref = indicators[1] - λ = Vector{Float64}(undef, length(indicators)) + else + ref = indicator_obs[1] + λ = Vector{Float64}(undef, length(indicator_obs)) λ[1] = 1.0 - for (j, indicator) in enumerate(indicators) + for (j, indicator) in enumerate(indicator_obs) if indicator != ref - λ[j] = calculate_lambda(ref, indicator, indicators) + λ[j] = calculate_lambda(ref, indicator, indicator_obs) end end - Σ_λ = Σ[indicators, indicators] + Σ_λ = Σ[indicator_obs, indicator_obs] l₂ = sum(abs2, λ) D = λ * λ' ./ l₂ θ = (I - D .^ 2) \ (diag(Σ_λ - D * Σ_λ * D)) @@ -164,24 +162,22 @@ function start_fabin3(ram_matrices::RAMMatrices, Σ, μ) λ .*= sign(Ψ) * sqrt(abs(Ψ)) - for (j, indicator) in enumerate(indicators) - if (parampos = get(indicator2parampos, indicator, 0)) != 0 - start_val[parampos] = λ[j] + for (j, (_, param)) in enumerate(indicators) + if param > 0 + start_val[param] = λ[j] end end - else - @warn "No scaling indicators for $(ram_matrices.colnames[i])" end end - # set means - if !isnothing(M_ind) - for (i, M_ind) in enumerate(M_ind) - if length(M_ind) != 0 - ind = M_ind[1] - pos = searchsortedfirst(F_ind, ind[1]) - if (pos <= length(F_ind)) && 
(F_ind[pos] == ind[1]) - start_val[i] = μ[pos] + if !isnothing(M) + # set starting values of the observed means + for j in 1:nparams(M) + M_ind = param_occurences(M, j) + if !isempty(M_ind) + obs = get(F_var2obs, M_ind[1], nothing) + if !isnothing(obs) + start_val[j] = μ[obs] end # latent means stay 0 end end diff --git a/src/additional_functions/start_val/start_partable.jl b/src/additional_functions/start_val/start_partable.jl deleted file mode 100644 index 01d06ac71..000000000 --- a/src/additional_functions/start_val/start_partable.jl +++ /dev/null @@ -1,50 +0,0 @@ -""" - start_parameter_table(model; parameter_table) - -Return a vector of starting values taken from `parameter_table`. -""" -function start_parameter_table end - -# splice model and loss functions -function start_parameter_table(model::AbstractSemSingle; kwargs...) - return start_parameter_table( - model.observed, - model.imply, - model.optimizer, - model.loss.functions...; - kwargs..., - ) -end - -# RAM(Symbolic) -function start_parameter_table(observed, imply, optimizer, args...; kwargs...) - return start_parameter_table(ram_matrices(imply); kwargs...) 
-end - -function start_parameter_table( - ram_matrices::RAMMatrices; - parameter_table::ParameterTable, - kwargs..., -) - start_val = zeros(0) - - for identifier_ram in ram_matrices.parameters - found = false - for (i, identifier_table) in enumerate(parameter_table.identifier) - if identifier_ram == identifier_table - push!(start_val, parameter_table.start[i]) - found = true - break - end - end - if !found - throw( - ErrorException( - "At least one parameter could not be found in the parameter table.", - ), - ) - end - end - - return start_val -end diff --git a/src/additional_functions/start_val/start_simple.jl b/src/additional_functions/start_val/start_simple.jl index 4c4645256..ad5148e3f 100644 --- a/src/additional_functions/start_val/start_simple.jl +++ b/src/additional_functions/start_val/start_simple.jl @@ -10,24 +10,18 @@ start_covariances_obs_lat = 0.0, start_means = 0.0, kwargs...) - + Return a vector of simple starting values. """ function start_simple end # Single Models ---------------------------------------------------------------------------- function start_simple(model::AbstractSemSingle; kwargs...) - return start_simple( - model.observed, - model.imply, - model.optimizer, - model.loss.functions..., - kwargs..., - ) + return start_simple(model.observed, model.implied, model.loss.functions...; kwargs...) end -function start_simple(observed, imply, optimizer, args...; kwargs...) - return start_simple(imply.ram_matrices; kwargs...) +function start_simple(observed, implied, args...; kwargs...) + return start_simple(implied.ram_matrices; kwargs...) 
end # Ensemble Models -------------------------------------------------------------------------- @@ -62,22 +56,24 @@ function start_simple( start_means = 0.0, kwargs..., ) - A_ind, S_ind, F_ind, M_ind, parameters = ram_matrices.A_ind, - ram_matrices.S_ind, - ram_matrices.F_ind, - ram_matrices.M_ind, - ram_matrices.parameters + A, S, F_ind, M, n_par = ram_matrices.A, + ram_matrices.S, + observed_var_indices(ram_matrices), + ram_matrices.M, + nparams(ram_matrices) - n_par = length(parameters) start_val = zeros(n_par) - n_var, n_nod = ram_matrices.size_F + n_obs = nobserved_vars(ram_matrices) + n_var = nvars(ram_matrices) - C_indices = CartesianIndices((n_nod, n_nod)) + C_indices = CartesianIndices((n_var, n_var)) for i in 1:n_par - if length(S_ind[i]) != 0 + Si_ind = param_occurences(S, i) + Ai_ind = param_occurences(A, i) + if length(Si_ind) != 0 # use the first occurence of the parameter to determine starting value - c_ind = C_indices[S_ind[i][1]] + c_ind = C_indices[Si_ind[1]] if c_ind[1] == c_ind[2] if c_ind[1] ∈ F_ind start_val[i] = start_variances_observed @@ -95,14 +91,14 @@ function start_simple( start_val[i] = start_covariances_obs_lat end end - elseif length(A_ind[i]) != 0 - c_ind = C_indices[A_ind[i][1]] + elseif length(Ai_ind) != 0 + c_ind = C_indices[Ai_ind[1]] if (c_ind[1] ∈ F_ind) & !(c_ind[2] ∈ F_ind) start_val[i] = start_loadings else start_val[i] = start_regressions end - elseif !isnothing(M_ind) && (length(M_ind[i]) != 0) + elseif !isnothing(M) && (length(param_occurences(M, i)) != 0) start_val[i] = start_means end end diff --git a/src/additional_functions/start_val/start_val.jl b/src/additional_functions/start_val/start_val.jl deleted file mode 100644 index 8b6402efa..000000000 --- a/src/additional_functions/start_val/start_val.jl +++ /dev/null @@ -1,26 +0,0 @@ -""" - start_val(model) - -Return a vector of starting values. -Defaults are FABIN 3 starting values for single models and simple starting values for -ensemble models. 
-""" -function start_val end -# Single Models ---------------------------------------------------------------------------- - -# splice model and loss functions -start_val(model::AbstractSemSingle; kwargs...) = start_val( - model, - model.observed, - model.imply, - model.optimizer, - model.loss.functions...; - kwargs..., -) - -# Fabin 3 starting values for RAM(Symbolic) -start_val(model, observed, imply, optimizer, args...; kwargs...) = - start_fabin3(model; kwargs...) - -# Ensemble Models -------------------------------------------------------------------------- -start_val(model::SemEnsemble; kwargs...) = start_simple(model; kwargs...) diff --git a/src/diff/NLopt.jl b/src/diff/NLopt.jl deleted file mode 100644 index 12fcd7e0f..000000000 --- a/src/diff/NLopt.jl +++ /dev/null @@ -1,115 +0,0 @@ -############################################################################################ -### Types -############################################################################################ -""" -Connects to `NLopt.jl` as the optimization backend. - -# Constructor - - SemOptimizerNLopt(; - algorithm = :LD_LBFGS, - options = Dict{Symbol, Any}(), - local_algorithm = nothing, - local_options = Dict{Symbol, Any}(), - equality_constraints = Vector{NLoptConstraint}(), - inequality_constraints = Vector{NLoptConstraint}(), - kwargs...) - -# Arguments -- `algorithm`: optimization algorithm. 
-- `options::Dict{Symbol, Any}`: options for the optimization algorithm -- `local_algorithm`: local optimization algorithm -- `local_options::Dict{Symbol, Any}`: options for the local optimization algorithm -- `equality_constraints::Vector{NLoptConstraint}`: vector of equality constraints -- `inequality_constraints::Vector{NLoptConstraint}`: vector of inequality constraints - -# Example -```julia -my_optimizer = SemOptimizerNLopt() - -# constrained optimization with augmented lagrangian -my_constrained_optimizer = SemOptimizerNLopt(; - algorithm = :AUGLAG, - local_algorithm = :LD_LBFGS, - local_options = Dict(:ftol_rel => 1e-6), - inequality_constraints = NLoptConstraint(;f = my_constraint, tol = 0.0), -) -``` - -# Usage -All algorithms and options from the NLopt library are available, for more information see -the NLopt.jl package and the NLopt online documentation. -For information on how to use inequality and equality constraints, -see [Constrained optimization](@ref) in our online documentation. - -# Extended help - -## Interfaces -- `algorithm(::SemOptimizerNLopt)` -- `local_algorithm(::SemOptimizerNLopt)` -- `options(::SemOptimizerNLopt)` -- `local_options(::SemOptimizerNLopt)` -- `equality_constraints(::SemOptimizerNLopt)` -- `inequality_constraints(::SemOptimizerNLopt)` - -## Implementation - -Subtype of `SemOptimizer`. 
-""" -struct SemOptimizerNLopt{A, A2, B, B2, C} <: SemOptimizer - algorithm::A - local_algorithm::A2 - options::B - local_options::B2 - equality_constraints::C - inequality_constraints::C -end - -Base.@kwdef mutable struct NLoptConstraint - f::Any - tol = 0.0 -end - -############################################################################################ -### Constructor -############################################################################################ - -function SemOptimizerNLopt(; - algorithm = :LD_LBFGS, - local_algorithm = nothing, - options = Dict{Symbol, Any}(), - local_options = Dict{Symbol, Any}(), - equality_constraints = Vector{NLoptConstraint}(), - inequality_constraints = Vector{NLoptConstraint}(), - kwargs..., -) - applicable(iterate, equality_constraints) || - (equality_constraints = [equality_constraints]) - applicable(iterate, inequality_constraints) || - (inequality_constraints = [inequality_constraints]) - return SemOptimizerNLopt( - algorithm, - local_algorithm, - options, - local_options, - equality_constraints, - inequality_constraints, - ) -end - -############################################################################################ -### Recommended methods -############################################################################################ - -update_observed(optimizer::SemOptimizerNLopt, observed::SemObserved; kwargs...) 
= optimizer - -############################################################################################ -### additional methods -############################################################################################ - -algorithm(optimizer::SemOptimizerNLopt) = optimizer.algorithm -local_algorithm(optimizer::SemOptimizerNLopt) = optimizer.local_algorithm -options(optimizer::SemOptimizerNLopt) = optimizer.options -local_options(optimizer::SemOptimizerNLopt) = optimizer.local_options -equality_constraints(optimizer::SemOptimizerNLopt) = optimizer.equality_constraints -inequality_constraints(optimizer::SemOptimizerNLopt) = optimizer.inequality_constraints diff --git a/src/diff/optim.jl b/src/diff/optim.jl deleted file mode 100644 index 4e4b04e9f..000000000 --- a/src/diff/optim.jl +++ /dev/null @@ -1,69 +0,0 @@ -############################################################################################ -### Types and Constructor -############################################################################################ -""" -Connects to `Optim.jl` as the optimization backend. - -# Constructor - - SemOptimizerOptim(; - algorithm = LBFGS(), - options = Optim.Options(;f_tol = 1e-10, x_tol = 1.5e-8), - kwargs...) - -# Arguments -- `algorithm`: optimization algorithm. -- `options::Optim.Options`: options for the optimization algorithm - -# Usage -All algorithms and options from the Optim.jl library are available, for more information see -the Optim.jl online documentation. - -# Examples -```julia -my_optimizer = SemOptimizerOptim() - -# hessian based optimization with backtracking linesearch and modified initial step size -using Optim, LineSearches - -my_newton_optimizer = SemOptimizerOptim( - algorithm = Newton( - ;linesearch = BackTracking(order=3), - alphaguess = InitialHagerZhang() - ) -) -``` - -# Extended help - -## Interfaces -- `algorithm(::SemOptimizerOptim)` -- `options(::SemOptimizerOptim)` - -## Implementation - -Subtype of `SemOptimizer`. 
-""" -mutable struct SemOptimizerOptim{A, B} <: SemOptimizer - algorithm::A - options::B -end - -SemOptimizerOptim(; - algorithm = LBFGS(), - options = Optim.Options(; f_tol = 1e-10, x_tol = 1.5e-8), - kwargs..., -) = SemOptimizerOptim(algorithm, options) - -############################################################################################ -### Recommended methods -############################################################################################ - -update_observed(optimizer::SemOptimizerOptim, observed::SemObserved; kwargs...) = optimizer - -############################################################################################ -### additional methods -############################################################################################ - -algorithm(optimizer::SemOptimizerOptim) = optimizer.algorithm -options(optimizer::SemOptimizerOptim) = optimizer.options diff --git a/src/frontend/StatsAPI.jl b/src/frontend/StatsAPI.jl new file mode 100644 index 000000000..edd677e34 --- /dev/null +++ b/src/frontend/StatsAPI.jl @@ -0,0 +1,78 @@ +""" + params!(out::AbstractVector, partable::ParameterTable, + col::Symbol = :estimate) + +Extract parameter values from the `col` column of `partable` +into the `out` vector. + +The `out` vector should be of `nparams(partable)` length. +The *i*-th element of the `out` vector will contain the +value of the *i*-th parameter from `params_labels(partable)`. + +Note that the function combines the duplicate occurences of the +same parameter in `partable` and will raise an error if the +values do not match. 
+""" +function params!( + out::AbstractVector, + partable::ParameterTable, + col::Symbol = :estimate, +) + (length(out) == nparams(partable)) || throw( + DimensionMismatch( + "The length of parameter values vector ($(length(out))) does not match the number of parameters ($(nparams(partable)))", + ), + ) + param_index = param_indices(partable) + params_col = partable.columns[col] + for (i, label) in enumerate(partable.columns[:label]) + (label == :const) && continue + param_ind = get(param_index, label, nothing) + @assert !isnothing(param_ind) "Parameter table contains unregistered parameter :$param at row #$i" + param = params_col[i] + if !isnan(out[param_ind]) + @assert isequal(out[param_ind], param) "Parameter :$label value at row #$i ($param) differs from the earlier encountered value ($(out[param_ind]))" + else + out[param_ind] = param + end + end + return out +end + +""" + params(x::ParameterTable, col::Symbol = :estimate) + +Extract parameter values from the `col` column of `partable`. + +Returns the values vector. The *i*-th element corresponds to +the value of *i*-th parameter from `params_label(partable)`. + +Note that the function combines the duplicate occurences of the +same parameter in `partable` and will raise an error if the +values do not match. +""" +params(partable::ParameterTable, col::Symbol = :estimate) = + params!(fill(NaN, nparams(partable)), partable, col) + +""" + coef(x::ParameterTable) + +For a `ParameterTable`, this function is synonymous to [`params`](@ref). +""" +coef(x::ParameterTable) = params(x) + +""" + coefnames(x::ParameterTable) + +Synonymous to [`param_labels`](@ref param_labels). +""" +coefnames(x::ParameterTable) = param_labels(x) + +""" + nobs(model::AbstractSem) -> Int + +Synonymous to [`nsamples`](@ref). 
+""" +nobs(model::AbstractSem) = nsamples(model) + +coeftable(model::AbstractSem; level::Real=0.95) = throw(ArgumentError("StructuralEquationModels does not support the `CoefTable` interface; see [`ParameterTable`](@ref) instead.")) \ No newline at end of file diff --git a/src/frontend/common.jl b/src/frontend/common.jl new file mode 100644 index 000000000..e89a6cf8b --- /dev/null +++ b/src/frontend/common.jl @@ -0,0 +1,64 @@ +# API methods supported by multiple SEM.jl types + +""" + params(semobj) -> Vector{Symbol} + +Return the vector of SEM model parameter identifiers. +""" +function params end + +""" + nparams(semobj) + +Return the number of parameters in a SEM model associated with `semobj`. + +See also [`params`](@ref). +""" +nparams(semobj) = length(param_labels(semobj)) + +""" + nvars(semobj) + +Return the number of variables in a SEM model associated with `semobj`. + +See also [`vars`](@ref). +""" +nvars(semobj) = length(vars(semobj)) + +""" + nobserved_vars(semobj) + +Return the number of observed variables in a SEM model associated with `semobj`. +""" +nobserved_vars(semobj) = length(observed_vars(semobj)) + +""" + nlatent_vars(semobj) + +Return the number of latent variables in a SEM model associated with `semobj`. +""" +nlatent_vars(semobj) = length(latent_vars(semobj)) + +""" + param_indices(semobj) + +Returns a dict of parameter names and their indices in `semobj`. + +# Examples +```julia +parind = param_indices(my_fitted_sem) +parind[:param_name] +``` + +See also [`params`](@ref). +""" +param_indices(semobj) = Dict(par => i for (i, par) in enumerate(param_labels(semobj))) + +""" + nsamples(semobj) + +Return the number of samples (observed data points). + +For ensemble models, return the sum over all submodels. 
+""" +function nsamples end diff --git a/src/frontend/fit/SemFit.jl b/src/frontend/fit/SemFit.jl index 97cd9c5a6..438da4da6 100644 --- a/src/frontend/fit/SemFit.jl +++ b/src/frontend/fit/SemFit.jl @@ -46,6 +46,10 @@ end # additional methods ############################################################################################ +param_labels(fit::SemFit) = param_labels(fit.model) +nparams(fit::SemFit) = nparams(fit.model) +nsamples(fit::SemFit) = nsamples(fit.model) + # access fields minimum(sem_fit::SemFit) = sem_fit.minimum solution(sem_fit::SemFit) = sem_fit.solution diff --git a/src/frontend/fit/fitmeasures/AIC.jl b/src/frontend/fit/fitmeasures/AIC.jl index 519f7beb7..f26f1f4dc 100644 --- a/src/frontend/fit/fitmeasures/AIC.jl +++ b/src/frontend/fit/fitmeasures/AIC.jl @@ -3,4 +3,4 @@ Return the akaike information criterion. """ -AIC(sem_fit::SemFit) = minus2ll(sem_fit) + 2n_par(sem_fit) +AIC(sem_fit::SemFit) = minus2ll(sem_fit) + 2nparams(sem_fit) diff --git a/src/frontend/fit/fitmeasures/BIC.jl b/src/frontend/fit/fitmeasures/BIC.jl index 56200f32b..20638f4e4 100644 --- a/src/frontend/fit/fitmeasures/BIC.jl +++ b/src/frontend/fit/fitmeasures/BIC.jl @@ -3,4 +3,4 @@ Return the bayesian information criterion. """ -BIC(sem_fit::SemFit) = minus2ll(sem_fit) + log(n_obs(sem_fit)) * n_par(sem_fit) +BIC(sem_fit::SemFit) = minus2ll(sem_fit) + log(nsamples(sem_fit)) * nparams(sem_fit) diff --git a/src/frontend/fit/fitmeasures/RMSEA.jl b/src/frontend/fit/fitmeasures/RMSEA.jl index 3b3eb384b..b9fff648e 100644 --- a/src/frontend/fit/fitmeasures/RMSEA.jl +++ b/src/frontend/fit/fitmeasures/RMSEA.jl @@ -6,13 +6,13 @@ Return the RMSEA. 
function RMSEA end RMSEA(sem_fit::SemFit{Mi, So, St, Mo, O} where {Mi, So, St, Mo <: AbstractSemSingle, O}) = - RMSEA(df(sem_fit), χ²(sem_fit), n_obs(sem_fit)) + RMSEA(dof(sem_fit), χ²(sem_fit), nsamples(sem_fit)) RMSEA(sem_fit::SemFit{Mi, So, St, Mo, O} where {Mi, So, St, Mo <: SemEnsemble, O}) = - sqrt(length(sem_fit.model.sems)) * RMSEA(df(sem_fit), χ²(sem_fit), n_obs(sem_fit)) + sqrt(length(sem_fit.model.sems)) * RMSEA(dof(sem_fit), χ²(sem_fit), nsamples(sem_fit)) -function RMSEA(df, chi2, n_obs) - rmsea = (chi2 - df) / (n_obs * df) +function RMSEA(dof, chi2, nsamples) + rmsea = (chi2 - dof) / (nsamples * dof) rmsea > 0 ? nothing : rmsea = 0 return sqrt(rmsea) end diff --git a/src/frontend/fit/fitmeasures/chi2.jl b/src/frontend/fit/fitmeasures/chi2.jl index 51fe6f0cd..333783f95 100644 --- a/src/frontend/fit/fitmeasures/chi2.jl +++ b/src/frontend/fit/fitmeasures/chi2.jl @@ -13,21 +13,21 @@ function χ² end χ²(sem_fit::SemFit{Mi, So, St, Mo, O} where {Mi, So, St, Mo <: AbstractSemSingle, O}) = χ²( sem_fit, sem_fit.model.observed, - sem_fit.model.imply, - sem_fit.model.optimizer, + sem_fit.model.implied, sem_fit.model.loss.functions..., ) # RAM + SemML -χ²(sem_fit::SemFit, observed, imp::Union{RAM, RAMSymbolic}, optimizer, loss_ml::SemML) = - (n_obs(sem_fit) - 1) * (sem_fit.minimum - logdet(observed.obs_cov) - observed.n_man) +χ²(sem_fit::SemFit, observed, imp::Union{RAM, RAMSymbolic}, loss_ml::SemML) = + (nsamples(sem_fit) - 1) * + (sem_fit.minimum - logdet(observed.obs_cov) - nobserved_vars(observed)) # bollen, p. 
115, only correct for GLS weight matrix -χ²(sem_fit::SemFit, observed, imp::Union{RAM, RAMSymbolic}, optimizer, loss_ml::SemWLS) = - (n_obs(sem_fit) - 1) * sem_fit.minimum +χ²(sem_fit::SemFit, observed, imp::Union{RAM, RAMSymbolic}, loss_ml::SemWLS) = + (nsamples(sem_fit) - 1) * sem_fit.minimum # FIML -function χ²(sem_fit::SemFit, observed::SemObservedMissing, imp, optimizer, loss_ml::SemFIML) +function χ²(sem_fit::SemFit, observed::SemObservedMissing, imp, loss_ml::SemFIML) ll_H0 = minus2ll(sem_fit) ll_H1 = minus2ll(observed) chi2 = ll_H0 - ll_H1 @@ -45,7 +45,7 @@ end function χ²(sem_fit::SemFit, model::SemEnsemble, lossfun::L) where {L <: SemWLS} check_ensemble_length(model) check_lossfun_types(model, L) - return (sum(n_obs.(model.sems)) - 1) * sem_fit.minimum + return (nsamples(model) - 1) * sem_fit.minimum end function χ²(sem_fit::SemFit, model::SemEnsemble, lossfun::L) where {L <: SemML} @@ -53,10 +53,10 @@ function χ²(sem_fit::SemFit, model::SemEnsemble, lossfun::L) where {L <: SemML check_lossfun_types(model, L) F_G = sem_fit.minimum F_G -= sum([ - w * (logdet(m.observed.obs_cov) + m.observed.n_man) for + w * (logdet(m.observed.obs_cov) + nobserved_vars(m.observed)) for (w, m) in zip(model.weights, model.sems) ]) - return (sum(n_obs.(model.sems)) - 1) * F_G + return (nsamples(model) - 1) * F_G end function χ²(sem_fit::SemFit, model::SemEnsemble, lossfun::L) where {L <: SemFIML} diff --git a/src/frontend/fit/fitmeasures/df.jl b/src/frontend/fit/fitmeasures/df.jl deleted file mode 100644 index f546bb000..000000000 --- a/src/frontend/fit/fitmeasures/df.jl +++ /dev/null @@ -1,22 +0,0 @@ -""" - df(sem_fit::SemFit) - df(model::AbstractSem) - -Return the degrees of freedom. 
-""" -function df end - -df(sem_fit::SemFit) = df(sem_fit.model) - -df(model::AbstractSem) = n_dp(model) - n_par(model) - -function n_dp(model::AbstractSemSingle) - nman = n_man(model) - ndp = 0.5(nman^2 + nman) - if !isnothing(model.imply.μ) - ndp += n_man(model) - end - return ndp -end - -n_dp(model::SemEnsemble) = sum(n_dp.(model.sems)) diff --git a/src/frontend/fit/fitmeasures/dof.jl b/src/frontend/fit/fitmeasures/dof.jl new file mode 100644 index 000000000..3df49d89d --- /dev/null +++ b/src/frontend/fit/fitmeasures/dof.jl @@ -0,0 +1,22 @@ +""" + dof(sem_fit::SemFit) + dof(model::AbstractSem) + +Return the degrees of freedom. +""" +function dof end + +dof(sem_fit::SemFit) = dof(sem_fit.model) + +dof(model::AbstractSem) = n_dp(model) - nparams(model) + +function n_dp(model::AbstractSemSingle) + nvars = nobserved_vars(model) + ndp = 0.5(nvars^2 + nvars) + if !isnothing(model.implied.μ) + ndp += nvars + end + return ndp +end + +n_dp(model::SemEnsemble) = sum(n_dp.(model.sems)) diff --git a/src/frontend/fit/fitmeasures/fit_measures.jl b/src/frontend/fit/fitmeasures/fit_measures.jl index e3f85a0f2..2fc4dfba0 100644 --- a/src/frontend/fit/fitmeasures/fit_measures.jl +++ b/src/frontend/fit/fitmeasures/fit_measures.jl @@ -1,5 +1,5 @@ fit_measures(sem_fit) = - fit_measures(sem_fit, n_par, df, AIC, BIC, RMSEA, χ², p_value, minus2ll) + fit_measures(sem_fit, nparams, dof, AIC, BIC, RMSEA, χ², p_value, minus2ll) function fit_measures(sem_fit, args...) measures = Dict{Symbol, Union{Float64, Missing}}() diff --git a/src/frontend/fit/fitmeasures/minus2ll.jl b/src/frontend/fit/fitmeasures/minus2ll.jl index c984555b3..2cb87d79c 100644 --- a/src/frontend/fit/fitmeasures/minus2ll.jl +++ b/src/frontend/fit/fitmeasures/minus2ll.jl @@ -15,99 +15,49 @@ minus2ll( ) = minus2ll( sem_fit, sem_fit.model.observed, - sem_fit.model.imply, - sem_fit.model.optimizer, + sem_fit.model.implied, sem_fit.model.loss.functions..., ) -minus2ll(sem_fit::SemFit, obs, imp, optimizer, args...) 
= - minus2ll(sem_fit.minimum, obs, imp, optimizer, args...) +minus2ll(sem_fit::SemFit, obs, imp, args...) = minus2ll(sem_fit.minimum, obs, imp, args...) # SemML ------------------------------------------------------------------------------------ -minus2ll(minimum::Number, obs, imp::Union{RAM, RAMSymbolic}, optimizer, loss_ml::SemML) = - n_obs(obs) * (minimum + log(2π) * n_man(obs)) +minus2ll(minimum::Number, obs, imp::Union{RAM, RAMSymbolic}, loss_ml::SemML) = + nsamples(obs) * (minimum + log(2π) * nobserved_vars(obs)) # WLS -------------------------------------------------------------------------------------- -minus2ll(minimum::Number, obs, imp::Union{RAM, RAMSymbolic}, optimizer, loss_ml::SemWLS) = - missing +minus2ll(minimum::Number, obs, imp::Union{RAM, RAMSymbolic}, loss_ml::SemWLS) = missing # compute likelihood for missing data - H0 ------------------------------------------------- # -2ll = (∑ log(2π)*(nᵢ + mᵢ)) + F*n -function minus2ll( - minimum::Number, - observed, - imp::Union{RAM, RAMSymbolic}, - optimizer, - loss_ml::SemFIML, -) - F = minimum - F *= n_obs(observed) - F += sum(log(2π) * observed.pattern_n_obs .* observed.pattern_nvar_obs) +function minus2ll(minimum::Number, observed, imp::Union{RAM, RAMSymbolic}, loss_ml::SemFIML) + F = minimum * nsamples(observed) + F += log(2π) * sum(pat -> nsamples(pat) * nmeasured_vars(pat), observed.patterns) return F end # compute likelihood for missing data - H1 ------------------------------------------------- # -2ll = ∑ log(2π)*(nᵢ + mᵢ) + ln(Σᵢ) + (mᵢ - μᵢ)ᵀ Σᵢ⁻¹ (mᵢ - μᵢ)) + tr(SᵢΣᵢ) function minus2ll(observed::SemObservedMissing) - if observed.em_model.fitted - minus2ll( - observed.em_model.μ, - observed.em_model.Σ, - observed.n_obs, - observed.rows, - observed.patterns, - observed.obs_mean, - observed.obs_cov, - observed.pattern_n_obs, - observed.pattern_nvar_obs, - ) - else - em_mvn(observed) - minus2ll( - observed.em_model.μ, - observed.em_model.Σ, - observed.n_obs, - observed.rows, - observed.patterns, - 
observed.obs_mean, - observed.obs_cov, - observed.pattern_n_obs, - observed.pattern_nvar_obs, - ) - end -end - -function minus2ll( - μ, - Σ, - N, - rows, - patterns, - obs_mean, - obs_cov, - pattern_n_obs, - pattern_nvar_obs, -) - F = 0.0 + # fit EM-based mean and cov if not yet fitted + # FIXME EM could be very computationally expensive + observed.em_model.fitted || em_mvn(observed) - for i in 1:length(rows) - nᵢ = pattern_n_obs[i] - # missing pattern - pattern = patterns[i] - # observed data - Sᵢ = obs_cov[i] + Σ = observed.em_model.Σ + μ = observed.em_model.μ + F = sum(observed.patterns) do pat # implied covariance/mean - Σᵢ = Σ[pattern, pattern] - ld = logdet(Σᵢ) - Σᵢ⁻¹ = inv(cholesky(Σᵢ)) - meandiffᵢ = obs_mean[i] - μ[pattern] + Σᵢ = Σ[pat.measured_mask, pat.measured_mask] + Σᵢ_chol = cholesky!(Σᵢ) + ld = logdet(Σᵢ_chol) + Σᵢ⁻¹ = LinearAlgebra.inv!(Σᵢ_chol) + meandiffᵢ = pat.measured_mean - μ[pat.measured_mask] - F += F_one_pattern(meandiffᵢ, Σᵢ⁻¹, Sᵢ, ld, nᵢ) + F_one_pattern(meandiffᵢ, Σᵢ⁻¹, pat.measured_cov, ld, nsamples(pat)) end - F += sum(log(2π) * pattern_n_obs .* pattern_nvar_obs) - #F *= N + F += log(2π) * sum(pat -> nsamples(pat) * nmeasured_vars(pat), observed.patterns) return F end @@ -117,7 +67,7 @@ end ############################################################################################ minus2ll(minimum, model::AbstractSemSingle) = - minus2ll(minimum, model.observed, model.imply, model.optimizer, model.loss.functions...) + minus2ll(minimum, model.observed, model.implied, model.loss.functions...) function minus2ll( sem_fit::SemFit{Mi, So, St, Mo, O} where {Mi, So, St, Mo <: SemEnsemble, O}, diff --git a/src/frontend/fit/fitmeasures/n_man.jl b/src/frontend/fit/fitmeasures/n_man.jl deleted file mode 100644 index 45a7d99de..000000000 --- a/src/frontend/fit/fitmeasures/n_man.jl +++ /dev/null @@ -1,11 +0,0 @@ -""" - n_man(sem_fit::SemFit) - n_man(model::AbstractSemSingle) - -Return the number of manifest variables. 
-""" -function n_man end - -n_man(sem_fit::SemFit) = n_man(sem_fit.model) - -n_man(model::AbstractSemSingle) = n_man(model.observed) diff --git a/src/frontend/fit/fitmeasures/n_obs.jl b/src/frontend/fit/fitmeasures/n_obs.jl deleted file mode 100644 index cd4bdca30..000000000 --- a/src/frontend/fit/fitmeasures/n_obs.jl +++ /dev/null @@ -1,16 +0,0 @@ -""" - n_obs(sem_fit::SemFit) - n_obs(model::AbstractSemSingle) - n_obs(model::SemEnsemble) - -Return the number of observed data points. - -For ensemble models, return the sum over all submodels. -""" -function n_obs end - -n_obs(sem_fit::SemFit) = n_obs(sem_fit.model) - -n_obs(model::AbstractSemSingle) = n_obs(model.observed) - -n_obs(model::SemEnsemble) = sum(n_obs, model.sems) diff --git a/src/frontend/fit/fitmeasures/n_par.jl b/src/frontend/fit/fitmeasures/n_par.jl deleted file mode 100644 index 9cb2d3479..000000000 --- a/src/frontend/fit/fitmeasures/n_par.jl +++ /dev/null @@ -1,20 +0,0 @@ -############################################################################################ -### get number of parameters -############################################################################################ -""" - n_par(sem_fit::SemFit) - n_par(model::AbstractSemSingle) - n_par(model::SemEnsemble) - n_par(identifier::Dict) - -Return the number of parameters. -""" -function n_par end - -n_par(fit::SemFit) = n_par(fit.model) - -n_par(model::AbstractSemSingle) = n_par(model.imply) - -n_par(model::SemEnsemble) = n_par(model.identifier) - -n_par(identifier::Dict) = length(identifier) diff --git a/src/frontend/fit/fitmeasures/p.jl b/src/frontend/fit/fitmeasures/p.jl index 3d4275f95..8c69d5ec2 100644 --- a/src/frontend/fit/fitmeasures/p.jl +++ b/src/frontend/fit/fitmeasures/p.jl @@ -3,4 +3,4 @@ Return the p value computed from the χ² test statistic. 
""" -p_value(sem_fit::SemFit) = 1 - cdf(Chisq(df(sem_fit)), χ²(sem_fit)) +p_value(sem_fit::SemFit) = 1 - cdf(Chisq(dof(sem_fit)), χ²(sem_fit)) diff --git a/src/frontend/fit/standard_errors/bootstrap.jl b/src/frontend/fit/standard_errors/bootstrap.jl index 814f46e59..4589dc020 100644 --- a/src/frontend/fit/standard_errors/bootstrap.jl +++ b/src/frontend/fit/standard_errors/bootstrap.jl @@ -1,5 +1,5 @@ """ - se_bootstrap(semfit::SemFit; n_boot = 3000, data = nothing, kwargs...) + se_bootstrap(sem_fit::SemFit; n_boot = 3000, data = nothing, kwargs...) Return boorstrap standard errors. Only works for single models. @@ -7,7 +7,7 @@ Only works for single models. # Arguments - `n_boot`: number of boostrap samples - `data`: data to sample from. Only needed if different than the data from `sem_fit` -- `kwargs...`: passed down to `swap_observed` +- `kwargs...`: passed down to `replace_observed` """ function se_bootstrap( semfit::SemFit; @@ -25,7 +25,7 @@ function se_bootstrap( end if isnothing(data) - data = get_data(observed(model(semfit))) + data = samples(observed(model(semfit))) end data = prepare_data_bootstrap(data) @@ -42,7 +42,7 @@ function se_bootstrap( for _ in 1:n_boot sample_data = bootstrap_sample(data) - new_model = swap_observed( + new_model = replace_observed( model(semfit); data = sample_data, specification = specification, @@ -52,7 +52,7 @@ function se_bootstrap( new_solution .= 0.0 try - new_solution = solution(sem_fit(new_model; start_val = start)) + new_solution = solution(fit(new_model; start_val = start)) catch n_failed += 1 end diff --git a/src/frontend/fit/standard_errors/hessian.jl b/src/frontend/fit/standard_errors/hessian.jl index 396d3b98c..6ae53407f 100644 --- a/src/frontend/fit/standard_errors/hessian.jl +++ b/src/frontend/fit/standard_errors/hessian.jl @@ -1,58 +1,54 @@ """ - se_hessian(semfit::SemFit; hessian = :finitediff) + se_hessian(fit::SemFit; method = :finitediff) -Return hessian based standard errors. 
+Return hessian-based standard errors. # Arguments -- `hessian`: how to compute the hessian. Options are +- `method`: how to compute the hessian. Options are - `:analytic`: (only if an analytic hessian for the model can be computed) - `:finitediff`: for finite difference approximation """ -function se_hessian(sem_fit::SemFit; hessian = :finitediff) - c = H_scaling(sem_fit.model) - - if hessian == :analytic - par = solution(sem_fit) - H = zeros(eltype(par), length(par), length(par)) - hessian!(H, sem_fit.model, sem_fit.solution) - elseif hessian == :finitediff - H = FiniteDiff.finite_difference_hessian( - Base.Fix1(objective!, sem_fit.model), - sem_fit.solution, - ) - elseif hessian == :optimizer - throw( - ArgumentError( - "standard errors from the optimizer hessian are not implemented yet", - ), - ) - elseif hessian == :expected - throw( - ArgumentError( - "standard errors based on the expected hessian are not implemented yet", - ), +function se_hessian(fit::SemFit; method = :finitediff) + c = H_scaling(fit.model) + params = solution(fit) + H = similar(params, (length(params), length(params))) + + if method == :analytic + evaluate!(nothing, nothing, H, fit.model, params) + elseif method == :finitediff + FiniteDiff.finite_difference_hessian!( + H, + p -> evaluate!(zero(eltype(H)), nothing, nothing, fit.model, p), + params, ) + elseif method == :optimizer + error("Standard errors from the optimizer hessian are not implemented yet") + elseif method == :expected + error("Standard errors based on the expected hessian are not implemented yet") else - throw(ArgumentError("I don't know how to compute `$hessian` standard-errors")) + throw(ArgumentError("Unsupported hessian calculation method :$method")) end - invH = c * inv(H) - se = sqrt.(diag(invH)) - - return se + H_chol = cholesky!(Symmetric(H)) + H_inv = LinearAlgebra.inv!(H_chol) + return [sqrt(c * H_inv[i]) for i in diagind(H_inv)] end # Addition functions 
------------------------------------------------------------- -H_scaling(model::AbstractSemSingle) = - H_scaling(model, model.observed, model.imply, model.optimizer, model.loss.functions...) +function H_scaling(model::AbstractSemSingle) + if length(model.loss.functions) > 1 + @warn "Hessian scaling for multiple loss functions is not implemented yet" + end + return H_scaling(model.loss.functions[1], model) +end -H_scaling(model, obs, imp, optimizer, lossfun::SemML) = 2 / (n_obs(model) - 1) +H_scaling(lossfun::SemML, model::AbstractSemSingle) = 2 / (nsamples(model) - 1) -function H_scaling(model, obs, imp, optimizer, lossfun::SemWLS) +function H_scaling(lossfun::SemWLS, model::AbstractSemSingle) @warn "Standard errors for WLS are only correct if a GLS weight matrix (the default) is used." - return 2 / (n_obs(model) - 1) + return 2 / (nsamples(model) - 1) end -H_scaling(model, obs, imp, optimizer, lossfun::SemFIML) = 2 / n_obs(model) +H_scaling(lossfun::SemFIML, model::AbstractSemSingle) = 2 / nsamples(model) -H_scaling(model::SemEnsemble) = 2 / n_obs(model) +H_scaling(model::SemEnsemble) = 2 / nsamples(model) diff --git a/src/frontend/fit/summary.jl b/src/frontend/fit/summary.jl index 4cda902d7..8ee134a9c 100644 --- a/src/frontend/fit/summary.jl +++ b/src/frontend/fit/summary.jl @@ -1,9 +1,4 @@ -function sem_summary( - sem_fit::SemFit; - show_fitmeasures = false, - color = :light_cyan, - digits = 2, -) +function details(sem_fit::SemFit; show_fitmeasures = false, color = :light_cyan, digits = 2) print("\n") println("Fitted Structural Equation Model") print("\n") @@ -16,8 +11,8 @@ function sem_summary( println("Convergence: $(convergence(sem_fit))") println("No. 
iterations/evaluations: $(n_iterations(sem_fit))") print("\n") - println("Number of parameters: $(n_par(sem_fit))") - println("Number of observations: $(n_obs(sem_fit))") + println("Number of parameters: $(nparams(sem_fit))") + println("Number of data samples: $(nsamples(sem_fit))") print("\n") printstyled( "----------------------------------- Model ----------------------------------- \n"; @@ -45,12 +40,13 @@ function sem_summary( print("\n") end -function sem_summary( +function details( partable::ParameterTable; color = :light_cyan, secondary_color = :light_yellow, digits = 2, show_variables = true, + show_columns = nothing, ) if show_variables print("\n") @@ -60,19 +56,18 @@ function sem_summary( ) print("\n") printstyled("Latent variables: "; color = color) - for var in partable.variables[:latent_vars] + for var in partable.latent_vars print("$var ") end print("\n") printstyled("Observed variables: "; color = color) - for var in partable.variables[:observed_vars] + for var in partable.observed_vars print("$var ") end print("\n") - if haskey(partable.variables, :sorted_vars) && - (length(partable.variables[:sorted_vars]) > 0) + if length(partable.sorted_vars) > 0 printstyled("Sorted variables: "; color = color) - for var in partable.variables[:sorted_vars] + for var in partable.sorted_vars print("$var ") end print("\n") @@ -87,20 +82,25 @@ function sem_summary( print("\n") columns = keys(partable.columns) + show_columns = isnothing(show_columns) ? 
nothing : intersect(show_columns, columns) printstyled("Loadings: \n"; color = color) print("\n") - sorted_columns = [:to, :estimate, :identifier, :value_fixed, :start] - loading_columns = sort_partially(sorted_columns, columns) - header_cols = copy(loading_columns) - replace!(header_cols, :parameter_type => :type) + if isnothing(show_columns) + sorted_columns = [:to, :estimate, :param, :value_fixed, :start] + loading_columns = sort_partially(sorted_columns, columns) + header_cols = copy(loading_columns) + else + loading_columns = copy(show_columns) + header_cols = copy(loading_columns) + end - for var in partable.variables[:latent_vars] + for var in partable.latent_vars indicator_indices = findall( - (partable.columns[:from] .== var) .& - (partable.columns[:parameter_type] .== :→) .& - (partable.columns[:to] .∈ [partable.variables[:observed_vars]]), + r -> + (r.from == var) && (r.relation == :→) && (r.to ∈ partable.observed_vars), + partable, ) loading_array = reduce( hcat, @@ -116,6 +116,7 @@ function sem_summary( header = header_cols, tf = PrettyTables.tf_borderless, alignment = :l, + formatters = (v, i, j) -> isa(v, Number) && isnan(v) ? 
"" : v, ) print("\n") end @@ -123,33 +124,29 @@ function sem_summary( printstyled("Directed Effects: \n"; color = color) regression_indices = findall( - (partable.columns[:parameter_type] .== :→) .& ( - ( - (partable.columns[:to] .∈ [partable.variables[:observed_vars]]) .& - (partable.columns[:from] .∈ [partable.variables[:observed_vars]]) - ) .| - ( - (partable.columns[:to] .∈ [partable.variables[:latent_vars]]) .& - (partable.columns[:from] .∈ [partable.variables[:observed_vars]]) - ) .| - ( - (partable.columns[:to] .∈ [partable.variables[:latent_vars]]) .& - (partable.columns[:from] .∈ [partable.variables[:latent_vars]]) - ) - ), + r -> + (r.relation == :→) && ( + ((r.to ∈ partable.observed_vars) && (r.from ∈ partable.observed_vars)) || + ((r.to ∈ partable.latent_vars) && (r.from ∈ partable.observed_vars)) || + ((r.to ∈ partable.latent_vars) && (r.from ∈ partable.latent_vars)) + ), + partable, ) - sorted_columns = - [:from, :parameter_type, :to, :estimate, :identifier, :value_fixed, :start] - regression_columns = sort_partially(sorted_columns, columns) + if isnothing(show_columns) + sorted_columns = [:from, :relation, :to, :estimate, :param, :value_fixed, :start] + regression_columns = sort_partially(sorted_columns, columns) + else + regression_columns = copy(show_columns) + end regression_array = reduce( hcat, check_round(partable.columns[c][regression_indices]; digits = digits) for c in regression_columns ) - regression_columns[2] = Symbol("") - replace!(regression_columns, :parameter_type => :type) + regression_columns[2] = + regression_columns[2] == :relation ? Symbol("") : regression_columns[2] print("\n") pretty_table( @@ -157,91 +154,91 @@ function sem_summary( header = regression_columns, tf = PrettyTables.tf_borderless, alignment = :l, + formatters = (v, i, j) -> isa(v, Number) && isnan(v) ? 
"" : v, ) print("\n") printstyled("Variances: \n"; color = color) - variance_indices = findall( - (partable.columns[:parameter_type] .== :↔) .& - (partable.columns[:to] .== partable.columns[:from]), - ) + var_indices = findall(r -> r.relation == :↔ && r.to == r.from, partable) - sorted_columns = - [:from, :parameter_type, :to, :estimate, :identifier, :value_fixed, :start] - variance_columns = sort_partially(sorted_columns, columns) + if isnothing(show_columns) + sorted_columns = [:from, :relation, :to, :estimate, :param, :value_fixed, :start] + var_columns = sort_partially(sorted_columns, columns) + else + var_columns = copy(show_columns) + end - variance_array = reduce( + var_array = reduce( hcat, - check_round(partable.columns[c][variance_indices]; digits = digits) for - c in variance_columns + check_round(partable.columns[c][var_indices]; digits) for c in var_columns ) - variance_columns[2] = Symbol("") - replace!(variance_columns, :parameter_type => :type) + var_columns[2] = var_columns[2] == :relation ? Symbol("") : var_columns[2] print("\n") pretty_table( - variance_array; - header = variance_columns, + var_array; + header = var_columns, tf = PrettyTables.tf_borderless, alignment = :l, + formatters = (v, i, j) -> isa(v, Number) && isnan(v) ? 
"" : v, ) print("\n") printstyled("Covariances: \n"; color = color) - variance_indices = findall( - (partable.columns[:parameter_type] .== :↔) .& - (partable.columns[:to] .!= partable.columns[:from]), - ) + covar_indices = findall(r -> r.relation == :↔ && r.to != r.from, partable) - sorted_columns = - [:from, :parameter_type, :to, :estimate, :identifier, :value_fixed, :start] - variance_columns = sort_partially(sorted_columns, columns) + if isnothing(show_columns) + covar_columns = sort_partially(sorted_columns, columns) + else + covar_columns = copy(show_columns) + end - variance_array = reduce( + covar_array = reduce( hcat, - check_round(partable.columns[c][variance_indices]; digits = digits) for - c in variance_columns + check_round(partable.columns[c][covar_indices]; digits = digits) for + c in covar_columns ) - variance_columns[2] = Symbol("") - replace!(variance_columns, :parameter_type => :type) + covar_columns[2] = covar_columns[2] == :relation ? Symbol("") : covar_columns[2] print("\n") pretty_table( - variance_array; - header = variance_columns, + covar_array; + header = covar_columns, tf = PrettyTables.tf_borderless, alignment = :l, + formatters = (v, i, j) -> isa(v, Number) && isnan(v) ? 
"" : v, ) print("\n") - mean_indices = findall( - (partable.columns[:parameter_type] .== :→) .& - (partable.columns[:from] .== Symbol("1")), - ) + mean_indices = findall(r -> (r.relation == :→) && (r.from == Symbol(1)), partable) if length(mean_indices) > 0 printstyled("Means: \n"; color = color) - sorted_columns = - [:from, :parameter_type, :to, :estimate, :identifier, :value_fixed, :start] - variance_columns = sort_partially(sorted_columns, columns) + if isnothing(show_columns) + sorted_columns = + [:from, :relation, :to, :estimate, :param, :value_fixed, :start] + mean_columns = sort_partially(sorted_columns, columns) + else + mean_columns = copy(show_columns) + end - variance_array = reduce( + mean_array = reduce( hcat, check_round(partable.columns[c][mean_indices]; digits = digits) for - c in variance_columns + c in mean_columns ) - variance_columns[2] = Symbol("") - replace!(variance_columns, :parameter_type => :type) + mean_columns[2] = mean_columns[2] == :relation ? Symbol("") : mean_columns[2] print("\n") pretty_table( - variance_array; - header = variance_columns, + mean_array; + header = mean_columns, tf = PrettyTables.tf_borderless, alignment = :l, + formatters = (v, i, j) -> isa(v, Number) && isnan(v) ? 
"" : v, ) print("\n") end @@ -250,12 +247,13 @@ function sem_summary( end -function sem_summary( +function details( partable::EnsembleParameterTable; color = :light_cyan, secondary_color = :light_yellow, digits = 2, show_variables = true, + show_columns = nothing, ) if show_variables print("\n") @@ -266,19 +264,18 @@ function sem_summary( print("\n") let partable = partable.tables[[keys(partable.tables)...][1]] printstyled("Latent variables: "; color = color) - for var in partable.variables[:latent_vars] + for var in partable.latent_vars print("$var ") end print("\n") printstyled("Observed variables: "; color = color) - for var in partable.variables[:observed_vars] + for var in partable.observed_vars print("$var ") end print("\n") - if haskey(partable.variables, :sorted_vars) && - (length(partable.variables[:sorted_vars]) > 0) + if length(partable.sorted_vars) > 0 printstyled("Sorted variables: "; color = color) - for var in partable.variables[:sorted_vars] + for var in partable.sorted_vars print("$var ") end print("\n") @@ -291,12 +288,13 @@ function sem_summary( print("\n") printstyled(rpad(" Group: $k", 78), reverse = true) print("\n") - sem_summary( + details( partable.tables[k]; color = color, secondary_color = secondary_color, digits = digits, show_variables = false, + show_columns = show_columns, ) end @@ -323,10 +321,18 @@ function sort_partially(sorted, to_sort) return out end +function Base.findall(fun::Function, partable::ParameterTable) + rows = Int[] + for (i, r) in enumerate(partable) + fun(r) ? push!(rows, i) : nothing + end + return rows +end + """ - (1) sem_summary(sem_fit::SemFit; show_fitmeasures = false) + (1) details(sem_fit::SemFit; show_fitmeasures = false) - (2) sem_summary(partable::AbstractParameterTable) + (2) details(partable::AbstractParameterTable; ...) Print information about (1) a fitted SEM or (2) a parameter table to stdout. @@ -336,5 +342,6 @@ Print information about (1) a fitted SEM or (2) a parameter table to stdout. 
- `color = :light_cyan`: color of some parts of the printed output. Can be adjusted for readability. - `secondary_color = :light_yellow` - `show_variables = true` +- `show_columns = nothing`: columns names to include in the output e.g.`[:from, :to, :estimate]`) """ -function sem_summary end +function details end diff --git a/src/frontend/pretty_printing.jl b/src/frontend/pretty_printing.jl index 5b732c980..c1cd72c2f 100644 --- a/src/frontend/pretty_printing.jl +++ b/src/frontend/pretty_printing.jl @@ -25,7 +25,7 @@ function print_type(io::IO, struct_instance) end ############################################################## -# Loss Functions, Imply, +# Loss Functions, Implied, ############################################################## function Base.show(io::IO, struct_inst::SemLossFunction) @@ -33,7 +33,7 @@ function Base.show(io::IO, struct_inst::SemLossFunction) print_field_types(io, struct_inst) end -function Base.show(io::IO, struct_inst::SemImply) +function Base.show(io::IO, struct_inst::SemImplied) print_type_name(io, struct_inst) print_field_types(io, struct_inst) end diff --git a/src/frontend/specification/EnsembleParameterTable.jl b/src/frontend/specification/EnsembleParameterTable.jl index 79283953f..14169dd94 100644 --- a/src/frontend/specification/EnsembleParameterTable.jl +++ b/src/frontend/specification/EnsembleParameterTable.jl @@ -2,8 +2,9 @@ ### Types ############################################################################################ -mutable struct EnsembleParameterTable{C} <: AbstractParameterTable - tables::C +struct EnsembleParameterTable <: AbstractParameterTable + tables::Dict{Symbol, ParameterTable} + param_labels::Vector{Symbol} end ############################################################################################ @@ -11,41 +12,67 @@ end ############################################################################################ # constuct an empty table -function EnsembleParameterTable(::Nothing) - tables = 
Dict{Symbol, ParameterTable}() - return EnsembleParameterTable(tables) +EnsembleParameterTable(::Nothing; param_labels::Union{Nothing, Vector{Symbol}} = nothing) = + EnsembleParameterTable( + Dict{Symbol, ParameterTable}(), + isnothing(param_labels) ? Symbol[] : copy(param_labels), + ) + +# convert pairs to dict +EnsembleParameterTable(ps::Pair{K, V}...; param_labels = nothing) where {K, V} = + EnsembleParameterTable(Dict(ps...); param_labels = param_labels) + +# dictionary of SEM specifications +function EnsembleParameterTable( + spec_ensemble::AbstractDict{K, V}; + param_labels::Union{Nothing, Vector{Symbol}} = nothing, +) where {K, V <: SemSpecification} + param_labels = if isnothing(param_labels) + # collect all SEM parameters in ensemble if not specified + # and apply the set to all partables + unique(mapreduce(SEM.param_labels, vcat, values(spec_ensemble), init = Vector{Symbol}())) + else + copy(param_labels) + end + + # convert each model specification to ParameterTable + partables = Dict{Symbol, ParameterTable}( + Symbol(group) => convert(ParameterTable, spec; param_labels) for + (group, spec) in pairs(spec_ensemble) + ) + return EnsembleParameterTable(partables, param_labels) end ############################################################################################ ### Convert to other types ############################################################################################ -import Base.Dict - -function Dict(partable::EnsembleParameterTable) - return partable.tables +function Base.convert(::Type{Dict}, partable::EnsembleParameterTable) + return convert(Dict, partable.tables) end -#= function DataFrame( - partable::ParameterTable; - columns = nothing) - if isnothing(columns) columns = keys(partable.columns) end - out = DataFrame([key => partable.columns[key] for key in columns]) - return DataFrame(out) -end =# - -############################################################################################ -### get parameter table from 
RAMMatrices -############################################################################################ - -function EnsembleParameterTable(args...; groups) - partable = EnsembleParameterTable(nothing) +function Base.convert( + ::Type{Dict{K, RAMMatrices}}, + partables::EnsembleParameterTable; + param_labels::Union{AbstractVector{Symbol}, Nothing} = nothing, +) where {K} + isnothing(param_labels) || (param_labels = SEM.param_labels(partables)) + + return Dict{K, RAMMatrices}( + K(key) => RAMMatrices(partable; param_labels = param_labels) for + (key, partable) in pairs(partables.tables) + ) +end - for (group, ram_matrices) in zip(groups, args) - push!(partable.tables, group => ParameterTable(ram_matrices)) +function DataFrames.DataFrame( + partables::EnsembleParameterTable; + columns::Union{AbstractVector{Symbol}, Nothing} = nothing, +) + mapreduce(vcat, pairs(partables.tables)) do (key, partable) + df = DataFrame(partable; columns = columns) + df[!, :group] .= key + return df end - - return partable end ############################################################################################ @@ -69,52 +96,58 @@ end ### Additional Methods ############################################################################################ -# Sorting ---------------------------------------------------------------------------------- - -# Sorting ---------------------------------------------------------------------------------- +# Variables Sorting ------------------------------------------------------------------------ -function sort!(ensemble_partable::EnsembleParameterTable) - for partable in values(ensemble_partable.tables) - sort!(partable) +function sort_vars!(partables::EnsembleParameterTable) + for partable in values(partables.tables) + sort_vars!(partable) end - return ensemble_partable + return partables end -function sort(partable::EnsembleParameterTable) - new_partable = deepcopy(partable) - sort!(new_partable) - return new_partable -end 
+sort_vars(partables::EnsembleParameterTable) = sort_vars!(deepcopy(partables)) # add a row -------------------------------------------------------------------------------- # do we really need this? -import Base.push! - -function push!(partable::EnsembleParameterTable, d::AbstractDict, group) +function Base.push!(partable::EnsembleParameterTable, d::AbstractDict, group) push!(partable.tables[group], d) end -push!(partable::EnsembleParameterTable, d::Nothing, group) = nothing - -# get group -------------------------------------------------------------------------------- - -get_group(partable::EnsembleParameterTable, group) = get_group(partable.tables, group) +Base.getindex(partable::EnsembleParameterTable, group) = partable.tables[group] ############################################################################################ ### Update Partable from Fitted Model ############################################################################################ -# update generic --------------------------------------------------------------------------- function update_partable!( - partable::EnsembleParameterTable, - model_identifier::AbstractDict, - vec, - column, + partables::EnsembleParameterTable, + column::Symbol, + params::AbstractDict{Symbol}, + default::Any = nothing, ) - for k in keys(partable.tables) - update_partable!(partable.tables[k], model_identifier, vec, column) + for partable in values(partables.tables) + update_partable!(partable, column, params, default) end - return partable + return partables +end + +function update_partable!( + partables::EnsembleParameterTable, + column::Symbol, + param_labels::AbstractVector{Symbol}, + values::AbstractVector, + default::Any = nothing, +) + return update_partable!(partables, column, Dict(zip(param_labels, values)), default) +end + +############################################################################################ +### Additional methods 
+############################################################################################ + +function Base.:(==)(p1::EnsembleParameterTable, p2::EnsembleParameterTable) + out = (p1.tables == p2.tables) && (p1.param_labels == p2.param_labels) + return out end diff --git a/src/frontend/specification/ParameterTable.jl b/src/frontend/specification/ParameterTable.jl index 1910d666e..2af269372 100644 --- a/src/frontend/specification/ParameterTable.jl +++ b/src/frontend/specification/ParameterTable.jl @@ -1,57 +1,99 @@ -abstract type AbstractParameterTable end - ############################################################################################ ### Types ############################################################################################ -mutable struct ParameterTable{C, V} <: AbstractParameterTable +struct ParameterTable{C} <: AbstractParameterTable columns::C - variables::V + observed_vars::Vector{Symbol} + latent_vars::Vector{Symbol} + sorted_vars::Vector{Symbol} + param_labels::Vector{Symbol} end ############################################################################################ ### Constructors ############################################################################################ -# constuct an empty table -function ParameterTable(::Nothing) - columns = Dict{Symbol, Any}( - :from => Vector{Symbol}(), - :parameter_type => Vector{Symbol}(), - :to => Vector{Symbol}(), - :free => Vector{Bool}(), - :value_fixed => Vector{Float64}(), - :start => Vector{Float64}(), - :estimate => Vector{Float64}(), - :identifier => Vector{Symbol}(), - :start => Vector{Float64}(), - ) +# construct a dictionary with the default partable columns +# optionally pre-allocate data for nrows +empty_partable_columns(nrows::Integer = 0) = Dict{Symbol, Vector}( + :from => fill(Symbol(), nrows), + :relation => fill(Symbol(), nrows), + :to => fill(Symbol(), nrows), + :free => fill(true, nrows), + :value_fixed => fill(NaN, nrows), + :start => fill(NaN, nrows), + 
:estimate => fill(NaN, nrows), + :label => fill(Symbol(), nrows), +) - variables = Dict{Symbol, Any}( - :latent_vars => Vector{Symbol}(), - :observed_vars => Vector{Symbol}(), - :sorted_vars => Vector{Symbol}(), +# construct using the provided columns data or create an empty table +function ParameterTable( + columns::Dict{Symbol, Vector}; + observed_vars::Union{AbstractVector{Symbol}, Nothing} = nothing, + latent_vars::Union{AbstractVector{Symbol}, Nothing} = nothing, + param_labels::Union{AbstractVector{Symbol}, Nothing} = nothing, +) + param_labels = isnothing(param_labels) ? unique!(filter(!=(:const), columns[:label])) : copy(param_labels) + check_param_labels(param_labels, columns[:label]) + return ParameterTable( + columns, + !isnothing(observed_vars) ? copy(observed_vars) : Vector{Symbol}(), + !isnothing(latent_vars) ? copy(latent_vars) : Vector{Symbol}(), + Vector{Symbol}(), + param_labels, ) +end + +# new parameter table with different parameters order +function ParameterTable( + partable::ParameterTable; + param_labels::Union{AbstractVector{Symbol}, Nothing} = nothing, +) + isnothing(param_labels) || check_param_labels(param_labels, partable.columns[:label]) - return ParameterTable(columns, variables) + return ParameterTable( + Dict(col => copy(values) for (col, values) in pairs(partable.columns)), + observed_vars = copy(observed_vars(partable)), + latent_vars = copy(latent_vars(partable)), + param_labels = param_labels, + ) end +vars(partable::ParameterTable) = + !isempty(partable.sorted_vars) ? 
partable.sorted_vars : + vcat(partable.latent_vars, partable.observed_vars) +observed_vars(partable::ParameterTable) = partable.observed_vars +latent_vars(partable::ParameterTable) = partable.latent_vars + +nvars(partable::ParameterTable) = + length(partable.latent_vars) + length(partable.observed_vars) + ############################################################################################ ### Convert to other types ############################################################################################ -import Base.Dict - -function Dict(partable::ParameterTable) +function Base.convert(::Type{Dict}, partable::ParameterTable) return partable.columns end -function DataFrame(partable::ParameterTable; columns = nothing) +function Base.convert( + ::Type{ParameterTable}, + partable::ParameterTable; + param_labels::Union{AbstractVector{Symbol}, Nothing} = nothing, +) + return isnothing(param_labels) || partable.param_labels == param_labels ? partable : + ParameterTable(partable; param_labels) +end + +function DataFrames.DataFrame( + partable::ParameterTable; + columns::Union{AbstractVector{Symbol}, Nothing} = nothing, +) if isnothing(columns) columns = keys(partable.columns) end - out = DataFrame([key => partable.columns[key] for key in columns]) - return DataFrame(out) + return DataFrame([col => partable.columns[col] for col in columns]) end ############################################################################################ @@ -59,62 +101,69 @@ end ############################################################################################ function Base.show(io::IO, partable::ParameterTable) - relevant_columns = [ - :from, - :parameter_type, - :to, - :free, - :value_fixed, - :start, - :estimate, - :se, - :identifier, - ] - existing_columns = [haskey(partable.columns, key) for key in relevant_columns] + relevant_columns = + [:from, :relation, :to, :free, :value_fixed, :start, :estimate, :se, :label] + shown_columns = filter!( + col -> 
haskey(partable.columns, col) && length(partable.columns[col]) > 0, + relevant_columns, + ) - as_matrix = - hcat([partable.columns[key] for key in relevant_columns[existing_columns]]...) + as_matrix = mapreduce(col -> partable.columns[col], hcat, shown_columns) pretty_table( io, as_matrix, - header = ( - relevant_columns[existing_columns], - eltype.([partable.columns[key] for key in relevant_columns[existing_columns]]), - ), + header = (shown_columns, [eltype(partable.columns[col]) for col in shown_columns]), tf = PrettyTables.tf_compact, + # TODO switch to `missing` as non-specified values and suppress printing of `missing` instead + formatters = (v, i, j) -> isa(v, Number) && isnan(v) ? "" : v, ) - if haskey(partable.variables, :latent_vars) - print(io, "Latent Variables: $(partable.variables[:latent_vars]) \n") - end - if haskey(partable.variables, :observed_vars) - print(io, "Observed Variables: $(partable.variables[:observed_vars]) \n") - end + print(io, "Latent Variables: $(partable.latent_vars) \n") + print(io, "Observed Variables: $(partable.observed_vars) \n") end ############################################################################################ ### Additional Methods ############################################################################################ +# Equality -------------------------------------------------------------------------------- +function Base.:(==)(p1::ParameterTable, p2::ParameterTable) + out = + (p1.columns == p2.columns) && + (p1.observed_vars == p2.observed_vars) && + (p1.latent_vars == p2.latent_vars) && + (p1.sorted_vars == p2.sorted_vars) && + (p1.param_labels == p2.param_labels) + return out +end + # Iteration -------------------------------------------------------------------------------- +ParameterTableRow = @NamedTuple begin + from::Symbol + relation::Symbol + to::Symbol + free::Bool + value_fixed::Any + param::Symbol +end -Base.getindex(partable::ParameterTable, i::Int) = ( - partable.columns[:from][i], - 
partable.columns[:parameter_type][i], - partable.columns[:to][i], - partable.columns[:free][i], - partable.columns[:value_fixed][i], - partable.columns[:identifier][i], +Base.getindex(partable::ParameterTable, i::Integer) = ( + from = partable.columns[:from][i], + relation = partable.columns[:relation][i], + to = partable.columns[:to][i], + free = partable.columns[:free][i], + value_fixed = partable.columns[:value_fixed][i], + param = partable.columns[:label][i], ) -function Base.length(partable::ParameterTable) - len = missing - for key in keys(partable.columns) - len = length(partable.columns[key]) - break - end - return len -end +Base.length(partable::ParameterTable) = length(partable.columns[:label]) +Base.eachindex(partable::ParameterTable) = Base.OneTo(length(partable)) + +Base.eltype(::Type{<:ParameterTable}) = ParameterTableRow +Base.iterate(partable::ParameterTable, i::Integer = 1) = + i > length(partable) ? nothing : (partable[i], i + 1) + +nparams(partable::ParameterTable) = length(param_labels(partable)) # Sorting ---------------------------------------------------------------------------------- @@ -124,128 +173,185 @@ end Base.showerror(io::IO, e::CyclicModelError) = print(io, e.msg) -import Base.sort!, Base.sort +""" + sort_vars!(partable::ParameterTable) + sort_vars!(partables::EnsembleParameterTable) -function sort!(partable::ParameterTable) - variables = [partable.variables[:latent_vars]; partable.variables[:observed_vars]] +Sort variables in `partable` so that all independent variables are +before the dependent variables and store it in `partable.sorted_vars`. - is_regression = - (partable.columns[:parameter_type] .== :→) .& - (partable.columns[:from] .!= Symbol("1")) +If the relations between the variables are acyclic, sorting will +make the resulting `A` matrix in the *RAM* model lower triangular +and allow faster calculations. 
+""" +function sort_vars!(partable::ParameterTable) + vars = [ + partable.latent_vars + partable.observed_vars + ] - to = partable.columns[:to][is_regression] - from = partable.columns[:from][is_regression] + # regression edges (excluding intercept) + edges = [ + (from, to) for (rel, from, to) in zip( + partable.columns[:relation], + partable.columns[:from], + partable.columns[:to], + ) if (rel == :→) && (from != Symbol(1)) + ] + sort!(edges, by = last) # sort edges by target - sorted_variables = Vector{Symbol}() + sorted_vars = Vector{Symbol}() - sorted = false - while !sorted + while !isempty(vars) acyclic = false - for (i, variable) in enumerate(variables) - if !(variable ∈ to) - push!(sorted_variables, variable) - deleteat!(variables, i) - delete_edges = from .!= variable - to = to[delete_edges] - from = from[delete_edges] + for (i, var) in enumerate(vars) + # check if var has any incoming edge + eix = searchsortedfirst(edges, (var, var), by = last) + if !(eix <= length(edges) && last(edges[eix]) == var) + # var is source, no edges to it + push!(sorted_vars, var) + deleteat!(vars, i) + # remove var outgoing edges + filter!(e -> e[1] != var, edges) acyclic = true + break end end - if !acyclic + # if acyclic is false, all vars have incoming edge + acyclic || throw(CyclicModelError("your model is cyclic and therefore can not be ordered")) - end - acyclic = false - - if length(variables) == 0 - sorted = true - end end - push!(partable.variables, :sorted_vars => sorted_variables) + copyto!(resize!(partable.sorted_vars, length(sorted_vars)), sorted_vars) + @assert length(partable.sorted_vars) == nvars(partable) return partable end -function sort(partable::ParameterTable) - new_partable = deepcopy(partable) - sort!(new_partable) - return new_partable -end +""" + sort_vars(partable::ParameterTable) + sort_vars(partables::EnsembleParameterTable) -# add a row -------------------------------------------------------------------------------- +Sort variables in `partable` so 
that all independent variables are +before the dependent variables, and return a copy of `partable` +where the sorted variables are in `partable.sorted_vars`. -import Base.push! +See [sort_vars!](@ref) for in-place version. +""" +sort_vars(partable::ParameterTable) = sort_vars!(deepcopy(partable)) + +# add a row -------------------------------------------------------------------------------- -function push!(partable::ParameterTable, d::AbstractDict) - for key in keys(d) - push!(partable.columns[key], d[key]) +function Base.push!(partable::ParameterTable, d::Union{AbstractDict{Symbol}, NamedTuple}) + issetequal(keys(partable.columns), keys(d)) || throw( + ArgumentError( + "The new row needs to have the same keys as the columns of the parameter table.", + ), + ) + for (key, val) in pairs(d) + push!(partable.columns[key], val) end end -push!(partable::ParameterTable, d::Nothing) = nothing - ############################################################################################ ### Update Partable from Fitted Model ############################################################################################ # update generic --------------------------------------------------------------------------- - function update_partable!( partable::ParameterTable, - model_identifier::AbstractDict, - vec, - column, -) - new_col = Vector{eltype(vec)}(undef, length(partable)) - for (i, identifier) in enumerate(partable.columns[:identifier]) - if !(identifier == :const) - new_col[i] = vec[model_identifier[identifier]] - elseif identifier == :const - new_col[i] = zero(eltype(vec)) + column::Symbol, + params::AbstractDict{Symbol, T}, + default::Any = nothing, +) where {T} + coldata = get!(() -> Vector{T}(undef, length(partable)), partable.columns, column) + + isvec_def = (default isa AbstractVector) && (length(default) == length(partable)) + + for (i, par) in enumerate(partable.columns[:label]) + if par == :const + coldata[i] = !isnothing(default) ? (isvec_def ? 
default[i] : default) : zero(T) + elseif haskey(params, par) + coldata[i] = params[par] + else + if isnothing(default) + throw(KeyError(par)) + else + coldata[i] = isvec_def ? default[i] : default + end + end end - push!(partable.columns, column => new_col) + return partable end """ - update_partable!(partable::AbstractParameterTable, sem_fit::SemFit, vec, column) - -Write `vec` to `column` of `partable`. + update_partable!(partable::AbstractParameterTable, param_labels::Vector{Symbol}, params, column) -# Arguments -- `vec::Vector`: has to be in the same order as the `model` parameters +Write parameter values into `column` of `partable`. + +The `param_labels` and `params` vectors define the pairs of +parameters, which are being matched to the `:label` column +of the `partable`. """ -update_partable!(partable::AbstractParameterTable, sem_fit::SemFit, vec, column) = - update_partable!(partable, identifier(sem_fit), vec, column) +function update_partable!( + partable::ParameterTable, + column::Symbol, + param_labels::AbstractVector{Symbol}, + params::AbstractVector, + default::Any = nothing, +) + length(param_labels) == length(params) || throw( + ArgumentError( + "The length of `param_labels` ($(length(param_labels))) and their `params` ($(length(params))) must be the same", + ), + ) + check_param_labels(param_labels, nothing) + params = Dict(zip(param_labels, params)) + update_partable!(partable, column, params, default) +end # update estimates ------------------------------------------------------------------------- """ update_estimate!( partable::AbstractParameterTable, - sem_fit::SemFit) + fit::SemFit) -Write parameter estimates from `sem_fit` to the `:estimate` column of `partable` +Write parameter estimates from `fit` to the `:estimate` column of `partable` """ -update_estimate!(partable::AbstractParameterTable, sem_fit::SemFit) = - update_partable!(partable, sem_fit, sem_fit.solution, :estimate) +update_estimate!(partable::ParameterTable, fit::SemFit) = 
update_partable!( + partable, + :estimate, + param_labels(fit), + fit.solution, + partable.columns[:value_fixed], +) + +# fallback method for ensemble +update_estimate!(partable::AbstractParameterTable, fit::SemFit) = + update_partable!(partable, :estimate, param_labels(fit), fit.solution) # update starting values ------------------------------------------------------------------- """ - update_start!(partable::AbstractParameterTable, sem_fit::SemFit) + update_start!(partable::AbstractParameterTable, fit::SemFit) update_start!(partable::AbstractParameterTable, model::AbstractSem, start_val; kwargs...) -Write starting values from `sem_fit` or `start_val` to the `:estimate` column of `partable`. +Write starting values from `fit` or `start_val` to the `:start` column of `partable`. # Arguments - `start_val`: either a vector of starting values or a function to compute starting values from `model` - `kwargs...`: are passed to `start_val` """ -update_start!(partable::AbstractParameterTable, sem_fit::SemFit) = - update_partable!(partable, sem_fit, sem_fit.start_val, :start) +update_start!(partable::AbstractParameterTable, fit::SemFit) = update_partable!( + partable, + :start, + param_labels(fit), + fit.start_val, + partable.columns[:value_fixed], +) function update_start!( partable::AbstractParameterTable, @@ -256,29 +362,172 @@ function update_start!( if !(start_val isa Vector) start_val = start_val(model; kwargs...) 
end - return update_partable!(partable, identifier(model), start_val, :start) + return update_partable!(partable, :start, param_labels(model), start_val) end # update partable standard errors ---------------------------------------------------------- """ update_se_hessian!( partable::AbstractParameterTable, - sem_fit::SemFit; - hessian = :finitediff) + fit::SemFit; + method = :finitediff) -Write hessian standard errors computed for `sem_fit` to the `:se` column of `partable` +Write hessian standard errors computed for `fit` to the `:se` column of `partable` # Arguments -- `hessian::Symbol`: how to compute the hessian, see [se_hessian](@ref) for more information. +- `method::Symbol`: how to compute the hessian, see [se_hessian](@ref) for more information. # Examples """ function update_se_hessian!( partable::AbstractParameterTable, - sem_fit::SemFit; - hessian = :finitediff, + fit::SemFit; + method = :finitediff, +) + se = se_hessian(fit; method) + return update_partable!(partable, :se, param_labels(fit), se) +end + + +""" + lavaan_params!(out::AbstractVector, partable_lav, + partable::ParameterTable, + lav_col::Symbol = :est, lav_group = nothing) + +Extract parameter values from the `partable_lav` lavaan model that +match the parameters of `partable` into the `out` vector. + +The method sets the *i*-th element of the `out` vector to +the value of the *i*-th parameter from `param_labels(partable)`. + +Note that the lavaan and `partable` models are matched by +the names of variables in the tables (`from` and `to` columns) +as well as the type of their relationship (`relation` column), +and not by the names of the model parameters. 
+""" +function lavaan_params!( + out::AbstractVector, + partable_lav, + partable::ParameterTable, + lav_col::Symbol = :est, + lav_group = nothing, ) - se = se_hessian(sem_fit; hessian = hessian) - return update_partable!(partable, sem_fit, se, :se) + + # find indices of all df row where f is true + findallrows(f::Function, df) = findall(f(r) for r in eachrow(df)) + + (length(out) == nparams(partable)) || throw( + DimensionMismatch( + "The length of parameter values vector ($(length(out))) does not match the number of parameters ($(nparams(partable)))", + ), + ) + partable_mask = findall(partable.columns[:free]) + param_index = param_indices(partable) + + lav_values = partable_lav[:, lav_col] + for (from, to, type, id) in zip( + [ + view(partable.columns[k], partable_mask) for + k in [:from, :to, :relation, :label] + ]..., + ) + lav_ind = nothing + + if from == Symbol(1) + lav_ind = findallrows( + r -> + r[:lhs] == String(to) && + r[:op] == "~1" && + (isnothing(lav_group) || r[:group] == lav_group), + partable_lav, + ) + else + if type == :↔ + lav_type = "~~" + elseif type == :→ + if (from ∈ partable.latent_vars) && (to ∈ partable.observed_vars) + lav_type = "=~" + else + lav_type = "~" + from, to = to, from + end + end + + if lav_type == "~~" + lav_ind = findallrows( + r -> + ( + (r[:lhs] == String(from) && r[:rhs] == String(to)) || + (r[:lhs] == String(to) && r[:rhs] == String(from)) + ) && + r[:op] == lav_type && + (isnothing(lav_group) || r[:group] == lav_group), + partable_lav, + ) + else + lav_ind = findallrows( + r -> + r[:lhs] == String(from) && + r[:rhs] == String(to) && + r[:op] == lav_type && + (isnothing(lav_group) || r[:group] == lav_group), + partable_lav, + ) + end + end + + if length(lav_ind) == 0 + throw( + ErrorException( + "Parameter $id ($from $type $to) could not be found in the lavaan solution", + ), + ) + elseif length(lav_ind) > 1 + throw( + ErrorException( + "At least one parameter was found twice in the lavaan solution", + ), + ) + end + + 
param_ind = param_index[id] + param_val = lav_values[lav_ind[1]] + if isnan(out[param_ind]) + out[param_ind] = param_val + else + @assert out[param_ind] ≈ param_val atol = 1E-10 "Parameter :$id value at row #$lav_ind ($param_val) differs from the earlier encountered value ($(out[param_ind]))" + end + end + + return out end + +""" + lavaan_params(partable_lav, partable::ParameterTable, + lav_col::Symbol = :est, lav_group = nothing) + +Extract parameter values from the `partable_lav` lavaan model that +match the parameters of `partable`. + +The returned vector is of `nparams(partable)` length. +Its *i*-th element will contain the +value of the *i*-th parameter from `param_labels(partable)`. + +Note that the lavaan and `partable` models are matched by +the names of variables in the tables (`from` and `to` columns), +and the type of their relationship (`relation` column), +but not by the ids of the model parameters. +""" +lavaan_params( + partable_lav, + partable::ParameterTable, + lav_col::Symbol = :est, + lav_group = nothing, +) = lavaan_params!( + fill(NaN, nparams(partable)), + partable_lav, + partable, + lav_col, + lav_group, +) diff --git a/src/frontend/specification/RAMMatrices.jl b/src/frontend/specification/RAMMatrices.jl index e0fcc575c..75175a87d 100644 --- a/src/frontend/specification/RAMMatrices.jl +++ b/src/frontend/specification/RAMMatrices.jl @@ -1,282 +1,291 @@ -############################################################################################ -### Type -############################################################################################ - -# map from parameter index to linear indices of matrix/vector positions where it occurs -AbstractArrayParamsMap = AbstractVector{<:AbstractVector{<:Integer}} -ArrayParamsMap = Vector{Vector{Int}} - -struct RAMMatrices - A_ind::ArrayParamsMap - S_ind::ArrayParamsMap - F_ind::Vector{Int} - M_ind::Union{ArrayParamsMap, Nothing} - parameters::Any - colnames::Any - 
constants::Any - size_F::Any -end ############################################################################################ -### Constructor -############################################################################################ - -function RAMMatrices(; A, S, F, M = nothing, parameters, colnames) - A_indices = array_parameters_map(parameters, A) - S_indices = array_parameters_map(parameters, S) - M_indices = !isnothing(M) ? array_parameters_map(parameters, M) : nothing - F_indices = findall([any(isone.(col)) for col in eachcol(F)]) - constants = get_RAMConstants(A, S, M) - return RAMMatrices( - A_indices, - S_indices, - F_indices, - M_indices, - parameters, - colnames, - constants, - size(F), - ) -end - -RAMMatrices(a::RAMMatrices) = a - -############################################################################################ -### Constants +### Type ############################################################################################ -struct RAMConstant - matrix::Any - index::Any - value::Any +struct RAMMatrices <: SemSpecification + A::ParamsMatrix{Float64} + S::ParamsMatrix{Float64} + F::SparseMatrixCSC{Float64} + M::Union{ParamsVector{Float64}, Nothing} + param_labels::Vector{Symbol} + vars::Union{Vector{Symbol}, Nothing} # better call it "variables": it's a mixture of observed and latent (and it gets confusing with get_vars()) end -import Base.== +nparams(ram::RAMMatrices) = nparams(ram.A) +nvars(ram::RAMMatrices) = size(ram.F, 2) +nobserved_vars(ram::RAMMatrices) = size(ram.F, 1) +nlatent_vars(ram::RAMMatrices) = nvars(ram) - nobserved_vars(ram) -function ==(c1::RAMConstant, c2::RAMConstant) - res = ((c1.matrix == c2.matrix) && (c1.index == c2.index) && (c1.value == c2.value)) - return res -end +vars(ram::RAMMatrices) = ram.vars -function get_RAMConstants(A, S, M) - constants = Vector{RAMConstant}() +isobserved_var(ram::RAMMatrices, i::Integer) = ram.F.colptr[i+1] > ram.F.colptr[i] +islatent_var(ram::RAMMatrices, i::Integer) = 
ram.F.colptr[i+1] == ram.F.colptr[i] - for index in CartesianIndices(A) - if (A[index] isa Number) && !iszero(A[index]) - push!(constants, RAMConstant(:A, index, A[index])) +# indices of observed variables in the order as they appear in ram.F rows +function observed_var_indices(ram::RAMMatrices) + obs_inds = Vector{Int}(undef, nobserved_vars(ram)) + @inbounds for i in 1:nvars(ram) + colptr = ram.F.colptr[i] + if ram.F.colptr[i+1] > colptr # is observed + obs_inds[ram.F.rowval[colptr]] = i end end + return obs_inds +end - for index in CartesianIndices(S) - if (S[index] isa Number) && !iszero(S[index]) - push!(constants, RAMConstant(:S, index, S[index])) - end - end +latent_var_indices(ram::RAMMatrices) = [i for i in axes(ram.F, 2) if islatent_var(ram, i)] - if !isnothing(M) - for index in CartesianIndices(M) - if (M[index] isa Number) && !iszero(M[index]) - push!(constants, RAMConstant(:M, index, M[index])) +# observed variables in the order as they appear in ram.F rows +function observed_vars(ram::RAMMatrices) + if isnothing(ram.vars) + @warn "Your RAMMatrices do not contain variable names. Please make sure the order of variables in your data is correct!" + return nothing + else + obs_vars = Vector{Symbol}(undef, nobserved_vars(ram)) + @inbounds for (i, v) in enumerate(vars(ram)) + colptr = ram.F.colptr[i] + if ram.F.colptr[i+1] > colptr # is observed + obs_vars[ram.F.rowval[colptr]] = v end end + return obs_vars end - - return constants end -function set_RAMConstant!(A, S, M, rc::RAMConstant) - if rc.matrix == :A - A[rc.index] = rc.value - elseif rc.matrix == :S - S[rc.index] = rc.value - S[rc.index[2], rc.index[1]] = rc.value - elseif rc.matrix == :M - M[rc.index] = rc.value +function latent_vars(ram::RAMMatrices) + if isnothing(ram.vars) + @warn "Your RAMMatrices do not contain variable names. Please make sure the order of variables in your data is correct!" 
+ return nothing + else + return [col for (i, col) in enumerate(ram.vars) if islatent_var(ram, i)] end end -function set_RAMConstants!(A, S, M, rc_vec::Vector{RAMConstant}) - for rc in rc_vec - set_RAMConstant!(A, S, M, rc) +############################################################################################ +### Constructor +############################################################################################ + +function RAMMatrices(; + A::AbstractMatrix, + S::AbstractMatrix, + F::AbstractMatrix, + M::Union{AbstractVector, Nothing} = nothing, + param_labels::AbstractVector{Symbol}, + vars::Union{AbstractVector{Symbol}, Nothing} = nothing, +) + ncols = size(A, 2) + isnothing(vars) || check_vars(vars, ncols) + + size(A, 1) == size(A, 2) || throw(DimensionMismatch("A must be a square matrix")) + size(S, 1) == size(S, 2) || throw(DimensionMismatch("S must be a square matrix")) + size(A, 2) == ncols || throw( + DimensionMismatch( + "A should have as many rows and columns as vars length ($ncols), $(size(A)) found", + ), + ) + size(S, 2) == ncols || throw( + DimensionMismatch( + "S should have as many rows and columns as vars length ($ncols), $(size(S)) found", + ), + ) + size(F, 2) == ncols || throw( + DimensionMismatch( + "F should have as many columns as vars length ($ncols), $(size(F, 2)) found", + ), + ) + if !isnothing(M) + length(M) == ncols || throw( + DimensionMismatch( + "M should have as many elements as vars length ($ncols), $(length(M)) found", + ), + ) end + check_param_labels(param_labels, nothing) + + A = ParamsMatrix{Float64}(A, param_labels) + S = ParamsMatrix{Float64}(S, param_labels) + M = !isnothing(M) ? 
ParamsVector{Float64}(M, param_labels) : nothing + spF = sparse(F) + if any(!isone, spF.nzval) + throw(ArgumentError("F should contain only 0s and 1s")) + end + return RAMMatrices(A, S, F, M, copy(param_labels), vars) end ############################################################################################ ### get RAMMatrices from parameter table ############################################################################################ -function RAMMatrices(partable::ParameterTable; par_id = nothing) - if isnothing(par_id) - parameters, n_par, par_positions = get_par_npar_identifier(partable) +function RAMMatrices( + partable::ParameterTable; + param_labels::Union{AbstractVector{Symbol}, Nothing} = nothing, +) + param_labels = copy(isnothing(param_labels) ? SEM.param_labels(partable) : param_labels) + check_param_labels(param_labels, partable.columns[:label]) + param_labels_index = param_indices(partable) + + n_observed = length(partable.observed_vars) + n_latent = length(partable.latent_vars) + n_vars = n_observed + n_latent + + if length(partable.sorted_vars) != 0 + @assert length(partable.sorted_vars) == nvars(partable) + vars_sorted = copy(partable.sorted_vars) else - parameters, n_par, par_positions = - par_id[:parameters], par_id[:n_par], par_id[:par_positions] + vars_sorted = [ + partable.observed_vars + partable.latent_vars + ] end - n_observed = size(partable.variables[:observed_vars], 1) - n_latent = size(partable.variables[:latent_vars], 1) - n_node = n_observed + n_latent - - # F indices - if length(partable.variables[:sorted_vars]) != 0 - F_ind = findall( - x -> x ∈ partable.variables[:observed_vars], - partable.variables[:sorted_vars], - ) - else - F_ind = 1:n_observed - end - - # indices of the colnames - if length(partable.variables[:sorted_vars]) != 0 - positions = - Dict(zip(partable.variables[:sorted_vars], collect(1:n_observed+n_latent))) - colnames = copy(partable.variables[:sorted_vars]) - else - positions = Dict( - zip( - 
[partable.variables[:observed_vars]; partable.variables[:latent_vars]], - collect(1:n_observed+n_latent), - ), - ) - colnames = [partable.variables[:observed_vars]; partable.variables[:latent_vars]] - end + # indices of the vars (A/S/M rows or columns) + vars_index = Dict(col => i for (i, col) in enumerate(vars_sorted)) # fill Matrices # known_labels = Dict{Symbol, Int64}() - A_ind = Vector{Vector{Int64}}(undef, n_par) - for i in 1:length(A_ind) - A_ind[i] = Vector{Int64}() - end - S_ind = Vector{Vector{Int64}}(undef, n_par) - S_ind .= [Vector{Int64}()] - for i in 1:length(S_ind) - S_ind[i] = Vector{Int64}() - end - + T = nonmissingtype(eltype(partable.columns[:value_fixed])) + A_inds = [Vector{Int64}() for _ in 1:length(param_labels)] + A_lin_ixs = LinearIndices((n_vars, n_vars)) + S_inds = [Vector{Int64}() for _ in 1:length(param_labels)] + S_lin_ixs = LinearIndices((n_vars, n_vars)) + A_consts = Vector{Pair{Int, T}}() + S_consts = Vector{Pair{Int, T}}() # is there a meanstructure? - if any(partable.columns[:from] .== Symbol("1")) - M_ind = Vector{Vector{Int64}}(undef, n_par) - for i in 1:length(M_ind) - M_ind[i] = Vector{Int64}() - end - else - M_ind = nothing - end - - # handel constants - constants = Vector{RAMConstant}() - - for i in 1:length(partable) - from, parameter_type, to, free, value_fixed, identifier = partable[i] - - row_ind = positions[to] - if from != Symbol("1") - col_ind = positions[from] - end - - if !free - if (parameter_type == :→) & (from == Symbol("1")) - push!(constants, RAMConstant(:M, row_ind, value_fixed)) - elseif (parameter_type == :→) + M_inds = + any(==(Symbol(1)), partable.columns[:from]) ? + [Vector{Int64}() for _ in 1:length(param_labels)] : nothing + M_consts = !isnothing(M_inds) ? Vector{Pair{Int, T}}() : nothing + + for r in partable + row_ind = vars_index[r.to] + col_ind = r.from != Symbol(1) ? 
vars_index[r.from] : nothing + + if !r.free + if (r.relation == :→) && (r.from == Symbol(1)) + push!(M_consts, row_ind => r.value_fixed) + elseif r.relation == :→ push!( - constants, - RAMConstant(:A, CartesianIndex(row_ind, col_ind), value_fixed), + A_consts, + A_lin_ixs[CartesianIndex(row_ind, col_ind)] => r.value_fixed, ) - else + elseif r.relation == :↔ push!( - constants, - RAMConstant(:S, CartesianIndex(row_ind, col_ind), value_fixed), + S_consts, + S_lin_ixs[CartesianIndex(row_ind, col_ind)] => r.value_fixed, ) + if row_ind != col_ind # symmetric + push!( + S_consts, + S_lin_ixs[CartesianIndex(col_ind, row_ind)] => r.value_fixed, + ) + end + else + error("Unsupported relation: $(r.relation)") end else - par_ind = par_positions[identifier] - if (parameter_type == :→) && (from == Symbol("1")) - push!(M_ind[par_ind], row_ind) - elseif parameter_type == :→ - push!(A_ind[par_ind], (row_ind + (col_ind - 1) * n_node)) - else - push!(S_ind[par_ind], row_ind + (col_ind - 1) * n_node) - if row_ind != col_ind - push!(S_ind[par_ind], col_ind + (row_ind - 1) * n_node) + par_ind = param_labels_index[r.param] + if (r.relation == :→) && (r.from == Symbol(1)) + push!(M_inds[par_ind], row_ind) + elseif r.relation == :→ + push!(A_inds[par_ind], A_lin_ixs[CartesianIndex(row_ind, col_ind)]) + elseif r.relation == :↔ + push!(S_inds[par_ind], S_lin_ixs[CartesianIndex(row_ind, col_ind)]) + if row_ind != col_ind # symmetric + push!(S_inds[par_ind], S_lin_ixs[CartesianIndex(col_ind, row_ind)]) end + else + error("Unsupported relation: $(r.relation)") end end end + # sort linear indices + for A_ind in A_inds + sort!(A_ind) + end + for S_ind in S_inds + unique!(sort!(S_ind)) # also symmetric duplicates + end + if !isnothing(M_inds) + for M_ind in M_inds + sort!(M_ind) + end + end + sort!(A_consts, by = first) + sort!(S_consts, by = first) + if !isnothing(M_consts) + sort!(M_consts, by = first) + end return RAMMatrices( - A_ind, - S_ind, - F_ind, - M_ind, - parameters, - colnames, - 
constants, - (n_observed, n_node), + ParamsMatrix{T}(A_inds, A_consts, (n_vars, n_vars)), + ParamsMatrix{T}(S_inds, S_consts, (n_vars, n_vars)), + sparse( + 1:n_observed, + [vars_index[var] for var in partable.observed_vars], + ones(T, n_observed), + n_observed, + n_vars, + ), + !isnothing(M_inds) ? ParamsVector{T}(M_inds, M_consts, (n_vars,)) : nothing, + param_labels, + vars_sorted, ) end +Base.convert( + ::Type{RAMMatrices}, + partable::ParameterTable; + param_labels::Union{AbstractVector{Symbol}, Nothing} = nothing, +) = RAMMatrices(partable; param_labels) + ############################################################################################ ### get parameter table from RAMMatrices ############################################################################################ -function ParameterTable(ram_matrices::RAMMatrices) - partable = ParameterTable(nothing) - - colnames = ram_matrices.colnames - position_names = Dict{Int64, Symbol}(1:length(colnames) .=> colnames) - - # observed and latent variables - names_obs = colnames[ram_matrices.F_ind] - names_lat = colnames[findall(x -> !(x ∈ ram_matrices.F_ind), 1:length(colnames))] +function ParameterTable( + ram::RAMMatrices; + param_labels::Union{AbstractVector{Symbol}, Nothing} = nothing, + observed_var_prefix::Symbol = :obs, + latent_var_prefix::Symbol = :var, +) + # defer parameter checks until we know which ones are used + + if !isnothing(ram.vars) + latent_vars = SEM.latent_vars(ram) + observed_vars = SEM.observed_vars(ram) + vars = ram.vars + else + observed_vars = [Symbol("$(observed_var_prefix)_$i") for i in 1:nobserved_vars(ram)] + latent_vars = [Symbol("$(latent_var_prefix)_$i") for i in 1:nlatent_vars(ram)] + vars = vcat(observed_vars, latent_vars) + end - partable.variables = Dict( - :sorted_vars => Vector{Symbol}(), - :observed_vars => names_obs, - :latent_vars => names_lat, + # construct an empty table + partable = ParameterTable( + observed_vars = observed_vars, + latent_vars = latent_vars, 
+ param_labels = isnothing(param_labels) ? SEM.param_labels(ram) : param_labels, ) - # constants - for c in ram_matrices.constants - push!(partable, get_partable_row(c, position_names)) + # fill the table + append_rows!(partable, ram.S, :S, ram.param_labels, vars, skip_symmetric = true) + append_rows!(partable, ram.A, :A, ram.param_labels, vars) + if !isnothing(ram.M) + append_rows!(partable, ram.M, :M, ram.param_labels, vars) end - # parameters - for (i, par) in enumerate(ram_matrices.parameters) - push_partable_rows!( - partable, - position_names, - par, - i, - ram_matrices.A_ind, - ram_matrices.S_ind, - ram_matrices.M_ind, - ram_matrices.size_F[2], - ) - end + check_param_labels(SEM.param_labels(partable), partable.columns[:label]) return partable end -############################################################################################ -### get RAMMatrices from EnsembleParameterTable -############################################################################################ - -function RAMMatrices(partable::EnsembleParameterTable) - ram_matrices = Dict{Symbol, RAMMatrices}() - - parameters, n_par, par_positions = get_par_npar_identifier(partable) - par_id = - Dict(:parameters => parameters, :n_par => n_par, :par_positions => par_positions) - - for key in keys(partable.tables) - ram_mat = RAMMatrices(partable.tables[key]; par_id = par_id) - push!(ram_matrices, key => ram_mat) - end - - return ram_matrices -end +Base.convert( + ::Type{<:ParameterTable}, + ram::RAMMatrices; + param_labels::Union{AbstractVector{Symbol}, Nothing} = nothing, +) = ParameterTable(ram; param_labels) ############################################################################################ ### Pretty Printing @@ -291,156 +300,107 @@ end ### Additional Functions ############################################################################################ -function get_par_npar_identifier(partable::ParameterTable) - parameters = unique(partable.columns[:identifier]) - filter!(x 
-> x != :const, parameters) - n_par = length(parameters) - par_positions = Dict(parameters .=> 1:n_par) - return parameters, n_par, par_positions -end - -function get_par_npar_identifier(partable::EnsembleParameterTable) - parameters = Vector{Symbol}() - for key in keys(partable.tables) - append!(parameters, partable.tables[key].columns[:identifier]) - end - parameters = unique(parameters) - filter!(x -> x != :const, parameters) - - n_par = length(parameters) - - par_positions = Dict(parameters .=> 1:n_par) - - return parameters, n_par, par_positions -end - -function get_partable_row(c::RAMConstant, position_names) - # variable names - from = position_names[c.index[2]] - to = position_names[c.index[1]] - # parameter type - if c.matrix == :A - parameter_type = :→ - elseif c.matrix == :S - parameter_type = :↔ - elseif c.matrix == :M - parameter_type = :→ - end - free = false - value_fixed = c.value - start = 0.0 - estimate = 0.0 - identifier = :const - return Dict( - :from => from, - :parameter_type => parameter_type, - :to => to, - :free => free, - :value_fixed => value_fixed, - :start => start, - :estimate => estimate, - :identifier => identifier, - ) -end - -function cartesian_is_known(index, known_indices) - known = false - for k_in in known_indices - if (index == k_in) | ((index[1] == k_in[2]) & (index[2] == k_in[1])) - known = true - end +# return the `from □ to` variables relation symbol (□) given the name of the source RAM matrix +function matrix_to_relation(matrix::Symbol) + if matrix == :A + return :→ + elseif matrix == :S + return :↔ + elseif matrix == :M + return :→ + else + throw( + ArgumentError( + "Unsupported matrix $matrix, supported matrices are :A, :S and :M", + ), + ) end - return known end -cartesian_is_known(index, known_indices::Nothing) = false - -function get_partable_row(par, position_names, index, matrix, n_nod, known_indices) +# generates a ParTable row NamedTuple for a given element of RAM matrix +function partable_row( + val, + index, + 
matrix::Symbol, + varnames::AbstractVector{Symbol}; + free::Bool = true, +) # variable names if matrix == :M - from = Symbol("1") - to = position_names[index] + from = Symbol(1) + to = varnames[index] else - index = linear2cartesian(index, (n_nod, n_nod)) - - if (matrix == :S) & (cartesian_is_known(index, known_indices)) - return nothing - elseif matrix == :S - push!(known_indices, index) - end - - from = position_names[index[2]] - to = position_names[index[1]] + from = varnames[index[2]] + to = varnames[index[1]] end - # parameter type - if matrix == :A - parameter_type = :→ - elseif matrix == :S - parameter_type = :↔ - elseif matrix == :M - parameter_type = :→ - end - - free = true - value_fixed = 0.0 - start = 0.0 - estimate = 0.0 - identifier = par - - return Dict( - :from => from, - :parameter_type => parameter_type, - :to => to, - :free => free, - :value_fixed => value_fixed, - :start => start, - :estimate => estimate, - :identifier => identifier, + return ( + from = from, + relation = matrix_to_relation(matrix), + to = to, + free = free, + value_fixed = free ? 0.0 : val, + start = 0.0, + estimate = 0.0, + label = free ? 
val : :const, ) end -function push_partable_rows!(partable, position_names, par, i, A_ind, S_ind, M_ind, n_nod) - A_ind = A_ind[i] - S_ind = S_ind[i] - isnothing(M_ind) || (M_ind = M_ind[i]) - - for ind in A_ind - push!(partable, get_partable_row(par, position_names, ind, :A, n_nod, nothing)) - end - - known_indices = Vector{CartesianIndex}() - for ind in S_ind - push!( - partable, - get_partable_row(par, position_names, ind, :S, n_nod, known_indices), - ) +function append_rows!( + partable::ParameterTable, + arr::ParamsArray, + arr_name::Symbol, + param_labels::AbstractVector, + varnames::AbstractVector{Symbol}; + skip_symmetric::Bool = false, +) + nparams(arr) == length(param_labels) || throw( + ArgumentError( + "Length of parameters vector ($(length(param_labels))) does not match the number of parameters in the matrix ($(nparams(arr)))", + ), + ) + arr_ixs = eachindex(arr) + + # add parameters + visited_indices = Set{eltype(arr_ixs)}() + for (i, par) in enumerate(param_labels) + for j in param_occurences_range(arr, i) + arr_ix = arr_ixs[arr.linear_indices[j]] + skip_symmetric && (arr_ix ∈ visited_indices) && continue + + push!(partable, partable_row(par, arr_ix, arr_name, varnames, free = true)) + if skip_symmetric + # mark index and its symmetric as visited + push!(visited_indices, arr_ix) + push!(visited_indices, CartesianIndex(arr_ix[2], arr_ix[1])) + end + end end - if !isnothing(M_ind) - for ind in M_ind - push!(partable, get_partable_row(par, position_names, ind, :M, n_nod, nothing)) + # add constants + for (i, _, val) in arr.constants + arr_ix = arr_ixs[i] + skip_symmetric && (arr_ix ∈ visited_indices) && continue + push!(partable, partable_row(val, arr_ix, arr_name, varnames, free = false)) + if skip_symmetric + # mark index and its symmetric as visited + push!(visited_indices, arr_ix) + push!(visited_indices, CartesianIndex(arr_ix[2], arr_ix[1])) end end return nothing end -function ==(mat1::RAMMatrices, mat2::RAMMatrices) +function 
Base.:(==)(mat1::RAMMatrices, mat2::RAMMatrices) res = ( - (mat1.A_ind == mat2.A_ind) && - (mat1.S_ind == mat2.S_ind) && - (mat1.F_ind == mat2.F_ind) && - (mat1.M_ind == mat2.M_ind) && - (mat1.parameters == mat2.parameters) && - (mat1.colnames == mat2.colnames) && - (mat1.size_F == mat2.size_F) && - (mat1.constants == mat2.constants) + (mat1.A == mat2.A) && + (mat1.S == mat2.S) && + (mat1.F == mat2.F) && + (mat1.M == mat2.M) && + (mat1.param_labels == mat2.param_labels) && + (mat1.vars == mat2.vars) ) return res end - -function get_group(d::Dict, group) - return d[group] -end diff --git a/src/frontend/specification/Sem.jl b/src/frontend/specification/Sem.jl index 208ef3000..7ba8f7fb7 100644 --- a/src/frontend/specification/Sem.jl +++ b/src/frontend/specification/Sem.jl @@ -3,37 +3,79 @@ ############################################################################################ function Sem(; + specification = ParameterTable, observed::O = SemObservedData, - imply::I = RAM, + implied::I = RAM, loss::L = SemML, - optimizer::D = SemOptimizerOptim, kwargs..., -) where {O, I, L, D} +) where {O, I, L} kwdict = Dict{Symbol, Any}(kwargs...) - set_field_type_kwargs!(kwdict, observed, imply, loss, optimizer, O, I, D) + set_field_type_kwargs!(kwdict, observed, implied, loss, O, I) - observed, imply, loss, optimizer = get_fields!(kwdict, observed, imply, loss, optimizer) + observed, implied, loss = get_fields!(kwdict, specification, observed, implied, loss) - sem = Sem(observed, imply, loss, optimizer) + sem = Sem(observed, implied, loss) return sem end +""" + implied(model::AbstractSemSingle) -> SemImplied + +Returns the [*implied*](@ref SemImplied) part of a model. 
+""" +implied(model::AbstractSemSingle) = model.implied + +nvars(model::AbstractSemSingle) = nvars(implied(model)) +nobserved_vars(model::AbstractSemSingle) = nobserved_vars(implied(model)) +nlatent_vars(model::AbstractSemSingle) = nlatent_vars(implied(model)) + +vars(model::AbstractSemSingle) = vars(implied(model)) +observed_vars(model::AbstractSemSingle) = observed_vars(implied(model)) +latent_vars(model::AbstractSemSingle) = latent_vars(implied(model)) + +param_labels(model::AbstractSemSingle) = param_labels(implied(model)) +nparams(model::AbstractSemSingle) = nparams(implied(model)) + +""" + observed(model::AbstractSemSingle) -> SemObserved + +Returns the [*observed*](@ref SemObserved) part of a model. +""" +observed(model::AbstractSemSingle) = model.observed + +""" + nsamples(model::AbstractSem) -> Int + +Returns the number of samples from the [*observed*](@ref SemObserved) part of a model. +""" +nsamples(model::AbstractSemSingle) = nsamples(observed(model)) + +""" + loss(model::AbstractSemSingle) -> SemLoss + +Returns the [*loss*](@ref SemLoss) function of a model. +""" +loss(model::AbstractSemSingle) = model.loss + +# sum of samples in all sub-models +nsamples(ensemble::SemEnsemble) = sum(nsamples, ensemble.sems) + function SemFiniteDiff(; + specification = ParameterTable, observed::O = SemObservedData, - imply::I = RAM, + implied::I = RAM, loss::L = SemML, - optimizer::D = SemOptimizerOptim, kwargs..., -) where {O, I, L, D} +) where {O, I, L} kwdict = Dict{Symbol, Any}(kwargs...) 
- set_field_type_kwargs!(kwdict, observed, imply, loss, optimizer, O, I, D) + set_field_type_kwargs!(kwdict, observed, implied, loss, O, I) - observed, imply, loss, optimizer = get_fields!(kwdict, observed, imply, loss, optimizer) + observed, implied, loss = get_fields!(kwdict, specification, observed, implied, loss) - sem = SemFiniteDiff(observed, imply, loss, optimizer) + sem = SemFiniteDiff(observed, implied, loss) return sem end @@ -42,9 +84,9 @@ end # functions ############################################################################################ -function set_field_type_kwargs!(kwargs, observed, imply, loss, optimizer, O, I, D) +function set_field_type_kwargs!(kwargs, observed, implied, loss, O, I) kwargs[:observed_type] = O <: Type ? observed : typeof(observed) - kwargs[:imply_type] = I <: Type ? imply : typeof(imply) + kwargs[:implied_type] = I <: Type ? implied : typeof(implied) if loss isa SemLoss kwargs[:loss_types] = [ lossfun isa SemLossFunction ? typeof(lossfun) : lossfun for @@ -56,35 +98,33 @@ function set_field_type_kwargs!(kwargs, observed, imply, loss, optimizer, O, I, else kwargs[:loss_types] = [loss isa SemLossFunction ? typeof(loss) : loss] end - kwargs[:optimizer_type] = D <: Type ? optimizer : typeof(optimizer) end # construct Sem fields -function get_fields!(kwargs, observed, imply, loss, optimizer) +function get_fields!(kwargs, specification, observed, implied, loss) + if !isa(specification, SemSpecification) + specification = specification(; kwargs...) + end + # observed if !isa(observed, SemObserved) - observed = observed(; kwargs...) + observed = observed(; specification, kwargs...) end kwargs[:observed] = observed - # imply - if !isa(imply, SemImply) - imply = imply(; kwargs...) + # implied + if !isa(implied, SemImplied) + implied = implied(; specification, kwargs...) 
end - kwargs[:imply] = imply - kwargs[:n_par] = n_par(imply) + kwargs[:implied] = implied + kwargs[:nparams] = nparams(implied) # loss - loss = get_SemLoss(loss; kwargs...) + loss = get_SemLoss(loss; specification, kwargs...) kwargs[:loss] = loss - # optimizer - if !isa(optimizer, SemOptimizer) - optimizer = optimizer(; kwargs...) - end - - return observed, imply, loss, optimizer + return observed, implied, loss end # construct loss field @@ -121,7 +161,7 @@ end print(io, "Sem{$(nameof(O)), $(nameof(I)), $lossfuntypes, $(nameof(D))}") end =# -function Base.show(io::IO, sem::Sem{O, I, L, D}) where {O, I, L, D} +function Base.show(io::IO, sem::Sem{O, I, L}) where {O, I, L} lossfuntypes = @. string(nameof(typeof(sem.loss.functions))) lossfuntypes = " " .* lossfuntypes .* ("\n") print(io, "Structural Equation Model \n") @@ -129,11 +169,10 @@ function Base.show(io::IO, sem::Sem{O, I, L, D}) where {O, I, L, D} print(io, lossfuntypes...) print(io, "- Fields \n") print(io, " observed: $(nameof(O)) \n") - print(io, " imply: $(nameof(I)) \n") - print(io, " optimizer: $(nameof(D)) \n") + print(io, " implied: $(nameof(I)) \n") end -function Base.show(io::IO, sem::SemFiniteDiff{O, I, L, D}) where {O, I, L, D} +function Base.show(io::IO, sem::SemFiniteDiff{O, I, L}) where {O, I, L} lossfuntypes = @. string(nameof(typeof(sem.loss.functions))) lossfuntypes = " " .* lossfuntypes .* ("\n") print(io, "Structural Equation Model : Finite Diff Approximation\n") @@ -141,8 +180,7 @@ function Base.show(io::IO, sem::SemFiniteDiff{O, I, L, D}) where {O, I, L, D} print(io, lossfuntypes...) 
print(io, "- Fields \n") print(io, " observed: $(nameof(O)) \n") - print(io, " imply: $(nameof(I)) \n") - print(io, " optimizer: $(nameof(D)) \n") + print(io, " implied: $(nameof(I)) \n") end function Base.show(io::IO, loss::SemLoss) @@ -165,7 +203,6 @@ function Base.show(io::IO, models::SemEnsemble) print(io, "SemEnsemble \n") print(io, "- Number of Models: $(models.n) \n") print(io, "- Weights: $(round.(models.weights, digits = 2)) \n") - print(io, "- optimizer: $(nameof(typeof(optimizer(models)))) \n") print(io, "\n", "Models: \n") print(io, "===============================================", "\n") diff --git a/src/frontend/specification/StenoGraphs.jl b/src/frontend/specification/StenoGraphs.jl index 5c9ce7fdb..314abcc35 100644 --- a/src/frontend/specification/StenoGraphs.jl +++ b/src/frontend/specification/StenoGraphs.jl @@ -4,6 +4,9 @@ ### Define Modifiers ############################################################################################ +#FIXME: remove when StenoGraphs.jl will provide AbstractStenoGraph +const AbstractStenoGraph = AbstractArray{T, 1} where {T <: StenoGraphs.AbstractEdge} + # fixed parameter values struct Fixed{N} <: EdgeModifier value::N @@ -24,63 +27,85 @@ struct Label{N} <: EdgeModifier end label(args...) 
= Label(args) +# test whether the modifier is NaN +isnanmodval(val::Number) = isnan(val) +isnanmodval(val::Symbol) = val == :NaN +isnanmodval(val::SimpleNode{Symbol}) = val.node == :NaN + ############################################################################################ ### constructor for parameter table from graph ############################################################################################ -function ParameterTable(; graph, observed_vars, latent_vars, g = 1, parname = :θ) +function ParameterTable( + graph::AbstractStenoGraph; + observed_vars::AbstractVector{Symbol}, + latent_vars::AbstractVector{Symbol}, + param_labels::Union{AbstractVector{Symbol}, Nothing} = nothing, + group::Union{Integer, Nothing} = nothing, + param_prefix::Symbol = :θ, +) graph = unique(graph) n = length(graph) - from = Vector{Symbol}(undef, n) - parameter_type = Vector{Symbol}(undef, n) - to = Vector{Symbol}(undef, n) - free = ones(Bool, n) - value_fixed = zeros(n) - start = zeros(n) - estimate = zeros(n) - identifier = Vector{Symbol}(undef, n) - identifier .= Symbol("") - # group = Vector{Symbol}(undef, n) - # start_partable = zeros(Bool, n) - sorted_vars = Vector{Symbol}() + columns = empty_partable_columns(n) + from = columns[:from] + relation = columns[:relation] + to = columns[:to] + free = columns[:free] + value_fixed = columns[:value_fixed] + start = columns[:start] + param_refs = columns[:label] + # group = Vector{Symbol}(undef, n) for (i, element) in enumerate(graph) - if element isa DirectedEdge - from[i] = element.src.node - to[i] = element.dst.node - parameter_type[i] = :→ - elseif element isa UndirectedEdge - from[i] = element.src.node - to[i] = element.dst.node - parameter_type[i] = :↔ - elseif element isa ModifiedEdge - if element.edge isa DirectedEdge - from[i] = element.edge.src.node - to[i] = element.edge.dst.node - parameter_type[i] = :→ - elseif element.edge isa UndirectedEdge - from[i] = element.edge.src.node - to[i] = element.edge.dst.node - 
parameter_type[i] = :↔ + edge = element isa ModifiedEdge ? element.edge : element + from[i] = edge.src.node + to[i] = edge.dst.node + if edge isa DirectedEdge + relation[i] = :→ + elseif edge isa UndirectedEdge + relation[i] = :↔ + else + throw( + ArgumentError( + "The graph contains an unsupported edge of type $(typeof(edge)).", + ), + ) + end + if element isa ModifiedEdge + if any(Base.Fix2(isa, Fixed), values(element.modifiers)) && + any(Base.Fix2(isa, Label), values(element.modifiers)) + throw(ArgumentError("It is not allowed to label fixed parameters.")) end for modifier in values(element.modifiers) + if isnothing(group) && + modifier.value isa Union{AbstractVector, Tuple} && + length(modifier.value) > 1 + throw( + ArgumentError( + "The graph contains a group of parameters, ParameterTable expects a single value.\n" * + "For SEM ensembles, use EnsembleParameterTable instead.", + ), + ) + end + modval = modifier.value[something(group, 1)] if modifier isa Fixed - if modifier.value[g] == :NaN + if isnanmodval(modval) free[i] = true value_fixed[i] = 0.0 else free[i] = false - value_fixed[i] = modifier.value[g] + value_fixed[i] = modval end elseif modifier isa Start - start_partable[i] = modifier.value[g] == :NaN - start[i] = modifier.value[g] + if !isnanmodval(modval) + start[i] = modval + end elseif modifier isa Label - if modifier.value[g] == :NaN + if isnanmodval(modval) throw(DomainError(NaN, "NaN is not allowed as a parameter label.")) end - identifier[i] = modifier.value[g] + param_refs[i] = modval end end end @@ -88,57 +113,56 @@ function ParameterTable(; graph, observed_vars, latent_vars, g = 1, parname = : # make identifiers for parameters that are not labeled current_id = 1 - for i in 1:length(identifier) - if (identifier[i] == Symbol("")) & free[i] - identifier[i] = Symbol(parname, :_, current_id) - current_id += 1 - elseif (identifier[i] == Symbol("")) & !free[i] - identifier[i] = :const - elseif (identifier[i] != Symbol("")) & !free[i] - @warn "You 
labeled a constant. Please check if the labels of your graph are correct." + for i in eachindex(param_refs) + if param_refs[i] == Symbol("") + if free[i] + param_refs[i] = Symbol(param_prefix, :_, current_id) + current_id += 1 + else + param_refs[i] = :const + end + elseif !free[i] + @warn "You labeled a constant ($(param_refs[i])=$(value_fixed[i])). Please check if the labels of your graph are correct." end end - return StructuralEquationModels.ParameterTable( - Dict( - :from => from, - :parameter_type => parameter_type, - :to => to, - :free => free, - :value_fixed => value_fixed, - :start => start, - :estimate => estimate, - :identifier => identifier, - ), - Dict( - :latent_vars => latent_vars, - :observed_vars => observed_vars, - :sorted_vars => sorted_vars, - ), - ) + return ParameterTable(columns; latent_vars, observed_vars, param_labels) end +############################################################################################ +### keyword only constructor (for call in `Sem` constructor) +############################################################################################ + +# FIXME: this kw-only ctor conflicts with the empty ParTable constructor; +# it is left here for compatibility with the current Sem construction API, +# the proper fix would be to move away from kw-only ctors in general +ParameterTable(; graph::Union{AbstractStenoGraph, Nothing} = nothing, kwargs...) = + !isnothing(graph) ? ParameterTable(graph; kwargs...) : + ParameterTable(empty_partable_columns(); kwargs...) 
+ ############################################################################################ ### constructor for EnsembleParameterTable from graph ############################################################################################ -function EnsembleParameterTable(; graph, observed_vars, latent_vars, groups) +function EnsembleParameterTable( + graph::AbstractStenoGraph; + observed_vars::AbstractVector{Symbol}, + latent_vars::AbstractVector{Symbol}, + param_labels::Union{AbstractVector{Symbol}, Nothing} = nothing, + groups, +) graph = unique(graph) - partable = EnsembleParameterTable(nothing) - - for (i, group) in enumerate(groups) - push!( - partable.tables, - Symbol(group) => ParameterTable(; - graph = graph, - observed_vars = observed_vars, - latent_vars = latent_vars, - g = i, - parname = Symbol(:g, i), - ), - ) - end + partables = Dict( + group => ParameterTable( + graph; + observed_vars, + latent_vars, + param_labels, + group = i, + param_prefix = Symbol(:g, group), + ) for (i, group) in enumerate(groups) + ) - return partable + return EnsembleParameterTable(partables; param_labels) end diff --git a/src/frontend/specification/checks.jl b/src/frontend/specification/checks.jl new file mode 100644 index 000000000..5ef41c59d --- /dev/null +++ b/src/frontend/specification/checks.jl @@ -0,0 +1,42 @@ +# check if params vector correctly matches the parameter references (from the ParameterTable) +function check_param_labels( + param_labels::AbstractVector{Symbol}, + param_refs::Union{AbstractVector{Symbol}, Nothing}, +) + dup_param_labels = nonunique(param_labels) + isempty(dup_param_labels) || + throw(ArgumentError("Duplicate parameter labels detected: $(join(dup_param_labels, ", "))")) + any(==(:const), param_labels) && + throw(ArgumentError("Parameters constain reserved :const name")) + + if !isnothing(param_refs) + # check if all references parameters are present + all_refs = Set(id for id in param_refs if id != :const) + undecl_params = setdiff(all_refs, 
param_labels) + if !isempty(undecl_params) + throw( + ArgumentError( + "The following $(length(undecl_params)) parameters present in the table, but are not declared: " * + join(sort!(collect(undecl_params))), + ), + ) + end + end + + return nothing +end + +function check_vars(vars::AbstractVector{Symbol}, nvars::Union{Integer, Nothing}) + isnothing(nvars) || + length(vars) == nvars || + throw( + DimensionMismatch( + "variables length ($(length(vars))) does not match the number of columns in A matrix ($nvars)", + ), + ) + dup_vars = nonunique(vars) + isempty(dup_vars) || + throw(ArgumentError("Duplicate variables detected: $(join(dup_vars, ", "))")) + + return nothing +end diff --git a/src/frontend/specification/documentation.jl b/src/frontend/specification/documentation.jl index e3be49971..54f43fa9c 100644 --- a/src/frontend/specification/documentation.jl +++ b/src/frontend/specification/documentation.jl @@ -1,3 +1,37 @@ +param_labels(spec::SemSpecification) = spec.param_labels + +""" + vars(semobj) -> Vector{Symbol} + +Return the vector of SEM model variables (both observed and latent) +in the order specified by the model. +""" +function vars end + +vars(spec::SemSpecification) = error("vars(spec::$(typeof(spec))) is not implemented") + +""" + observed_vars(semobj) -> Vector{Symbol} + +Return the vector of SEM model observed variable in the order specified by the +model, which also should match the order of variables in [`SemObserved`](@ref). +""" +function observed_vars end + +observed_vars(spec::SemSpecification) = + error("observed_vars(spec::$(typeof(spec))) is not implemented") + +""" + latent_vars(semobj) -> Vector{Symbol} + +Return the vector of SEM model latent variable in the order specified by the +model. +""" +function latent_vars end + +latent_vars(spec::SemSpecification) = + error("latent_vars(spec::$(typeof(spec))) is not implemented") + """ `ParameterTable`s contain the specification of a structural equation model. 
@@ -14,7 +48,7 @@ Return a `ParameterTable` constructed from (1) a graph or (2) RAM matrices. - `observed_vars::Vector{Symbol}`: observed variable names - `latent_vars::Vector{Symbol}`: latent variable names - `ram_matrices::RAMMatrices`: a `RAMMatrices` object - + # Examples See the online documentation on [Model specification](@ref) and the [ParameterTable interface](@ref). @@ -31,16 +65,16 @@ function ParameterTable end (1) EnsembleParameterTable(;graph, observed_vars, latent_vars, groups) - (2) EnsembleParameterTable(args...; groups) + (2) EnsembleParameterTable(ps::Pair...; param_labels = nothing) -Return an `EnsembleParameterTable` constructed from (1) a graph or (2) multiple RAM matrices. +Return an `EnsembleParameterTable` constructed from (1) a graph or (2) multiple specifications. # Arguments - `graph`: graph defined via `@StenoGraph` - `observed_vars::Vector{Symbol}`: observed variable names - `latent_vars::Vector{Symbol}`: latent variable names -- `groups::Vector{Symbol}`: group names -- `args...`: `RAMMatrices` for each model +- `param_labels::Vector{Symbol}`: (optional) a vector of parameter names +- `ps::Pair...`: `:group_name => specification`, where `specification` is either a `ParameterTable` or `RAMMatrices` # Examples See the online documentation on [Multigroup models](@ref). @@ -54,11 +88,11 @@ function EnsembleParameterTable end (1) RAMMatrices(partable::ParameterTable) - (2) RAMMatrices(;A, S, F, M = nothing, parameters, colnames) + (2) RAMMatrices(;A, S, F, M = nothing, param_labels, vars) (3) RAMMatrices(partable::EnsembleParameterTable) - -Return `RAMMatrices` constructed from (1) a parameter table or (2) individual matrices. + +Return `RAMMatrices` constructed from (1) a parameter table or (2) individual matrices. (3) Return a dictionary of `RAMMatrices` from an `EnsembleParameterTable` (keys are the group names). 
@@ -68,8 +102,8 @@ Return `RAMMatrices` constructed from (1) a parameter table or (2) individual ma - `S`: matrix of undirected effects - `F`: filter matrix - `M`: vector of mean effects -- `parameters::Vector{Symbol}`: parameter labels -- `colnames::Vector{Symbol}`: variable names corresponding to the A, S and F matrix columns +- `param_labels::Vector{Symbol}`: parameter labels +- `vars::Vector{Symbol}`: variable names corresponding to the A, S and F matrix columns # Examples See the online documentation on [Model specification](@ref) and the [RAMMatrices interface](@ref). @@ -79,7 +113,7 @@ function RAMMatrices end """ fixed(args...) -Fix parameters to a certain value. +Fix parameters to a certain value. For ensemble models, multiple values (one for each submodel/group) are needed. # Examples @@ -94,7 +128,7 @@ function fixed end """ start(args...) -Define starting values for parameters. +Define starting values for parameters. For ensemble models, multiple values (one for each submodel/group) are needed. # Examples diff --git a/src/implied/RAM/generic.jl b/src/implied/RAM/generic.jl new file mode 100644 index 000000000..301c455e9 --- /dev/null +++ b/src/implied/RAM/generic.jl @@ -0,0 +1,208 @@ +############################################################################################ +### Types +############################################################################################ +@doc raw""" +Model implied covariance and means via RAM notation. + +# Constructor + + RAM(; + specification, + meanstructure = false, + gradient = true, + kwargs...) + +# Arguments +- `specification`: either a `RAMMatrices` or `ParameterTable` object +- `meanstructure::Bool`: does the model have a meanstructure? +- `gradient::Bool`: is gradient-based optimization used + +# Extended help + +## Implementation +Subtype of `SemImplied`. 
+ +## RAM notation + +The model implied covariance matrix is computed as +```math + \Sigma = F(I-A)^{-1}S(I-A)^{-T}F^T +``` +and for models with a meanstructure, the model implied means are computed as +```math + \mu = F(I-A)^{-1}M +``` + +## Interfaces +- `param_labels(::RAM) `-> vector of parameter labels +- `nparams(::RAM)` -> number of parameters + +- `Σ(::RAM)` -> model implied covariance matrix +- `μ(::RAM)` -> model implied mean vector + +RAM matrices for the current parameter values: +- `A(::RAM)` +- `S(::RAM)` +- `F(::RAM)` +- `M(::RAM)` + +Jacobians of RAM matrices w.r.t to the parameter vector `θ` +- `∇A(::RAM)` -> ``∂vec(A)/∂θᵀ`` +- `∇S(::RAM)` -> ``∂vec(S)/∂θᵀ`` +- `∇M(::RAM)` = ``∂M/∂θᵀ`` + +Vector of indices of each parameter in the respective RAM matrix: +- `A_indices(::RAM)` +- `S_indices(::RAM)` +- `M_indices(::RAM)` + +Additional interfaces +- `F⨉I_A⁻¹(::RAM)` -> ``F(I-A)^{-1}`` +- `F⨉I_A⁻¹S(::RAM)` -> ``F(I-A)^{-1}S`` +- `I_A(::RAM)` -> ``I-A`` +- `has_meanstructure(::RAM)` -> `Val{Bool}` does the model have a meanstructure? + +Only available in gradient! calls: +- `I_A⁻¹(::RAM)` -> ``(I-A)^{-1}`` +""" +mutable struct RAM{MS, A1, A2, A3, A4, A5, A6, V2, M1, M2, M3, M4, S1, S2, S3} <: SemImplied + meanstruct::MS + hessianeval::ExactHessian + + Σ::A1 + A::A2 + S::A3 + F::A4 + μ::A5 + M::A6 + + ram_matrices::V2 + + F⨉I_A⁻¹::M1 + F⨉I_A⁻¹S::M2 + I_A::M3 + I_A⁻¹::M4 + + ∇A::S1 + ∇S::S2 + ∇M::S3 + + RAM{MS}(args...) where {MS <: MeanStruct} = + new{MS, map(typeof, args)...}(MS(), ExactHessian(), args...) 
+end + +############################################################################################ +### Constructors +############################################################################################ + +function RAM(; + specification::SemSpecification, + #vech = false, + gradient_required = true, + meanstructure = false, + kwargs..., +) + ram_matrices = convert(RAMMatrices, specification) + + # get dimensions of the model + n_par = nparams(ram_matrices) + n_obs = nobserved_vars(ram_matrices) + n_var = nvars(ram_matrices) + + #preallocate arrays + rand_params = randn(Float64, n_par) + A_pre = check_acyclic(materialize(ram_matrices.A, rand_params)) + S_pre = materialize(ram_matrices.S, rand_params) + F = copy(ram_matrices.F) + + # pre-allocate some matrices + Σ = zeros(n_obs, n_obs) + F⨉I_A⁻¹ = zeros(n_obs, n_var) + F⨉I_A⁻¹S = zeros(n_obs, n_var) + I_A = similar(A_pre) + + if gradient_required + ∇A = sparse_gradient(ram_matrices.A) + ∇S = sparse_gradient(ram_matrices.S) + else + ∇A = nothing + ∇S = nothing + end + + # μ + if meanstructure + MS = HasMeanStruct + !isnothing(ram_matrices.M) || throw( + ArgumentError( + "You set `meanstructure = true`, but your model specification contains no mean parameters.", + ), + ) + M_pre = materialize(ram_matrices.M, rand_params) + ∇M = gradient_required ? 
sparse_gradient(ram_matrices.M) : nothing + μ = zeros(n_obs) + else + MS = NoMeanStruct + M_pre = nothing + μ = nothing + ∇M = nothing + end + + return RAM{MS}( + Σ, + A_pre, + S_pre, + F, + μ, + M_pre, + ram_matrices, + F⨉I_A⁻¹, + F⨉I_A⁻¹S, + I_A, + copy(I_A), + ∇A, + ∇S, + ∇M, + ) +end + +############################################################################################ +### methods +############################################################################################ + +function update!(targets::EvaluationTargets, implied::RAM, model::AbstractSemSingle, param_labels) + materialize!(implied.A, implied.ram_matrices.A, param_labels) + materialize!(implied.S, implied.ram_matrices.S, param_labels) + if !isnothing(implied.M) + materialize!(implied.M, implied.ram_matrices.M, param_labels) + end + + parent(implied.I_A) .= .-implied.A + @view(implied.I_A[diagind(implied.I_A)]) .+= 1 + + if is_gradient_required(targets) || is_hessian_required(targets) + implied.I_A⁻¹ = LinearAlgebra.inv!(factorize(implied.I_A)) + mul!(implied.F⨉I_A⁻¹, implied.F, implied.I_A⁻¹) + else + copyto!(implied.F⨉I_A⁻¹, implied.F) + rdiv!(implied.F⨉I_A⁻¹, factorize(implied.I_A)) + end + + mul!(implied.F⨉I_A⁻¹S, implied.F⨉I_A⁻¹, implied.S) + mul!(parent(implied.Σ), implied.F⨉I_A⁻¹S, implied.F⨉I_A⁻¹') + + if MeanStruct(implied) === HasMeanStruct + mul!(implied.μ, implied.F⨉I_A⁻¹, implied.M) + end +end + +############################################################################################ +### Recommended methods +############################################################################################ + +function update_observed(implied::RAM, observed::SemObserved; kwargs...) + if nobserved_vars(observed) == size(implied.Σ, 1) + return implied + else + return RAM(; observed = observed, kwargs...) 
+ end +end diff --git a/src/imply/RAM/symbolic.jl b/src/implied/RAM/symbolic.jl similarity index 61% rename from src/imply/RAM/symbolic.jl rename to src/implied/RAM/symbolic.jl index 5c5e52112..eff193c17 100644 --- a/src/imply/RAM/symbolic.jl +++ b/src/implied/RAM/symbolic.jl @@ -2,7 +2,7 @@ ### Types ############################################################################################ @doc raw""" -Subtype of `SemImply` that implements the RAM notation with symbolic precomputation. +Subtype of `SemImplied` that implements the RAM notation with symbolic precomputation. # Constructor @@ -26,11 +26,11 @@ Subtype of `SemImply` that implements the RAM notation with symbolic precomputat # Extended help ## Implementation -Subtype of `SemImply`. +Subtype of `SemImplied`. ## Interfaces -- `identifier(::RAMSymbolic) `-> Dict containing the parameter labels and their position -- `n_par(::RAMSymbolic)` -> Number of parameters +- `param_labels(::RAMSymbolic) `-> vector of parameter ids +- `nparams(::RAMSymbolic)` -> number of parameters - `Σ(::RAMSymbolic)` -> model implied covariance matrix - `μ(::RAMSymbolic)` -> model implied mean vector @@ -62,8 +62,10 @@ and for models with a meanstructure, the model implied means are computed as \mu = F(I-A)^{-1}M ``` """ -struct RAMSymbolic{F1, F2, F3, A1, A2, A3, S1, S2, S3, V, V2, F4, A4, F5, A5, D1, B} <: - SemImplySymbolic +struct RAMSymbolic{MS, F1, F2, F3, A1, A2, A3, S1, S2, S3, V2, F4, A4, F5, A5} <: + SemImpliedSymbolic + meanstruct::MS + hessianeval::ExactHessian Σ_function::F1 ∇Σ_function::F2 ∇²Σ_function::F3 @@ -73,14 +75,14 @@ struct RAMSymbolic{F1, F2, F3, A1, A2, A3, S1, S2, S3, V, V2, F4, A4, F5, A5, D1 Σ_symbolic::S1 ∇Σ_symbolic::S2 ∇²Σ_symbolic::S3 - n_par::V ram_matrices::V2 μ_function::F4 μ::A4 ∇μ_function::F5 ∇μ::A5 - identifier::D1 - has_meanstructure::B + + RAMSymbolic{MS}(args...) where {MS <: MeanStruct} = + new{MS, map(typeof, args)...}(MS(), ExactHessian(), args...) 
end ############################################################################################ @@ -88,42 +90,34 @@ end ############################################################################################ function RAMSymbolic(; - specification, + specification::SemSpecification, loss_types = nothing, vech = false, + simplify_symbolics = false, gradient = true, hessian = false, meanstructure = false, approximate_hessian = false, kwargs..., ) - ram_matrices = RAMMatrices(specification) - identifier = StructuralEquationModels.identifier(ram_matrices) - - n_par = length(ram_matrices.parameters) - n_var, n_nod = ram_matrices.size_F + ram_matrices = convert(RAMMatrices, specification) + n_par = nparams(ram_matrices) par = (Symbolics.@variables θ[1:n_par])[1] - A = zeros(Num, n_nod, n_nod) - S = zeros(Num, n_nod, n_nod) - !isnothing(ram_matrices.M_ind) ? M = zeros(Num, n_nod) : M = nothing - F = zeros(ram_matrices.size_F) - F[CartesianIndex.(1:n_var, ram_matrices.F_ind)] .= 1.0 - - set_RAMConstants!(A, S, M, ram_matrices.constants) - fill_A_S_M!(A, S, M, ram_matrices.A_ind, ram_matrices.S_ind, ram_matrices.M_ind, par) + A = sparse_materialize(Num, ram_matrices.A, par) + S = sparse_materialize(Num, ram_matrices.S, par) + M = !isnothing(ram_matrices.M) ? materialize(Num, ram_matrices.M, par) : nothing + F = ram_matrices.F - A, S, F = sparse(A), sparse(S), sparse(F) - - if !isnothing(loss_types) - any(loss_types .<: SemWLS) ? 
vech = true : nothing + if !isnothing(loss_types) && any(T -> T <: SemWLS, loss_types) + vech = true end I_A⁻¹ = neumann_series(A) # Σ - Σ_symbolic = eval_Σ_symbolic(S, I_A⁻¹, F; vech = vech) + Σ_symbolic = eval_Σ_symbolic(S, I_A⁻¹, F; vech = vech, simplify = simplify_symbolics) #print(Symbolics.build_function(Σ_symbolic)[2]) Σ_function = Symbolics.build_function(Σ_symbolic, par, expression = Val{false})[2] Σ = zeros(size(Σ_symbolic)) @@ -142,9 +136,8 @@ function RAMSymbolic(; ∇Σ = nothing end - if hessian & !approximate_hessian + if hessian && !approximate_hessian n_sig = length(Σ_symbolic) - n_par = size(par, 1) ∇²Σ_symbolic_vec = [Symbolics.sparsehessian(σᵢ, [par...]) for σᵢ in vec(Σ_symbolic)] @variables J[1:n_sig] @@ -164,8 +157,8 @@ function RAMSymbolic(; # μ if meanstructure - has_meanstructure = Val(true) - μ_symbolic = eval_μ_symbolic(M, I_A⁻¹, F) + MS = HasMeanStruct + μ_symbolic = eval_μ_symbolic(M, I_A⁻¹, F; simplify = simplify_symbolics) μ_function = Symbolics.build_function(μ_symbolic, par, expression = Val{false})[2] μ = zeros(size(μ_symbolic)) if gradient @@ -178,14 +171,14 @@ function RAMSymbolic(; ∇μ = nothing end else - has_meanstructure = Val(false) + MS = NoMeanStruct μ_function = nothing μ = nothing ∇μ_function = nothing ∇μ = nothing end - return RAMSymbolic( + return RAMSymbolic{MS}( Σ_function, ∇Σ_function, ∇²Σ_function, @@ -195,14 +188,11 @@ function RAMSymbolic(; Σ_symbolic, ∇Σ_symbolic, ∇²Σ_symbolic, - n_par, ram_matrices, μ_function, μ, ∇μ_function, ∇μ, - identifier, - has_meanstructure, ) end @@ -210,88 +200,62 @@ end ### objective, gradient, hessian ############################################################################################ -# dispatch on meanstructure -objective!(imply::RAMSymbolic, par, model) = - objective!(imply, par, model, imply.has_meanstructure) -gradient!(imply::RAMSymbolic, par, model) = - gradient!(imply, par, model, imply.has_meanstructure) - -# objective -function objective!(imply::RAMSymbolic, par, model, 
has_meanstructure::Val{T}) where {T} - imply.Σ_function(imply.Σ, par) - T && imply.μ_function(imply.μ, par) -end +function update!( + targets::EvaluationTargets, + implied::RAMSymbolic, + model::AbstractSemSingle, + par, +) + implied.Σ_function(implied.Σ, par) + if MeanStruct(implied) === HasMeanStruct + implied.μ_function(implied.μ, par) + end -# gradient -function gradient!(imply::RAMSymbolic, par, model, has_meanstructure::Val{T}) where {T} - objective!(imply, par, model, imply.has_meanstructure) - imply.∇Σ_function(imply.∇Σ, par) - T && imply.∇μ_function(imply.∇μ, par) + if is_gradient_required(targets) || is_hessian_required(targets) + implied.∇Σ_function(implied.∇Σ, par) + if MeanStruct(implied) === HasMeanStruct + implied.∇μ_function(implied.∇μ, par) + end + end end -# other methods -hessian!(imply::RAMSymbolic, par, model) = gradient!(imply, par, model) -objective_gradient!(imply::RAMSymbolic, par, model) = gradient!(imply, par, model) -objective_hessian!(imply::RAMSymbolic, par, model) = gradient!(imply, par, model) -gradient_hessian!(imply::RAMSymbolic, par, model) = gradient!(imply, par, model) -objective_gradient_hessian!(imply::RAMSymbolic, par, model) = gradient!(imply, par, model) - ############################################################################################ ### Recommended methods ############################################################################################ -identifier(imply::RAMSymbolic) = imply.identifier -n_par(imply::RAMSymbolic) = imply.n_par - -function update_observed(imply::RAMSymbolic, observed::SemObserved; kwargs...) - if n_man(observed) == size(imply.Σ, 1) - return imply +function update_observed(implied::RAMSymbolic, observed::SemObserved; kwargs...) + if nobserved_vars(observed) == size(implied.Σ, 1) + return implied else return RAMSymbolic(; observed = observed, kwargs...) 
end end -############################################################################################ -### additional methods -############################################################################################ - -Σ(imply::RAMSymbolic) = imply.Σ -∇Σ(imply::RAMSymbolic) = imply.∇Σ -∇²Σ(imply::RAMSymbolic) = imply.∇²Σ - -μ(imply::RAMSymbolic) = imply.μ -∇μ(imply::RAMSymbolic) = imply.∇μ - -Σ_function(imply::RAMSymbolic) = imply.Σ_function -∇Σ_function(imply::RAMSymbolic) = imply.∇Σ_function -∇²Σ_function(imply::RAMSymbolic) = imply.∇²Σ_function - -has_meanstructure(imply::RAMSymbolic) = imply.has_meanstructure - -ram_matrices(imply::RAMSymbolic) = imply.ram_matrices - ############################################################################################ ### additional functions ############################################################################################ # expected covariations of observed vars -function eval_Σ_symbolic(S, I_A⁻¹, F; vech = false) +function eval_Σ_symbolic(S, I_A⁻¹, F; vech = false, simplify = false) Σ = F * I_A⁻¹ * S * permutedims(I_A⁻¹) * permutedims(F) Σ = Array(Σ) vech && (Σ = Σ[tril(trues(size(F, 1), size(F, 1)))]) - # Σ = Symbolics.simplify.(Σ) - Threads.@threads for i in eachindex(Σ) - Σ[i] = Symbolics.simplify(Σ[i]) + if simplify + Threads.@threads for i in eachindex(Σ) + Σ[i] = Symbolics.simplify(Σ[i]) + end end return Σ end # expected means of observed vars -function eval_μ_symbolic(M, I_A⁻¹, F) +function eval_μ_symbolic(M, I_A⁻¹, F; simplify = false) μ = F * I_A⁻¹ * M μ = Array(μ) - Threads.@threads for i in eachindex(μ) - μ[i] = Symbolics.simplify(μ[i]) + if simplify + Threads.@threads for i in eachindex(μ) + μ[i] = Symbolics.simplify(μ[i]) + end end return μ end diff --git a/src/implied/abstract.jl b/src/implied/abstract.jl new file mode 100644 index 000000000..af51440c6 --- /dev/null +++ b/src/implied/abstract.jl @@ -0,0 +1,34 @@ + +# vars and params API methods for SemImplied +vars(implied::SemImplied) = 
vars(implied.ram_matrices) +observed_vars(implied::SemImplied) = observed_vars(implied.ram_matrices) +latent_vars(implied::SemImplied) = latent_vars(implied.ram_matrices) + +nvars(implied::SemImplied) = nvars(implied.ram_matrices) +nobserved_vars(implied::SemImplied) = nobserved_vars(implied.ram_matrices) +nlatent_vars(implied::SemImplied) = nlatent_vars(implied.ram_matrices) + +param_labels(implied::SemImplied) = param_labels(implied.ram_matrices) +nparams(implied::SemImplied) = nparams(implied.ram_matrices) + +# checks if the A matrix is acyclic +# wraps A in LowerTriangular/UpperTriangular if it is triangular +function check_acyclic(A::AbstractMatrix; verbose::Bool = false) + # check if A is lower or upper triangular + if istril(A) + verbose && @info "A matrix is lower triangular" + return LowerTriangular(A) + elseif istriu(A) + verbose && @info "A matrix is upper triangular" + return UpperTriangular(A) + else + # check if non-triangular matrix is acyclic + acyclic = isone(det(I - A)) + if acyclic + verbose && + @info "The matrix is acyclic. Reordering variables in the model to make the A matrix either Upper or Lower Triangular can significantly improve performance.\n" maxlog = + 1 + end + return A + end +end diff --git a/src/imply/empty.jl b/src/implied/empty.jl similarity index 54% rename from src/imply/empty.jl rename to src/implied/empty.jl index ba8580d16..3b0292e73 100644 --- a/src/imply/empty.jl +++ b/src/implied/empty.jl @@ -2,59 +2,56 @@ ### Types ############################################################################################ """ -Empty placeholder for models that don't need an imply part. +Empty placeholder for models that don't need an implied part. (For example, models that only regularize parameters.) # Constructor - ImplyEmpty(;specification, kwargs...) + ImpliedEmpty(;specification, kwargs...) 
# Arguments - `specification`: either a `RAMMatrices` or `ParameterTable` object # Examples A multigroup model with ridge regularization could be specified as a `SemEnsemble` with one -model per group and an additional model with `ImplyEmpty` and `SemRidge` for the regularization part. +model per group and an additional model with `ImpliedEmpty` and `SemRidge` for the regularization part. # Extended help ## Interfaces -- `identifier(::RAMSymbolic) `-> Dict containing the parameter labels and their position -- `n_par(::RAMSymbolic)` -> Number of parameters +- `param_labels(::RAMSymbolic) `-> Vector of parameter labels +- `nparams(::RAMSymbolic)` -> Number of parameters ## Implementation -Subtype of `SemImply`. +Subtype of `SemImplied`. """ -struct ImplyEmpty{V, V2} <: SemImply - identifier::V2 - n_par::V +struct ImpliedEmpty{A, B, C} <: SemImplied + hessianeval::A + meanstruct::B + ram_matrices::C end ############################################################################################ ### Constructors ############################################################################################ -function ImplyEmpty(; specification, kwargs...) 
- ram_matrices = RAMMatrices(specification) - - n_par = length(ram_matrices.parameters) - - return ImplyEmpty(identifier(ram_matrices), n_par) +function ImpliedEmpty(; + specification, + meanstruct = NoMeanStruct(), + hessianeval = ExactHessian(), + kwargs..., +) + return ImpliedEmpty(hessianeval, meanstruct, convert(RAMMatrices, specification)) end ############################################################################################ ### methods ############################################################################################ -objective!(imply::ImplyEmpty, par, model) = nothing -gradient!(imply::ImplyEmpty, par, model) = nothing -hessian!(imply::ImplyEmpty, par, model) = nothing +update!(targets::EvaluationTargets, implied::ImpliedEmpty, par, model) = nothing ############################################################################################ ### Recommended methods ############################################################################################ -identifier(imply::ImplyEmpty) = imply.identifier -n_par(imply::ImplyEmpty) = imply.n_par - -update_observed(imply::ImplyEmpty, observed::SemObserved; kwargs...) = imply +update_observed(implied::ImpliedEmpty, observed::SemObserved; kwargs...) = implied diff --git a/src/imply/RAM/generic.jl b/src/imply/RAM/generic.jl deleted file mode 100644 index 00c0d0ef9..000000000 --- a/src/imply/RAM/generic.jl +++ /dev/null @@ -1,358 +0,0 @@ -############################################################################################ -### Types -############################################################################################ -@doc raw""" -Model implied covariance and means via RAM notation. - -# Constructor - - RAM(; - specification, - meanstructure = false, - gradient = true, - kwargs...) - -# Arguments -- `specification`: either a `RAMMatrices` or `ParameterTable` object -- `meanstructure::Bool`: does the model have a meanstructure? 
-- `gradient::Bool`: is gradient-based optimization used - -# Extended help - -## Implementation -Subtype of `SemImply`. - -## RAM notation - -The model implied covariance matrix is computed as -```math - \Sigma = F(I-A)^{-1}S(I-A)^{-T}F^T -``` -and for models with a meanstructure, the model implied means are computed as -```math - \mu = F(I-A)^{-1}M -``` - -## Interfaces -- `identifier(::RAM) `-> Dict containing the parameter labels and their position -- `n_par(::RAM)` -> Number of parameters - -- `Σ(::RAM)` -> model implied covariance matrix -- `μ(::RAM)` -> model implied mean vector - -RAM matrices for the current parameter values: -- `A(::RAM)` -- `S(::RAM)` -- `F(::RAM)` -- `M(::RAM)` - -Jacobians of RAM matrices w.r.t to the parameter vector `θ` -- `∇A(::RAM)` -> ``∂vec(A)/∂θᵀ`` -- `∇S(::RAM)` -> ``∂vec(S)/∂θᵀ`` -- `∇M(::RAM)` = ``∂M/∂θᵀ`` - -Vector of indices of each parameter in the respective RAM matrix: -- `A_indices(::RAM)` -- `S_indices(::RAM)` -- `M_indices(::RAM)` - -Additional interfaces -- `F⨉I_A⁻¹(::RAM)` -> ``F(I-A)^{-1}`` -- `F⨉I_A⁻¹S(::RAM)` -> ``F(I-A)^{-1}S`` -- `I_A(::RAM)` -> ``I-A`` -- `has_meanstructure(::RAM)` -> `Val{Bool}` does the model have a meanstructure? - -Only available in gradient! 
calls: -- `I_A⁻¹(::RAM)` -> ``(I-A)^{-1}`` -""" -mutable struct RAM{ - A1, - A2, - A3, - A4, - A5, - A6, - V, - V2, - I1, - I2, - I3, - M1, - M2, - M3, - M4, - S1, - S2, - S3, - B, - D, -} <: SemImply - Σ::A1 - A::A2 - S::A3 - F::A4 - μ::A5 - M::A6 - - n_par::V - ram_matrices::V2 - has_meanstructure::B - - A_indices::I1 - S_indices::I2 - M_indices::I3 - - F⨉I_A⁻¹::M1 - F⨉I_A⁻¹S::M2 - I_A::M3 - I_A⁻¹::M4 - - ∇A::S1 - ∇S::S2 - ∇M::S3 - - identifier::D -end - -using StructuralEquationModels - -############################################################################################ -### Constructors -############################################################################################ - -function RAM(; - specification, - #vech = false, - gradient = true, - meanstructure = false, - kwargs..., -) - ram_matrices = RAMMatrices(specification) - identifier = StructuralEquationModels.identifier(ram_matrices) - - # get dimensions of the model - n_par = length(ram_matrices.parameters) - n_var, n_nod = ram_matrices.size_F - parameters = ram_matrices.parameters - F = zeros(ram_matrices.size_F) - F[CartesianIndex.(1:n_var, ram_matrices.F_ind)] .= 1.0 - - # get indices - A_indices = copy(ram_matrices.A_ind) - S_indices = copy(ram_matrices.S_ind) - M_indices = !isnothing(ram_matrices.M_ind) ? copy(ram_matrices.M_ind) : nothing - - #preallocate arrays - A_pre = zeros(n_nod, n_nod) - S_pre = zeros(n_nod, n_nod) - !isnothing(M_indices) ? M_pre = zeros(n_nod) : M_pre = nothing - - set_RAMConstants!(A_pre, S_pre, M_pre, ram_matrices.constants) - - A_pre = check_acyclic(A_pre, n_par, A_indices) - - # pre-allocate some matrices - Σ = zeros(n_var, n_var) - F⨉I_A⁻¹ = zeros(n_var, n_nod) - F⨉I_A⁻¹S = zeros(n_var, n_nod) - I_A = similar(A_pre) - - if gradient - ∇A = matrix_gradient(A_indices, n_nod^2) - ∇S = matrix_gradient(S_indices, n_nod^2) - else - ∇A = nothing - ∇S = nothing - end - - # μ - if meanstructure - has_meanstructure = Val(true) - ∇M = gradient ? 
matrix_gradient(M_indices, n_nod) : nothing - μ = zeros(n_var) - else - has_meanstructure = Val(false) - M_indices = nothing - M_pre = nothing - μ = nothing - ∇M = nothing - end - - return RAM( - Σ, - A_pre, - S_pre, - F, - μ, - M_pre, - n_par, - ram_matrices, - has_meanstructure, - A_indices, - S_indices, - M_indices, - F⨉I_A⁻¹, - F⨉I_A⁻¹S, - I_A, - copy(I_A), - ∇A, - ∇S, - ∇M, - identifier, - ) -end - -############################################################################################ -### methods -############################################################################################ - -# dispatch on meanstructure -objective!(imply::RAM, par, model::AbstractSemSingle) = - objective!(imply, par, model, imply.has_meanstructure) -gradient!(imply::RAM, par, model::AbstractSemSingle) = - gradient!(imply, par, model, imply.has_meanstructure) - -# objective and gradient -function objective!(imply::RAM, parameters, model, has_meanstructure::Val{T}) where {T} - fill_A_S_M!( - imply.A, - imply.S, - imply.M, - imply.A_indices, - imply.S_indices, - imply.M_indices, - parameters, - ) - - @. imply.I_A = -imply.A - @view(imply.I_A[diagind(imply.I_A)]) .+= 1 - - copyto!(imply.F⨉I_A⁻¹, imply.F) - rdiv!(imply.F⨉I_A⁻¹, factorize(imply.I_A)) - - Σ_RAM!(imply.Σ, imply.F⨉I_A⁻¹, imply.S, imply.F⨉I_A⁻¹S) - - if T - μ_RAM!(imply.μ, imply.F⨉I_A⁻¹, imply.M) - end -end - -function gradient!( - imply::RAM, - parameters, - model::AbstractSemSingle, - has_meanstructure::Val{T}, -) where {T} - fill_A_S_M!( - imply.A, - imply.S, - imply.M, - imply.A_indices, - imply.S_indices, - imply.M_indices, - parameters, - ) - - @. 
imply.I_A = -imply.A - @view(imply.I_A[diagind(imply.I_A)]) .+= 1 - - imply.I_A⁻¹ = LinearAlgebra.inv!(factorize(imply.I_A)) - mul!(imply.F⨉I_A⁻¹, imply.F, imply.I_A⁻¹) - - Σ_RAM!(imply.Σ, imply.F⨉I_A⁻¹, imply.S, imply.F⨉I_A⁻¹S) - - if T - μ_RAM!(imply.μ, imply.F⨉I_A⁻¹, imply.M) - end -end - -hessian!(imply::RAM, par, model::AbstractSemSingle, has_meanstructure) = - gradient!(imply, par, model, has_meanstructure) -objective_gradient!(imply::RAM, par, model::AbstractSemSingle, has_meanstructure) = - gradient!(imply, par, model, has_meanstructure) -objective_hessian!(imply::RAM, par, model::AbstractSemSingle, has_meanstructure) = - gradient!(imply, par, model, has_meanstructure) -gradient_hessian!(imply::RAM, par, model::AbstractSemSingle, has_meanstructure) = - gradient!(imply, par, model, has_meanstructure) -objective_gradient_hessian!(imply::RAM, par, model::AbstractSemSingle, has_meanstructure) = - gradient!(imply, par, model, has_meanstructure) - -############################################################################################ -### Recommended methods -############################################################################################ - -identifier(imply::RAM) = imply.identifier -n_par(imply::RAM) = imply.n_par - -function update_observed(imply::RAM, observed::SemObserved; kwargs...) - if n_man(observed) == size(imply.Σ, 1) - return imply - else - return RAM(; observed = observed, kwargs...) 
- end -end - -############################################################################################ -### additional methods -############################################################################################ - -Σ(imply::RAM) = imply.Σ -μ(imply::RAM) = imply.μ - -A(imply::RAM) = imply.A -S(imply::RAM) = imply.S -F(imply::RAM) = imply.F -M(imply::RAM) = imply.M - -∇A(imply::RAM) = imply.∇A -∇S(imply::RAM) = imply.∇S -∇M(imply::RAM) = imply.∇M - -A_indices(imply::RAM) = imply.A_indices -S_indices(imply::RAM) = imply.S_indices -M_indices(imply::RAM) = imply.M_indices - -F⨉I_A⁻¹(imply::RAM) = imply.F⨉I_A⁻¹ -F⨉I_A⁻¹S(imply::RAM) = imply.F⨉I_A⁻¹S -I_A(imply::RAM) = imply.I_A -I_A⁻¹(imply::RAM) = imply.I_A⁻¹ # only for gradient available! - -has_meanstructure(imply::RAM) = imply.has_meanstructure - -ram_matrices(imply::RAM) = imply.ram_matrices - -############################################################################################ -### additional functions -############################################################################################ - -function Σ_RAM!(Σ, F⨉I_A⁻¹, S, pre2) - mul!(pre2, F⨉I_A⁻¹, S) - mul!(Σ, pre2, F⨉I_A⁻¹') -end - -function μ_RAM!(μ, F⨉I_A⁻¹, M) - mul!(μ, F⨉I_A⁻¹, M) -end - -function check_acyclic(A_pre, n_par, A_indices) - # fill copy of A-matrix with random parameters - A_rand = copy(A_pre) - randpar = rand(n_par) - - fill_matrix!(A_rand, A_indices, randpar) - - # check if the model is acyclic - acyclic = isone(det(I - A_rand)) - - # check if A is lower or upper triangular - if istril(A_rand) - A_pre = LowerTriangular(A_pre) - elseif istriu(A_rand) - A_pre = UpperTriangular(A_pre) - elseif acyclic - @info "Your model is acyclic, specifying the A Matrix as either Upper or Lower Triangular can have great performance benefits.\n" maxlog = - 1 - end - - return A_pre -end diff --git a/src/loss/ML/FIML.jl b/src/loss/ML/FIML.jl index 7a27e7615..ca23ded97 100644 --- a/src/loss/ML/FIML.jl +++ b/src/loss/ML/FIML.jl @@ -24,7 
+24,8 @@ Analytic gradients are available. ## Implementation Subtype of `SemLossFunction`. """ -mutable struct SemFIML{INV, C, L, O, M, IM, I, T, U, W} <: SemLossFunction +mutable struct SemFIML{INV, C, L, O, M, IM, I, T, W} <: SemLossFunction + hessianeval::ExactHessian inverses::INV #preallocated inverses of imp_cov choleskys::C #preallocated choleskys logdets::L #logdets of implied covmats @@ -37,7 +38,7 @@ mutable struct SemFIML{INV, C, L, O, M, IM, I, T, U, W} <: SemLossFunction mult::T - commutation_indices::U + commutator::CommutationMatrix interaction::W end @@ -46,27 +47,30 @@ end ### Constructors ############################################################################################ -function SemFIML(; observed, specification, kwargs...) - inverses = broadcast(x -> zeros(x, x), Int64.(pattern_nvar_obs(observed))) +function SemFIML(; observed::SemObservedMissing, specification, kwargs...) + inverses = + [zeros(nmeasured_vars(pat), nmeasured_vars(pat)) for pat in observed.patterns] choleskys = Array{Cholesky{Float64, Array{Float64, 2}}, 1}(undef, length(inverses)) - n_patterns = size(rows(observed), 1) + n_patterns = length(observed.patterns) logdets = zeros(n_patterns) - imp_mean = zeros.(Int64.(pattern_nvar_obs(observed))) - meandiff = zeros.(Int64.(pattern_nvar_obs(observed))) + imp_mean = [zeros(nmeasured_vars(pat)) for pat in observed.patterns] + meandiff = [zeros(nmeasured_vars(pat)) for pat in observed.patterns] - nman = Int64(n_man(observed)) - imp_inv = zeros(nman, nman) + nobs_vars = nobserved_vars(observed) + imp_inv = zeros(nobs_vars, nobs_vars) mult = similar.(inverses) - ∇ind = vec(CartesianIndices(Array{Float64}(undef, nman, nman))) - ∇ind = - [findall(x -> !(x[1] ∈ ind || x[2] ∈ ind), ∇ind) for ind in patterns_not(observed)] - - commutation_indices = get_commutation_lookup(get_n_nodes(specification)^2) + # generate linear indicies of co-observed variable pairs for each pattern + Σ_linind = LinearIndices((nobs_vars, nobs_vars)) + ∇ind = 
map(observed.patterns) do pat + pat_vars = findall(pat.measured_mask) + vec(Σ_linind[pat_vars, pat_vars]) + end return SemFIML( + ExactHessian(), inverses, choleskys, logdets, @@ -75,7 +79,7 @@ function SemFIML(; observed, specification, kwargs...) meandiff, imp_inv, mult, - commutation_indices, + CommutationMatrix(nvars(specification)), nothing, ) end @@ -84,40 +88,32 @@ end ### methods ############################################################################################ -function objective!(semfiml::SemFIML, parameters, model) - if !check_fiml(semfiml, model) - return non_posdef_return(parameters) - end - - prepare_SemFIML!(semfiml, model) - - objective = F_FIML(rows(observed(model)), semfiml, model, parameters) - return objective / n_obs(observed(model)) -end - -function gradient!(semfiml::SemFIML, parameters, model) - if !check_fiml(semfiml, model) - return ones(eltype(parameters), size(parameters)) - end - - prepare_SemFIML!(semfiml, model) - - gradient = ∇F_FIML(rows(observed(model)), semfiml, model) / n_obs(observed(model)) - return gradient -end +function evaluate!( + objective, + gradient, + hessian, + semfiml::SemFIML, + implied::SemImplied, + model::AbstractSemSingle, + param_labels, +) + isnothing(hessian) || error("Hessian not implemented for FIML") -function objective_gradient!(semfiml::SemFIML, parameters, model) if !check_fiml(semfiml, model) - return non_posdef_return(parameters), ones(eltype(parameters), size(parameters)) + isnothing(objective) || (objective = non_posdef_return(param_labels)) + isnothing(gradient) || fill!(gradient, 1) + return objective end prepare_SemFIML!(semfiml, model) - objective = - F_FIML(rows(observed(model)), semfiml, model, parameters) / n_obs(observed(model)) - gradient = ∇F_FIML(rows(observed(model)), semfiml, model) / n_obs(observed(model)) + scale = inv(nsamples(observed(model))) + isnothing(objective) || + (objective = scale * F_FIML(observed(model), semfiml, model, param_labels)) + isnothing(gradient) || + 
(∇F_FIML!(gradient, observed(model), semfiml, model); gradient .*= scale) - return objective, gradient + return objective end ############################################################################################ @@ -132,83 +128,79 @@ update_observed(lossfun::SemFIML, observed::SemObserved; kwargs...) = ############################################################################################ function F_one_pattern(meandiff, inverse, obs_cov, logdet, N) - F = logdet - F += meandiff' * inverse * meandiff + F = logdet + dot(meandiff, inverse, meandiff) if N > one(N) F += dot(obs_cov, inverse) end - F = N * F - return F + return F * N end -function ∇F_one_pattern(μ_diff, Σ⁻¹, S, pattern, ∇ind, N, Jμ, JΣ, model) +function ∇F_one_pattern(μ_diff, Σ⁻¹, S, obs_mask, ∇ind, N, Jμ, JΣ, model) diff⨉inv = μ_diff' * Σ⁻¹ if N > one(N) JΣ[∇ind] .+= N * vec(Σ⁻¹ * (I - S * Σ⁻¹ - μ_diff * diff⨉inv)) - @. Jμ[pattern] += (N * 2 * diff⨉inv)' + @. Jμ[obs_mask] += (N * 2 * diff⨉inv)' else JΣ[∇ind] .+= vec(Σ⁻¹ * (I - μ_diff * diff⨉inv)) - @. Jμ[pattern] += (2 * diff⨉inv)' + @. Jμ[obs_mask] += (2 * diff⨉inv)' end end -function ∇F_fiml_outer(JΣ, Jμ, imply::SemImplySymbolic, model, semfiml) - G = transpose(JΣ' * ∇Σ(imply) - Jμ' * ∇μ(imply)) - return G +function ∇F_fiml_outer!(G, JΣ, Jμ, implied::SemImpliedSymbolic, model, semfiml) + mul!(G, implied.∇Σ', JΣ) # should be transposed + mul!(G, implied.∇μ', Jμ, -1, 1) end -function ∇F_fiml_outer(JΣ, Jμ, imply, model, semfiml) - Iₙ = sparse(1.0I, size(A(imply))...) - P = kron(F⨉I_A⁻¹(imply), F⨉I_A⁻¹(imply)) - Q = kron(S(imply) * I_A⁻¹(imply)', Iₙ) - #commutation_matrix_pre_square_add!(Q, Q) - Q2 = commutation_matrix_pre_square(Q, semfiml.commutation_indices) +function ∇F_fiml_outer!(G, JΣ, Jμ, implied, model, semfiml) + Iₙ = sparse(1.0I, size(implied.A)...) 
+ P = kron(implied.F⨉I_A⁻¹, implied.F⨉I_A⁻¹) + Q = kron(implied.S * implied.I_A⁻¹', Iₙ) + Q .+= semfiml.commutator * Q - ∇Σ = P * (∇S(imply) + (Q + Q2) * ∇A(imply)) + ∇Σ = P * (implied.∇S + Q * implied.∇A) ∇μ = - F⨉I_A⁻¹(imply) * ∇M(imply) + - kron((I_A⁻¹(imply) * M(imply))', F⨉I_A⁻¹(imply)) * ∇A(imply) - - G = transpose(JΣ' * ∇Σ - Jμ' * ∇μ) + implied.F⨉I_A⁻¹ * implied.∇M + + kron((implied.I_A⁻¹ * implied.M)', implied.F⨉I_A⁻¹) * implied.∇A - return G + mul!(G, ∇Σ', JΣ) # actually transposed + mul!(G, ∇μ', Jμ, -1, 1) end -function F_FIML(rows, semfiml, model, parameters) - F = zero(eltype(parameters)) - for i in 1:size(rows, 1) +function F_FIML(observed::SemObservedMissing, semfiml, model, param_labels) + F = zero(eltype(param_labels)) + for (i, pat) in enumerate(observed.patterns) F += F_one_pattern( semfiml.meandiff[i], semfiml.inverses[i], - obs_cov(observed(model))[i], + pat.measured_cov, semfiml.logdets[i], - pattern_n_obs(observed(model))[i], + nsamples(pat), ) end return F end -function ∇F_FIML(rows, semfiml, model) - Jμ = zeros(Int64(n_man(model))) - JΣ = zeros(Int64(n_man(model)^2)) +function ∇F_FIML!(G, observed::SemObservedMissing, semfiml, model) + Jμ = zeros(nobserved_vars(model)) + JΣ = zeros(nobserved_vars(model)^2) - for i in 1:size(rows, 1) + for (i, pat) in enumerate(observed.patterns) ∇F_one_pattern( semfiml.meandiff[i], semfiml.inverses[i], - obs_cov(observed(model))[i], - patterns(observed(model))[i], + pat.measured_cov, + pat.measured_mask, semfiml.∇ind[i], - pattern_n_obs(observed(model))[i], + nsamples(pat), Jμ, JΣ, model, ) end - return ∇F_fiml_outer(JΣ, Jμ, imply(model), model, semfiml) + return ∇F_fiml_outer!(G, JΣ, Jμ, implied(model), model, semfiml) end function prepare_SemFIML!(semfiml, model) @@ -216,29 +208,21 @@ function prepare_SemFIML!(semfiml, model) batch_cholesky!(semfiml, model) #batch_sym_inv_update!(semfiml, model) batch_inv!(semfiml, model) - for i in 1:size(pattern_n_obs(observed(model)), 1) - semfiml.meandiff[i] .= 
obs_mean(observed(model))[i] - semfiml.imp_mean[i] + for (i, pat) in enumerate(observed(model).patterns) + semfiml.meandiff[i] .= pat.measured_mean .- semfiml.imp_mean[i] end end -function copy_per_pattern!(inverses, source_inverses, means, source_means, patterns) - @views for i in 1:size(patterns, 1) - inverses[i] .= source_inverses[patterns[i], patterns[i]] - end - - @views for i in 1:size(patterns, 1) - means[i] .= source_means[patterns[i]] +function copy_per_pattern!(fiml::SemFIML, model::AbstractSem) + Σ = implied(model).Σ + μ = implied(model).μ + data = observed(model) + @inbounds @views for (i, pat) in enumerate(data.patterns) + fiml.inverses[i] .= Σ[pat.measured_mask, pat.measured_mask] + fiml.imp_mean[i] .= μ[pat.measured_mask] end end -copy_per_pattern!(semfiml, model::M where {M <: AbstractSem}) = copy_per_pattern!( - semfiml.inverses, - Σ(imply(model)), - semfiml.imp_mean, - μ(imply(model)), - patterns(observed(model)), -) - function batch_cholesky!(semfiml, model) for i in 1:size(semfiml.inverses, 1) semfiml.choleskys[i] = cholesky!(Symmetric(semfiml.inverses[i])) @@ -248,12 +232,7 @@ function batch_cholesky!(semfiml, model) end function check_fiml(semfiml, model) - copyto!(semfiml.imp_inv, Σ(imply(model))) + copyto!(semfiml.imp_inv, implied(model).Σ) a = cholesky!(Symmetric(semfiml.imp_inv); check = false) return isposdef(a) end - -get_n_nodes(specification::RAMMatrices) = specification.size_F[2] -get_n_nodes(specification::ParameterTable) = - length(specification.variables[:observed_vars]) + - length(specification.variables[:latent_vars]) diff --git a/src/loss/ML/ML.jl b/src/loss/ML/ML.jl index 7811cda7f..d14af648c 100644 --- a/src/loss/ML/ML.jl +++ b/src/loss/ML/ML.jl @@ -27,26 +27,29 @@ Analytic gradients are available, and for models without a meanstructure, also a ## Implementation Subtype of `SemLossFunction`. 
""" -struct SemML{INV, M, M2, B, V} <: SemLossFunction +struct SemML{HE <: HessianEval, INV, M, M2} <: SemLossFunction + hessianeval::HE Σ⁻¹::INV Σ⁻¹Σₒ::M meandiff::M2 - approximate_hessian::B - has_meanstructure::V + + SemML{HE}(args...) where {HE <: HessianEval} = + new{HE, map(typeof, args)...}(HE(), args...) end ############################################################################################ ### Constructors ############################################################################################ -function SemML(; observed, meanstructure = false, approximate_hessian = false, kwargs...) - isnothing(obs_mean(observed)) ? meandiff = nothing : meandiff = copy(obs_mean(observed)) - return SemML( - similar(obs_cov(observed)), - similar(obs_cov(observed)), +function SemML(; observed::SemObserved, approximate_hessian::Bool = false, kwargs...) + obsmean = obs_mean(observed) + obscov = obs_cov(observed) + meandiff = isnothing(obsmean) ? nothing : copy(obsmean) + + return SemML{approximate_hessian ? 
ApproxHessian : ExactHessian}( + similar(obscov), + similar(obscov), meandiff, - approximate_hessian, - Val(meanstructure), ) end @@ -54,511 +57,151 @@ end ### objective, gradient, hessian methods ############################################################################################ -# first, dispatch for meanstructure -objective!(semml::SemML, par, model::AbstractSemSingle) = - objective!(semml::SemML, par, model, semml.has_meanstructure, imply(model)) -gradient!(semml::SemML, par, model::AbstractSemSingle) = - gradient!(semml::SemML, par, model, semml.has_meanstructure, imply(model)) -hessian!(semml::SemML, par, model::AbstractSemSingle) = - hessian!(semml::SemML, par, model, semml.has_meanstructure, imply(model)) -objective_gradient!(semml::SemML, par, model::AbstractSemSingle) = - objective_gradient!(semml::SemML, par, model, semml.has_meanstructure, imply(model)) -objective_hessian!(semml::SemML, par, model::AbstractSemSingle) = - objective_hessian!(semml::SemML, par, model, semml.has_meanstructure, imply(model)) -gradient_hessian!(semml::SemML, par, model::AbstractSemSingle) = - gradient_hessian!(semml::SemML, par, model, semml.has_meanstructure, imply(model)) -objective_gradient_hessian!(semml::SemML, par, model::AbstractSemSingle) = - objective_gradient_hessian!( - semml::SemML, - par, - model, - semml.has_meanstructure, - imply(model), - ) - ############################################################################################ -### Symbolic Imply Types +### Symbolic Implied Types -function objective!( +function evaluate!( + objective, + gradient, + hessian, semml::SemML, - par, + implied::SemImpliedSymbolic, model::AbstractSemSingle, - has_meanstructure::Val{T}, - imp::SemImplySymbolic, -) where {T} - let Σ = Σ(imply(model)), - Σₒ = obs_cov(observed(model)), - Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), - Σ⁻¹ = Σ⁻¹(semml), - μ = μ(imply(model)), - μₒ = obs_mean(observed(model)) - - copyto!(Σ⁻¹, Σ) - Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - 
isposdef(Σ_chol) || return non_posdef_return(par) - ld = logdet(Σ_chol) - Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - #mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) + par, +) + if !isnothing(hessian) + (MeanStruct(implied) === HasMeanStruct) && + throw(DomainError(H, "hessian of ML + meanstructure is not available")) + end - if T - μ₋ = μₒ - μ - return ld + dot(Σ⁻¹, Σₒ) + dot(μ₋, Σ⁻¹, μ₋) - else - return ld + dot(Σ⁻¹, Σₒ) - end + Σ = implied.Σ + Σₒ = obs_cov(observed(model)) + Σ⁻¹Σₒ = semml.Σ⁻¹Σₒ + Σ⁻¹ = semml.Σ⁻¹ + + copyto!(Σ⁻¹, Σ) + Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) + if !isposdef(Σ_chol) + #@warn "∑⁻¹ is not positive definite" + isnothing(objective) || (objective = non_posdef_return(par)) + isnothing(gradient) || fill!(gradient, 1) + isnothing(hessian) || copyto!(hessian, I) + return objective end -end + ld = logdet(Σ_chol) + Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) + mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) + isnothing(objective) || (objective = ld + tr(Σ⁻¹Σₒ)) -function gradient!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{T}, - imp::SemImplySymbolic, -) where {T} - let Σ = Σ(imply(model)), - Σₒ = obs_cov(observed(model)), - Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), - Σ⁻¹ = Σ⁻¹(semml), - ∇Σ = ∇Σ(imply(model)), - μ = μ(imply(model)), - ∇μ = ∇μ(imply(model)), + if MeanStruct(implied) === HasMeanStruct + μ = implied.μ μₒ = obs_mean(observed(model)) + μ₋ = μₒ - μ - copyto!(Σ⁻¹, Σ) - Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - isposdef(Σ_chol) || return ones(eltype(par), size(par)) - Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) - - if T - μ₋ = μₒ - μ + isnothing(objective) || (objective += dot(μ₋, Σ⁻¹, μ₋)) + if !isnothing(gradient) + ∇Σ = implied.∇Σ + ∇μ = implied.∇μ μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹ - gradient = vec(Σ⁻¹ - Σ⁻¹Σₒ * Σ⁻¹ - μ₋ᵀΣ⁻¹'μ₋ᵀΣ⁻¹)' * ∇Σ - 2 * μ₋ᵀΣ⁻¹ * ∇μ - return gradient' - else - gradient = vec(Σ⁻¹ - Σ⁻¹Σₒ * Σ⁻¹)' * ∇Σ - return gradient' + mul!(gradient, ∇Σ', vec(Σ⁻¹ - Σ⁻¹Σₒ * Σ⁻¹ - μ₋ᵀΣ⁻¹'μ₋ᵀΣ⁻¹)) + mul!(gradient, ∇μ', μ₋ᵀΣ⁻¹', -2, 1) end - end -end - -function 
hessian!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{false}, - imp::SemImplySymbolic, -) - let Σ = Σ(imply(model)), - ∇Σ = ∇Σ(imply(model)), - Σₒ = obs_cov(observed(model)), - Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), - Σ⁻¹ = Σ⁻¹(semml), - ∇²Σ_function! = ∇²Σ_function(imply(model)), - ∇²Σ = ∇²Σ(imply(model)) - - copyto!(Σ⁻¹, Σ) - Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - isposdef(Σ_chol) || return diagm(fill(one(eltype(par)), length(par))) - Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - - if semml.approximate_hessian - hessian = 2 * ∇Σ' * kron(Σ⁻¹, Σ⁻¹) * ∇Σ - else - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) - Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹ - # inner - J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)' - ∇²Σ_function!(∇²Σ, J, par) - # outer - H_outer = kron(2Σ⁻¹ΣₒΣ⁻¹ - Σ⁻¹, Σ⁻¹) - hessian = ∇Σ' * H_outer * ∇Σ - hessian .+= ∇²Σ - end - - return hessian - end -end - -function hessian!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{true}, - imp::SemImplySymbolic, -) - throw(DomainError(H, "hessian of ML + meanstructure is not available")) -end - -function objective_gradient!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{T}, - imp::SemImplySymbolic, -) where {T} - let Σ = Σ(imply(model)), - Σₒ = obs_cov(observed(model)), - Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), - Σ⁻¹ = Σ⁻¹(semml), - μ = μ(imply(model)), - μₒ = obs_mean(observed(model)), - ∇Σ = ∇Σ(imply(model)), - ∇μ = ∇μ(imply(model)) - - copyto!(Σ⁻¹, Σ) - Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - if !isposdef(Σ_chol) - return non_posdef_return(par), ones(eltype(par), size(par)) - else - ld = logdet(Σ_chol) - Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) - - if T - μ₋ = μₒ - μ - μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹ - - objective = ld + tr(Σ⁻¹Σₒ) + dot(μ₋, Σ⁻¹, μ₋) - gradient = vec(Σ⁻¹ * (I - Σₒ * Σ⁻¹ - μ₋ * μ₋ᵀΣ⁻¹))' * ∇Σ - 2 * μ₋ᵀΣ⁻¹ * ∇μ - return objective, gradient' - else - objective = ld + tr(Σ⁻¹Σₒ) - gradient = (vec(Σ⁻¹) - vec(Σ⁻¹Σₒ * Σ⁻¹))' * ∇Σ - return objective, gradient' - end + elseif 
!isnothing(gradient) || !isnothing(hessian) + ∇Σ = implied.∇Σ + Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹ + J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)' + if !isnothing(gradient) + mul!(gradient, ∇Σ', J') end - end -end - -function objective_hessian!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{T}, - imp::SemImplySymbolic, -) where {T} - let Σ = Σ(imply(model)), - Σₒ = obs_cov(observed(model)), - Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), - Σ⁻¹ = Σ⁻¹(semml), - ∇Σ = ∇Σ(imply(model)), - ∇μ = ∇μ(imply(model)), - ∇²Σ_function! = ∇²Σ_function(imply(model)), - ∇²Σ = ∇²Σ(imply(model)) - - copyto!(Σ⁻¹, Σ) - Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - if !isposdef(Σ_chol) - return non_posdef_return(par), diagm(fill(one(eltype(par)), length(par))) - else - ld = logdet(Σ_chol) - Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) - objective = ld + tr(Σ⁻¹Σₒ) - - if semml.approximate_hessian - hessian = 2 * ∇Σ' * kron(Σ⁻¹, Σ⁻¹) * ∇Σ + if !isnothing(hessian) + if HessianEval(semml) === ApproxHessian + mul!(hessian, ∇Σ' * kron(Σ⁻¹, Σ⁻¹), ∇Σ, 2, 0) else - Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹ + ∇²Σ_function! = implied.∇²Σ_function + ∇²Σ = implied.∇²Σ # inner - J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)' ∇²Σ_function!(∇²Σ, J, par) # outer H_outer = kron(2Σ⁻¹ΣₒΣ⁻¹ - Σ⁻¹, Σ⁻¹) - hessian = ∇Σ' * H_outer * ∇Σ + mul!(hessian, ∇Σ' * H_outer, ∇Σ) hessian .+= ∇²Σ end - - return objective, hessian - end - end -end - -function objective_hessian!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{true}, - imp::SemImplySymbolic, -) - throw(DomainError(H, "hessian of ML + meanstructure is not available")) -end - -function gradient_hessian!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{false}, - imp::SemImplySymbolic, -) - let Σ = Σ(imply(model)), - Σₒ = obs_cov(observed(model)), - Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), - Σ⁻¹ = Σ⁻¹(semml), - ∇Σ = ∇Σ(imply(model)), - ∇μ = ∇μ(imply(model)), - ∇²Σ_function! 
= ∇²Σ_function(imply(model)), - ∇²Σ = ∇²Σ(imply(model)) - - copyto!(Σ⁻¹, Σ) - Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - isposdef(Σ_chol) || - return ones(eltype(par), size(par)), diagm(fill(one(eltype(par)), length(par))) - Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) - - Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹ - - J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)' - gradient = J * ∇Σ - - if semml.approximate_hessian - hessian = 2 * ∇Σ' * kron(Σ⁻¹, Σ⁻¹) * ∇Σ - else - # inner - ∇²Σ_function!(∇²Σ, J, par) - # outer - H_outer = kron(2Σ⁻¹ΣₒΣ⁻¹ - Σ⁻¹, Σ⁻¹) - hessian = ∇Σ' * H_outer * ∇Σ - hessian .+= ∇²Σ end - - return gradient', hessian end -end - -function gradient_hessian!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{true}, - imp::SemImplySymbolic, -) - throw(DomainError(H, "hessian of ML + meanstructure is not available")) -end - -function objective_gradient_hessian!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{false}, - imp::SemImplySymbolic, -) - let Σ = Σ(imply(model)), - Σₒ = obs_cov(observed(model)), - Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), - Σ⁻¹ = Σ⁻¹(semml), - ∇Σ = ∇Σ(imply(model)), - ∇²Σ_function! 
= ∇²Σ_function(imply(model)), - ∇²Σ = ∇²Σ(imply(model)) - - copyto!(Σ⁻¹, Σ) - Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - if !isposdef(Σ_chol) - objective = non_posdef_return(par) - gradient = ones(eltype(par), size(par)) - hessian = diagm(fill(one(eltype(par)), length(par))) - return objective, gradient, hessian - end - ld = logdet(Σ_chol) - Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) - objective = ld + tr(Σ⁻¹Σₒ) - - Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹ - - J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)' - gradient = J * ∇Σ - - if semml.approximate_hessian - hessian = 2 * ∇Σ' * kron(Σ⁻¹, Σ⁻¹) * ∇Σ - else - Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹ - # inner - ∇²Σ_function!(∇²Σ, J, par) - # outer - H_outer = kron(2Σ⁻¹ΣₒΣ⁻¹ - Σ⁻¹, Σ⁻¹) - hessian = ∇Σ' * H_outer * ∇Σ - hessian .+= ∇²Σ - end - - return objective, gradient', hessian - end -end - -function objective_gradient_hessian!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{true}, - imp::SemImplySymbolic, -) - throw(DomainError(H, "hessian of ML + meanstructure is not available")) + return objective end ############################################################################################ -### Non-Symbolic Imply Types - -# no hessians ------------------------------------------------------------------------------ - -function hessian!(semml::SemML, par, model::AbstractSemSingle, has_meanstructure, imp::RAM) - throw(DomainError(H, "hessian of ML + non-symbolic imply type is not available")) -end +### Non-Symbolic Implied Types -function objective_hessian!( +function evaluate!( + objective, + gradient, + hessian, semml::SemML, - par, + implied::RAM, model::AbstractSemSingle, - has_meanstructure, - imp::RAM, -) - throw(DomainError(H, "hessian of ML + non-symbolic imply type is not available")) -end - -function gradient_hessian!( - semml::SemML, par, - model::AbstractSemSingle, - has_meanstructure, - imp::RAM, ) - throw(DomainError(H, "hessian of ML + non-symbolic imply type is not available")) -end - -function 
objective_gradient_hessian!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure, - imp::RAM, -) - throw(DomainError(H, "hessian of ML + non-symbolic imply type is not available")) -end - -# objective, gradient ---------------------------------------------------------------------- + if !isnothing(hessian) + error("hessian of ML + non-symbolic implied type is not available") + end -function objective!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{T}, - imp::RAM, -) where {T} - let Σ = Σ(imply(model)), - Σₒ = obs_cov(observed(model)), - Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), - Σ⁻¹ = Σ⁻¹(semml), - μ = μ(imply(model)), - μₒ = obs_mean(observed(model)) + Σ = implied.Σ + Σₒ = obs_cov(observed(model)) + Σ⁻¹Σₒ = semml.Σ⁻¹Σₒ + Σ⁻¹ = semml.Σ⁻¹ + + copyto!(Σ⁻¹, Σ) + Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) + if !isposdef(Σ_chol) + #@warn "Σ⁻¹ is not positive definite" + isnothing(objective) || (objective = non_posdef_return(par)) + isnothing(gradient) || fill!(gradient, 1) + isnothing(hessian) || copyto!(hessian, I) + return objective + end + ld = logdet(Σ_chol) + Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) + mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) - copyto!(Σ⁻¹, Σ) - Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - isposdef(Σ_chol) || return non_posdef_return(par) - ld = logdet(Σ_chol) - Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) + if !isnothing(objective) + objective = ld + tr(Σ⁻¹Σₒ) - if T + if MeanStruct(implied) === HasMeanStruct + μ = implied.μ + μₒ = obs_mean(observed(model)) μ₋ = μₒ - μ - return ld + tr(Σ⁻¹Σₒ) + dot(μ₋, Σ⁻¹, μ₋) - else - return ld + tr(Σ⁻¹Σₒ) + objective += dot(μ₋, Σ⁻¹, μ₋) end end -end - -function gradient!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{T}, - imp::RAM, -) where {T} - let Σ = Σ(imply(model)), - Σₒ = obs_cov(observed(model)), - Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), - Σ⁻¹ = Σ⁻¹(semml), - S = S(imply(model)), - M = M(imply(model)), - F⨉I_A⁻¹ = F⨉I_A⁻¹(imply(model)), - I_A⁻¹ = I_A⁻¹(imply(model)), 
- ∇A = ∇A(imply(model)), - ∇S = ∇S(imply(model)), - ∇M = ∇M(imply(model)), - μ = μ(imply(model)), - μₒ = obs_mean(observed(model)) - copyto!(Σ⁻¹, Σ) - Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - isposdef(Σ_chol) || return ones(eltype(par), size(par)) - Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) + if !isnothing(gradient) + S = implied.S + F⨉I_A⁻¹ = implied.F⨉I_A⁻¹ + I_A⁻¹ = implied.I_A⁻¹ + ∇A = implied.∇A + ∇S = implied.∇S C = F⨉I_A⁻¹' * (I - Σ⁻¹Σₒ) * Σ⁻¹ * F⨉I_A⁻¹ - gradient = 2vec(C * S * I_A⁻¹')'∇A + vec(C)'∇S - - if T + mul!(gradient, ∇A', vec(C * S * I_A⁻¹'), 2, 0) + mul!(gradient, ∇S', vec(C), 1, 1) + + if MeanStruct(implied) === HasMeanStruct + μ = implied.μ + μₒ = obs_mean(observed(model)) + ∇M = implied.∇M + M = implied.M μ₋ = μₒ - μ μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹ k = μ₋ᵀΣ⁻¹ * F⨉I_A⁻¹ - - gradient .+= -2k * ∇M - 2vec(k' * (M' + k * S) * I_A⁻¹')'∇A - vec(k'k)'∇S + mul!(gradient, ∇M', k', -2, 1) + mul!(gradient, ∇A', vec(k' * (I_A⁻¹ * (M + S * k'))'), -2, 1) + mul!(gradient, ∇S', vec(k'k), -1, 1) end - - return gradient' end -end - -function objective_gradient!( - semml::SemML, - par, - model::AbstractSemSingle, - has_meanstructure::Val{T}, - imp::RAM, -) where {T} - let Σ = Σ(imply(model)), - Σₒ = obs_cov(observed(model)), - Σ⁻¹Σₒ = Σ⁻¹Σₒ(semml), - Σ⁻¹ = Σ⁻¹(semml), - S = S(imply(model)), - M = M(imply(model)), - F⨉I_A⁻¹ = F⨉I_A⁻¹(imply(model)), - I_A⁻¹ = I_A⁻¹(imply(model)), - ∇A = ∇A(imply(model)), - ∇S = ∇S(imply(model)), - ∇M = ∇M(imply(model)), - μ = μ(imply(model)), - μₒ = obs_mean(observed(model)) - copyto!(Σ⁻¹, Σ) - Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) - if !isposdef(Σ_chol) - objective = non_posdef_return(par) - gradient = ones(eltype(par), size(par)) - return objective, gradient - else - ld = logdet(Σ_chol) - Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) - objective = ld + tr(Σ⁻¹Σₒ) - - C = F⨉I_A⁻¹' * (I - Σ⁻¹Σₒ) * Σ⁻¹ * F⨉I_A⁻¹ - gradient = 2vec(C * S * I_A⁻¹')'∇A + vec(C)'∇S - - if T - μ₋ = μₒ - μ - objective += 
dot(μ₋, Σ⁻¹, μ₋) - - μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹ - k = μ₋ᵀΣ⁻¹ * F⨉I_A⁻¹ - gradient .+= -2k * ∇M - 2vec(k' * (M' + k * S) * I_A⁻¹')'∇A - vec(k'k)'∇S - end - - return objective, gradient' - end - end + return objective end ############################################################################################ @@ -578,7 +221,7 @@ end ############################################################################################ update_observed(lossfun::SemML, observed::SemObservedMissing; kwargs...) = - throw(ArgumentError("ML estimation does not work with missing data - use FIML instead")) + error("ML estimation does not work with missing data - use FIML instead") function update_observed(lossfun::SemML, observed::SemObserved; kwargs...) if size(lossfun.Σ⁻¹) == size(obs_cov(observed)) @@ -587,10 +230,3 @@ function update_observed(lossfun::SemML, observed::SemObserved; kwargs...) return SemML(; observed = observed, kwargs...) end end - -############################################################################################ -### additional methods -############################################################################################ - -Σ⁻¹(semml::SemML) = semml.Σ⁻¹ -Σ⁻¹Σₒ(semml::SemML) = semml.Σ⁻¹Σₒ diff --git a/src/loss/WLS/WLS.jl b/src/loss/WLS/WLS.jl index 61c89fc85..0fe2c9b3c 100644 --- a/src/loss/WLS/WLS.jl +++ b/src/loss/WLS/WLS.jl @@ -38,18 +38,20 @@ Analytic gradients are available, and for models without a meanstructure, also a ## Implementation Subtype of `SemLossFunction`. """ -struct SemWLS{Vt, St, B, C, B2} <: SemLossFunction +struct SemWLS{HE <: HessianEval, Vt, St, C} <: SemLossFunction + hessianeval::HE V::Vt σₒ::St - approximate_hessian::B V_μ::C - has_meanstructure::B2 end ############################################################################################ ### Constructors ############################################################################################ +SemWLS{HE}(args...) 
where {HE <: HessianEval} = + SemWLS{HE, map(typeof, args)...}(HE(), args...) + function SemWLS(; observed, wls_weight_matrix = nothing, @@ -58,269 +60,98 @@ function SemWLS(; meanstructure = false, kwargs..., ) - ind = CartesianIndices(obs_cov(observed)) - ind = filter(x -> (x[1] >= x[2]), ind) - s = obs_cov(observed)[ind] + nobs_vars = nobserved_vars(observed) + tril_ind = filter(x -> (x[1] >= x[2]), CartesianIndices(obs_cov(observed))) + s = obs_cov(observed)[tril_ind] # compute V here if isnothing(wls_weight_matrix) - D = duplication_matrix(n_man(observed)) + D = duplication_matrix(nobs_vars) S = inv(obs_cov(observed)) S = kron(S, S) wls_weight_matrix = 0.5 * (D' * S * D) + else + size(wls_weight_matrix) == (length(tril_ind), length(tril_ind)) || + DimensionMismatch( + "wls_weight_matrix has to be of size $(length(tril_ind))×$(length(tril_ind))", + ) end if meanstructure if isnothing(wls_weight_matrix_mean) wls_weight_matrix_mean = inv(obs_cov(observed)) + else + size(wls_weight_matrix_mean) == (nobs_vars, nobs_vars) || DimensionMismatch( + "wls_weight_matrix_mean has to be of size $(nobs_vars)×$(nobs_vars)", + ) end else + isnothing(wls_weight_matrix_mean) || + @warn "Ignoring wls_weight_matrix_mean since meanstructure is disabled" wls_weight_matrix_mean = nothing end + HE = approximate_hessian ? 
ApproxHessian : ExactHessian - return SemWLS( - wls_weight_matrix, - s, - approximate_hessian, - wls_weight_matrix_mean, - Val(meanstructure), - ) + return SemWLS{HE}(wls_weight_matrix, s, wls_weight_matrix_mean) end ############################################################################ ### methods ############################################################################ -objective!(semwls::SemWLS, par, model::AbstractSemSingle) = - objective!(semwls::SemWLS, par, model, semwls.has_meanstructure) -gradient!(semwls::SemWLS, par, model::AbstractSemSingle) = - gradient!(semwls::SemWLS, par, model, semwls.has_meanstructure) -hessian!(semwls::SemWLS, par, model::AbstractSemSingle) = - hessian!(semwls::SemWLS, par, model, semwls.has_meanstructure) - -objective_gradient!(semwls::SemWLS, par, model::AbstractSemSingle) = - objective_gradient!(semwls::SemWLS, par, model, semwls.has_meanstructure) -objective_hessian!(semwls::SemWLS, par, model::AbstractSemSingle) = - objective_hessian!(semwls::SemWLS, par, model, semwls.has_meanstructure) -gradient_hessian!(semwls::SemWLS, par, model::AbstractSemSingle) = - gradient_hessian!(semwls::SemWLS, par, model, semwls.has_meanstructure) - -objective_gradient_hessian!(semwls::SemWLS, par, model::AbstractSemSingle) = - objective_gradient_hessian!(semwls::SemWLS, par, model, semwls.has_meanstructure) - -function objective!( +function evaluate!( + objective, + gradient, + hessian, semwls::SemWLS, - par, + implied::SemImpliedSymbolic, model::AbstractSemSingle, - has_meanstructure::Val{T}, -) where {T} - let σ = Σ(imply(model)), - μ = μ(imply(model)), - σₒ = semwls.σₒ, - μₒ = obs_mean(observed(model)), - V = semwls.V, - V_μ = semwls.V_μ, - - σ₋ = σₒ - σ - - if T - μ₋ = μₒ - μ - return dot(σ₋, V, σ₋) + dot(μ₋, V_μ, μ₋) - else - return dot(σ₋, V, σ₋) - end - end -end - -function gradient!( - semwls::SemWLS, par, - model::AbstractSemSingle, - has_meanstructure::Val{T}, -) where {T} - let σ = Σ(imply(model)), - μ = μ(imply(model)), - 
σₒ = semwls.σₒ, - μₒ = obs_mean(observed(model)), - V = semwls.V, - V_μ = semwls.V_μ, - ∇σ = ∇Σ(imply(model)), - ∇μ = ∇μ(imply(model)) - - σ₋ = σₒ - σ - - if T - μ₋ = μₒ - μ - return -2 * (σ₋' * V * ∇σ + μ₋' * V_μ * ∇μ)' - else - return -2 * (σ₋' * V * ∇σ)' - end +) + if !isnothing(hessian) && (MeanStruct(implied) === HasMeanStruct) + error("hessian of WLS with meanstructure is not available") end -end -function hessian!( - semwls::SemWLS, - par, - model::AbstractSemSingle, - has_meanstructure::Val{T}, -) where {T} - let σ = Σ(imply(model)), - σₒ = semwls.σₒ, - V = semwls.V, - ∇σ = ∇Σ(imply(model)), - ∇²Σ_function! = ∇²Σ_function(imply(model)), - ∇²Σ = ∇²Σ(imply(model)) + V = semwls.V + ∇σ = implied.∇Σ - σ₋ = σₒ - σ + σ = implied.Σ + σₒ = semwls.σₒ + σ₋ = σₒ - σ - if T - throw(DomainError(H, "hessian of WLS with meanstructure is not available")) - else - hessian = 2 * ∇σ' * V * ∇σ - if !semwls.approximate_hessian - J = -2 * (σ₋' * semwls.V)' - ∇²Σ_function!(∇²Σ, J, par) - hessian .+= ∇²Σ - end - return hessian + isnothing(objective) || (objective = dot(σ₋, V, σ₋)) + if !isnothing(gradient) + if issparse(∇σ) + gradient .= (σ₋' * V * ∇σ)' + else # save one allocation + mul!(gradient, σ₋' * V, ∇σ) # actually transposed, but should be fine for vectors end + gradient .*= -2 end -end - -function objective_gradient!( - semwls::SemWLS, - par, - model::AbstractSemSingle, - has_meanstructure::Val{T}, -) where {T} - let σ = Σ(imply(model)), - μ = μ(imply(model)), - σₒ = semwls.σₒ, - μₒ = obs_mean(observed(model)), - V = semwls.V, - V_μ = semwls.V_μ, - ∇σ = ∇Σ(imply(model)), - ∇μ = ∇μ(imply(model)) - - σ₋ = σₒ - σ - - if T - μ₋ = μₒ - μ - objective = dot(σ₋, V, σ₋) + dot(μ₋', V_μ, μ₋) - gradient = -2 * (σ₋' * V * ∇σ + μ₋' * V_μ * ∇μ)' - return objective, gradient - else - objective = dot(σ₋, V, σ₋) - gradient = -2 * (σ₋' * V * ∇σ)' - return objective, gradient - end + isnothing(hessian) || (mul!(hessian, ∇σ' * V, ∇σ, 2, 0)) + if !isnothing(hessian) && (HessianEval(semwls) === 
ExactHessian) + ∇²Σ_function! = implied.∇²Σ_function + ∇²Σ = implied.∇²Σ + J = -2 * (σ₋' * semwls.V)' + ∇²Σ_function!(∇²Σ, J, par) + hessian .+= ∇²Σ end -end - -function objective_hessian!( - semwls::SemWLS, - par, - model::AbstractSemSingle, - has_meanstructure::Val{T}, -) where {T} - let σ = Σ(imply(model)), - σₒ = semwls.σₒ, - V = semwls.V, - ∇σ = ∇Σ(imply(model)), - ∇²Σ_function! = ∇²Σ_function(imply(model)), - ∇²Σ = ∇²Σ(imply(model)) - - σ₋ = σₒ - σ - - objective = dot(σ₋, V, σ₋) - - hessian = 2 * ∇σ' * V * ∇σ - if !semwls.approximate_hessian - J = -2 * (σ₋' * semwls.V)' - ∇²Σ_function!(∇²Σ, J, par) - hessian .+= ∇²Σ + if MeanStruct(implied) === HasMeanStruct + μ = implied.μ + μₒ = obs_mean(observed(model)) + μ₋ = μₒ - μ + V_μ = semwls.V_μ + if !isnothing(objective) + objective += dot(μ₋, V_μ, μ₋) end - - return objective, hessian - end -end - -objective_hessian!( - semwls::SemWLS, - par, - model::AbstractSemSingle, - has_meanstructure::Val{true}, -) = throw(DomainError(H, "hessian of WLS with meanstructure is not available")) - -function gradient_hessian!( - semwls::SemWLS, - par, - model::AbstractSemSingle, - has_meanstructure::Val{false}, -) - let σ = Σ(imply(model)), - σₒ = semwls.σₒ, - V = semwls.V, - ∇σ = ∇Σ(imply(model)), - ∇²Σ_function! 
= ∇²Σ_function(imply(model)), - ∇²Σ = ∇²Σ(imply(model)) - - σ₋ = σₒ - σ - - gradient = -2 * (σ₋' * V * ∇σ)' - - hessian = 2 * ∇σ' * V * ∇σ - if !semwls.approximate_hessian - J = -2 * (σ₋' * semwls.V)' - ∇²Σ_function!(∇²Σ, J, par) - hessian .+= ∇²Σ + if !isnothing(gradient) + mul!(gradient, (V_μ * implied.∇μ)', μ₋, -2, 1) end - - return gradient, hessian end -end - -gradient_hessian!( - semwls::SemWLS, - par, - model::AbstractSemSingle, - has_meanstructure::Val{true}, -) = throw(DomainError(H, "hessian of WLS with meanstructure is not available")) -function objective_gradient_hessian!( - semwls::SemWLS, - par, - model::AbstractSemSingle, - has_meanstructure::Val{false}, -) - let σ = Σ(imply(model)), - σₒ = semwls.σₒ, - V = semwls.V, - ∇σ = ∇Σ(imply(model)), - ∇²Σ_function! = ∇²Σ_function(imply(model)), - ∇²Σ = ∇²Σ(imply(model)) - - σ₋ = σₒ - σ - - objective = dot(σ₋, V, σ₋) - gradient = -2 * (σ₋' * V * ∇σ)' - hessian = 2 * ∇σ' * V * ∇σ - if !semwls.approximate_hessian - J = -2 * (σ₋' * semwls.V)' - ∇²Σ_function!(∇²Σ, J, par) - hessian .+= ∇²Σ - end - return objective, gradient, hessian - end + return objective end -objective_gradient_hessian!( - semwls::SemWLS, - par, - model::AbstractSemSingle, - has_meanstructure::Val{true}, -) = throw(DomainError(H, "hessian of WLS with meanstructure is not available")) - ############################################################################################ ### Recommended methods ############################################################################################ diff --git a/src/loss/constant/constant.jl b/src/loss/constant/constant.jl index f3165b541..cb5157346 100644 --- a/src/loss/constant/constant.jl +++ b/src/loss/constant/constant.jl @@ -26,6 +26,7 @@ Analytic gradients and hessians are available. Subtype of `SemLossFunction`. 
""" struct SemConstant{C} <: SemLossFunction + hessianeval::ExactHessian c::C end @@ -34,16 +35,17 @@ end ############################################################################################ function SemConstant(; constant_loss, kwargs...) - return SemConstant(constant_loss) + return SemConstant(ExactHessian(), constant_loss) end ############################################################################################ ### methods ############################################################################################ -objective!(constant::SemConstant, par, model) = constant.c -gradient!(constant::SemConstant, par, model) = zero(par) -hessian!(constant::SemConstant, par, model) = zeros(eltype(par), length(par), length(par)) +objective(constant::SemConstant, model::AbstractSem, par) = constant.c +gradient(constant::SemConstant, model::AbstractSem, par) = zero(par) +hessian(constant::SemConstant, model::AbstractSem, par) = + zeros(eltype(par), length(par), length(par)) ############################################################################################ ### Recommended methods diff --git a/src/loss/regularization/ridge.jl b/src/loss/regularization/ridge.jl index ebf3e7bfe..aee521624 100644 --- a/src/loss/regularization/ridge.jl +++ b/src/loss/regularization/ridge.jl @@ -8,18 +8,18 @@ Ridge regularization. # Constructor - SemRidge(;α_ridge, which_ridge, n_par, parameter_type = Float64, imply = nothing, kwargs...) + SemRidge(;α_ridge, which_ridge, nparams, parameter_type = Float64, implied = nothing, kwargs...) # Arguments - `α_ridge`: hyperparameter for penalty term - `which_ridge::Vector`: Vector of parameter labels (Symbols) or indices that indicate which parameters should be regularized. 
-- `n_par::Int`: number of parameters of the model -- `imply::SemImply`: imply part of the model +- `nparams::Int`: number of parameters of the model +- `implied::SemImplied`: implied part of the model - `parameter_type`: type of the parameters # Examples ```julia -my_ridge = SemRidge(;α_ridge = 0.02, which_ridge = [:λ₁, :λ₂, :ω₂₃], n_par = 30, imply = my_imply) +my_ridge = SemRidge(;α_ridge = 0.02, which_ridge = [:λ₁, :λ₂, :ω₂₃], nparams = 30, implied = my_implied) ``` # Interfaces @@ -30,6 +30,7 @@ Analytic gradients and hessians are available. Subtype of `SemLossFunction`. """ struct SemRidge{P, W1, W2, GT, HT} <: SemLossFunction + hessianeval::ExactHessian α::P which::W1 which_H::W2 @@ -45,30 +46,31 @@ end function SemRidge(; α_ridge, which_ridge, - n_par, + nparams, parameter_type = Float64, - imply = nothing, + implied = nothing, kwargs..., ) if eltype(which_ridge) <: Symbol - if isnothing(imply) + if isnothing(implied) throw( ArgumentError( - "When referring to parameters by label, `imply = ...` has to be specified", + "When referring to parameters by label, `implied = ...` has to be specified", ), ) else - which_ridge = get_identifier_indices(which_ridge, imply) + par2ind = param_indices(implied) + which_ridge = getindex.(Ref(par2ind), which_ridge) end end - which = [CartesianIndex(x) for x in which_ridge] which_H = [CartesianIndex(x, x) for x in which_ridge] return SemRidge( + ExactHessian(), α_ridge, - which, + which_ridge, which_H, - zeros(parameter_type, n_par), - zeros(parameter_type, n_par, n_par), + zeros(parameter_type, nparams), + zeros(parameter_type, nparams, nparams), ) end @@ -76,15 +78,16 @@ end ### methods ############################################################################################ -objective!(ridge::SemRidge, par, model) = @views ridge.α * sum(x -> x^2, par[ridge.which]) +objective(ridge::SemRidge, model::AbstractSem, par) = + @views ridge.α * sum(abs2, par[ridge.which]) -function gradient!(ridge::SemRidge, par, model) - 
@views ridge.gradient[ridge.which] .= 2 * ridge.α * par[ridge.which] +function gradient(ridge::SemRidge, model::AbstractSem, par) + @views ridge.gradient[ridge.which] .= (2 * ridge.α) * par[ridge.which] return ridge.gradient end -function hessian!(ridge::SemRidge, par, model) - @views @. ridge.hessian[ridge.which_H] += ridge.α * 2.0 +function hessian(ridge::SemRidge, model::AbstractSem, par) + @views @. ridge.hessian[ridge.which_H] .= 2 * ridge.α return ridge.hessian end diff --git a/src/objective_gradient_hessian.jl b/src/objective_gradient_hessian.jl index 53d68ec2c..4aafe4235 100644 --- a/src/objective_gradient_hessian.jl +++ b/src/objective_gradient_hessian.jl @@ -1,303 +1,160 @@ -############################################################################################ -# methods for AbstractSem -############################################################################################ - -function objective!(model::AbstractSemSingle, parameters) - objective!(imply(model), parameters, model) - return objective!(loss(model), parameters, model) -end - -function gradient!(gradient, model::AbstractSemSingle, parameters) - fill!(gradient, zero(eltype(gradient))) - gradient!(imply(model), parameters, model) - gradient!(gradient, loss(model), parameters, model) -end - -function hessian!(hessian, model::AbstractSemSingle, parameters) - fill!(hessian, zero(eltype(hessian))) - hessian!(imply(model), parameters, model) - hessian!(hessian, loss(model), parameters, model) -end - -function objective_gradient!(gradient, model::AbstractSemSingle, parameters) - fill!(gradient, zero(eltype(gradient))) - objective_gradient!(imply(model), parameters, model) - objective_gradient!(gradient, loss(model), parameters, model) -end - -function objective_hessian!(hessian, model::AbstractSemSingle, parameters) - fill!(hessian, zero(eltype(hessian))) - objective_hessian!(imply(model), parameters, model) - objective_hessian!(hessian, loss(model), parameters, model) -end - -function 
gradient_hessian!(gradient, hessian, model::AbstractSemSingle, parameters) - fill!(gradient, zero(eltype(gradient))) - fill!(hessian, zero(eltype(hessian))) - gradient_hessian!(imply(model), parameters, model) - gradient_hessian!(gradient, hessian, loss(model), parameters, model) -end - -function objective_gradient_hessian!( - gradient, - hessian, - model::AbstractSemSingle, - parameters, +"Specifies whether objective (O), gradient (G) or hessian (H) evaluation is required" +struct EvaluationTargets{O, G, H} end + +EvaluationTargets(objective, gradient, hessian) = + EvaluationTargets{!isnothing(objective), !isnothing(gradient), !isnothing(hessian)}() + +# convenience methods to check type params +is_objective_required(::EvaluationTargets{O}) where {O} = O +is_gradient_required(::EvaluationTargets{<:Any, G}) where {G} = G +is_hessian_required(::EvaluationTargets{<:Any, <:Any, H}) where {H} = H + +# return the tuple of the required results +(::EvaluationTargets{true, false, false})(objective, gradient, hessian) = objective +(::EvaluationTargets{false, true, false})(objective, gradient, hessian) = gradient +(::EvaluationTargets{false, false, true})(objective, gradient, hessian) = hessian +(::EvaluationTargets{true, true, false})(objective, gradient, hessian) = + (objective, gradient) +(::EvaluationTargets{true, false, true})(objective, gradient, hessian) = + (objective, hessian) +(::EvaluationTargets{false, true, true})(objective, gradient, hessian) = (gradient, hessian) +(::EvaluationTargets{true, true, true})(objective, gradient, hessian) = + (objective, gradient, hessian) + +(targets::EvaluationTargets)(arg_tuple::Tuple) = targets(arg_tuple...) 
+ +# dispatch on SemImplied +evaluate!(objective, gradient, hessian, loss::SemLossFunction, model::AbstractSem, params) = + evaluate!(objective, gradient, hessian, loss, implied(model), model, params) + +# fallback method +function evaluate!( + obj, + grad, + hess, + loss::SemLossFunction, + implied::SemImplied, + model, + params, ) - fill!(gradient, zero(eltype(gradient))) - fill!(hessian, zero(eltype(hessian))) - objective_gradient_hessian!(imply(model), parameters, model) - return objective_gradient_hessian!(gradient, hessian, loss(model), parameters, model) -end + isnothing(obj) || (obj = objective(loss, implied, model, params)) + isnothing(grad) || copyto!(grad, gradient(loss, implied, model, params)) + isnothing(hess) || copyto!(hess, hessian(loss, implied, model, params)) + return obj +end + +# fallback methods +objective(f::SemLossFunction, implied::SemImplied, model, params) = + objective(f, model, params) +gradient(f::SemLossFunction, implied::SemImplied, model, params) = + gradient(f, model, params) +hessian(f::SemLossFunction, implied::SemImplied, model, params) = hessian(f, model, params) + +# fallback method for SemImplied that calls update_xxx!() methods +function update!(targets::EvaluationTargets, implied::SemImplied, model, params) + is_objective_required(targets) && update_objective!(implied, model, params) + is_gradient_required(targets) && update_gradient!(implied, model, params) + is_hessian_required(targets) && update_hessian!(implied, model, params) +end + +# guess objective type +objective_type(model::AbstractSem, params::Any) = Float64 +objective_type(model::AbstractSem, params::AbstractVector{T}) where {T <: Number} = T +objective_zero(model::AbstractSem, params::Any) = zero(objective_type(model, params)) + +objective_type(objective::T, gradient, hessian) where {T <: Number} = T +objective_type( + objective::Nothing, + gradient::AbstractArray{T}, + hessian, +) where {T <: Number} = T +objective_type( + objective::Nothing, + 
gradient::Nothing, + hessian::AbstractArray{T}, +) where {T <: Number} = T +objective_zero(objective, gradient, hessian) = + zero(objective_type(objective, gradient, hessian)) ############################################################################################ -# methods for SemFiniteDiff +# methods for AbstractSem ############################################################################################ -gradient!(gradient, model::SemFiniteDiff, par) = - FiniteDiff.finite_difference_gradient!(gradient, x -> objective!(model, x), par) - -hessian!(hessian, model::SemFiniteDiff, par) = - FiniteDiff.finite_difference_hessian!(hessian, x -> objective!(model, x), par) - -function objective_gradient!(gradient, model::SemFiniteDiff, parameters) - gradient!(gradient, model, parameters) - return objective!(model, parameters) -end - -# other methods -function gradient_hessian!(gradient, hessian, model::SemFiniteDiff, parameters) - gradient!(gradient, model, parameters) - hessian!(hessian, model, parameters) -end - -function objective_hessian!(hessian, model::SemFiniteDiff, parameters) - hessian!(hessian, model, parameters) - return objective!(model, parameters) -end - -function objective_gradient_hessian!(gradient, hessian, model::SemFiniteDiff, parameters) - hessian!(hessian, model, parameters) - return objective_gradient!(gradient, model, parameters) +function evaluate!(objective, gradient, hessian, model::AbstractSemSingle, params) + targets = EvaluationTargets(objective, gradient, hessian) + # update implied state, its gradient and hessian (if required) + update!(targets, implied(model), model, params) + return evaluate!( + !isnothing(objective) ? 
zero(objective) : nothing, + gradient, + hessian, + loss(model), + model, + params, + ) end ############################################################################################ -# methods for SemLoss +# methods for SemFiniteDiff (approximate gradient and hessian with finite differences of objective) ############################################################################################ -function objective!(loss::SemLoss, par, model) - return mapreduce( - (fun, weight) -> weight * objective!(fun, par, model), - +, - loss.functions, - loss.weights, - ) -end - -function gradient!(gradient, loss::SemLoss, par, model) - for (lossfun, w) in zip(loss.functions, loss.weights) - new_gradient = gradient!(lossfun, par, model) - gradient .+= w * new_gradient +function evaluate!(objective, gradient, hessian, model::SemFiniteDiff, params) + function obj(p) + # recalculate implied state for p + update!(EvaluationTargets{true, false, false}(), implied(model), model, p) + evaluate!( + objective_zero(objective, gradient, hessian), + nothing, + nothing, + loss(model), + model, + p, + ) end + isnothing(gradient) || FiniteDiff.finite_difference_gradient!(gradient, obj, params) + isnothing(hessian) || FiniteDiff.finite_difference_hessian!(hessian, obj, params) + return !isnothing(objective) ? 
obj(params) : nothing end -function hessian!(hessian, loss::SemLoss, par, model) - for (lossfun, w) in zip(loss.functions, loss.weights) - hessian .+= w * hessian!(lossfun, par, model) - end -end - -function objective_gradient!(gradient, loss::SemLoss, par, model) - return mapreduce( - (fun, weight) -> objective_gradient_wrap_(gradient, fun, par, model, weight), - +, - loss.functions, - loss.weights, - ) -end - -function objective_hessian!(hessian, loss::SemLoss, par, model) - return mapreduce( - (fun, weight) -> objective_hessian_wrap_(hessian, fun, par, model, weight), - +, - loss.functions, - loss.weights, - ) -end - -function gradient_hessian!(gradient, hessian, loss::SemLoss, par, model) - for (lossfun, w) in zip(loss.functions, loss.weights) - new_gradient, new_hessian = gradient_hessian!(lossfun, par, model) - gradient .+= w * new_gradient - hessian .+= w * new_hessian - end -end - -function objective_gradient_hessian!(gradient, hessian, loss::SemLoss, par, model) - return mapreduce( - (fun, weight) -> - objective_gradient_hessian_wrap_(gradient, hessian, fun, par, model, weight), - +, - loss.functions, - loss.weights, - ) -end - -# wrapper to update gradient/hessian and return objective value -function objective_gradient_wrap_(gradient, lossfun, par, model, w) - new_objective, new_gradient = objective_gradient!(lossfun, par, model) - gradient .+= w * new_gradient - return w * new_objective -end - -function objective_hessian_wrap_(hessian, lossfun, par, model, w) - new_objective, new_hessian = objective_hessian!(lossfun, par, model) - hessian .+= w * new_hessian - return w * new_objective -end - -function objective_gradient_hessian_wrap_(gradient, hessian, lossfun, par, model, w) - new_objective, new_gradient, new_hessian = - objective_gradient_hessian!(lossfun, par, model) - gradient .+= w * new_gradient - hessian .+= w * new_hessian - return w * new_objective -end +objective(model::AbstractSem, params) = + evaluate!(objective_zero(model, params), nothing, 
nothing, model, params) ############################################################################################ -# methods for SemEnsemble +# methods for SemLoss (weighted sum of individual SemLossFunctions) ############################################################################################ -function objective!(ensemble::SemEnsemble, par) - return mapreduce( - (model, weight) -> weight * objective!(model, par), - +, - ensemble.sems, - ensemble.weights, - ) -end - -function gradient!(gradient, ensemble::SemEnsemble, par) - fill!(gradient, zero(eltype(gradient))) - for (model, w) in zip(ensemble.sems, ensemble.weights) - gradient_new = similar(gradient) - gradient!(gradient_new, model, par) - gradient .+= w * gradient_new +function evaluate!(objective, gradient, hessian, loss::SemLoss, model::AbstractSem, params) + isnothing(objective) || (objective = zero(objective)) + isnothing(gradient) || fill!(gradient, zero(eltype(gradient))) + isnothing(hessian) || fill!(hessian, zero(eltype(hessian))) + f_grad = isnothing(gradient) ? nothing : similar(gradient) + f_hess = isnothing(hessian) ? 
nothing : similar(hessian) + for (f, weight) in zip(loss.functions, loss.weights) + f_obj = evaluate!(objective, f_grad, f_hess, f, model, params) + isnothing(objective) || (objective += weight * f_obj) + isnothing(gradient) || (gradient .+= weight * f_grad) + isnothing(hessian) || (hessian .+= weight * f_hess) end -end - -function hessian!(hessian, ensemble::SemEnsemble, par) - fill!(hessian, zero(eltype(hessian))) - for (model, w) in zip(ensemble.sems, ensemble.weights) - hessian_new = similar(hessian) - hessian!(hessian_new, model, par) - hessian .+= w * hessian_new - end -end - -function objective_gradient!(gradient, ensemble::SemEnsemble, par) - fill!(gradient, zero(eltype(gradient))) - return mapreduce( - (model, weight) -> objective_gradient_wrap_(gradient, model, par, weight), - +, - ensemble.sems, - ensemble.weights, - ) -end - -function objective_hessian!(hessian, ensemble::SemEnsemble, par) - fill!(hessian, zero(eltype(hessian))) - return mapreduce( - (model, weight) -> objective_hessian_wrap_(hessian, model, par, weight), - +, - ensemble.sems, - ensemble.weights, - ) -end - -function gradient_hessian!(gradient, hessian, ensemble::SemEnsemble, par) - fill!(gradient, zero(eltype(gradient))) - fill!(hessian, zero(eltype(hessian))) - for (model, w) in zip(ensemble.sems, ensemble.weights) - new_gradient = similar(gradient) - new_hessian = similar(hessian) - - gradient_hessian!(new_gradient, new_hessian, model, par) - - gradient .+= w * new_gradient - hessian .+= w * new_hessian - end -end - -function objective_gradient_hessian!(gradient, hessian, ensemble::SemEnsemble, par) - fill!(gradient, zero(eltype(gradient))) - fill!(hessian, zero(eltype(hessian))) - return mapreduce( - (model, weight) -> - objective_gradient_hessian_wrap_(gradient, hessian, model, par, model, weight), - +, - ensemble.sems, - ensemble.weights, - ) -end - -# wrapper to update gradient/hessian and return objective value -function objective_gradient_wrap_(gradient, 
model::AbstractSemSingle, par, w) - gradient_pre = similar(gradient) - new_objective = objective_gradient!(gradient_pre, model, par) - gradient .+= w * gradient_pre - return w * new_objective -end - -function objective_hessian_wrap_(hessian, model::AbstractSemSingle, par, w) - hessian_pre = similar(hessian) - new_objective = objective_hessian!(hessian_pre, model, par) - hessian .+= w * new_hessian - return w * new_objective -end - -function objective_gradient_hessian_wrap_( - gradient, - hessian, - model::AbstractSemSingle, - par, - w, -) - gradient_pre = similar(gradient) - hessian_pre = similar(hessian) - new_objective = objective_gradient_hessian!(gradient_pre, hessian_pre, model, par) - gradient .+= w * new_gradient - hessian .+= w * new_hessian - return w * new_objective + return objective end ############################################################################################ -# generic methods for loss functions +# methods for SemEnsemble (weighted sum of individual AbstractSemSingle models) ############################################################################################ -function objective_gradient!(lossfun::SemLossFunction, par, model) - objective = objective!(lossfun::SemLossFunction, par, model) - gradient = gradient!(lossfun::SemLossFunction, par, model) - return objective, gradient -end - -function objective_hessian!(lossfun::SemLossFunction, par, model) - objective = objective!(lossfun::SemLossFunction, par, model) - hessian = hessian!(lossfun::SemLossFunction, par, model) - return objective, hessian -end - -function gradient_hessian!(lossfun::SemLossFunction, par, model) - gradient = gradient!(lossfun::SemLossFunction, par, model) - hessian = hessian!(lossfun::SemLossFunction, par, model) - return gradient, hessian -end - -function objective_gradient_hessian!(lossfun::SemLossFunction, par, model) - objective = objective!(lossfun::SemLossFunction, par, model) - gradient = gradient!(lossfun::SemLossFunction, par, model) - hessian = 
hessian!(lossfun::SemLossFunction, par, model) - return objective, gradient, hessian +function evaluate!(objective, gradient, hessian, ensemble::SemEnsemble, params) + isnothing(objective) || (objective = zero(objective)) + isnothing(gradient) || fill!(gradient, zero(eltype(gradient))) + isnothing(hessian) || fill!(hessian, zero(eltype(hessian))) + sem_grad = isnothing(gradient) ? nothing : similar(gradient) + sem_hess = isnothing(hessian) ? nothing : similar(hessian) + for (sem, weight) in zip(ensemble.sems, ensemble.weights) + sem_obj = evaluate!(objective, sem_grad, sem_hess, sem, params) + isnothing(objective) || (objective += weight * sem_obj) + isnothing(gradient) || (gradient .+= weight * sem_grad) + isnothing(hessian) || (hessian .+= weight * sem_hess) + end + return objective end # throw an error by default if gradient! and hessian! are not implemented @@ -308,77 +165,63 @@ end hessian!(lossfun::SemLossFunction, par, model) = throw(ArgumentError("hessian for $(typeof(lossfun).name.wrapper) is not available")) =# -############################################################################################ -# generic methods for imply -############################################################################################ - -function objective_gradient!(semimp::SemImply, par, model) - objective!(semimp::SemImply, par, model) - gradient!(semimp::SemImply, par, model) - return nothing -end - -function objective_hessian!(semimp::SemImply, par, model) - objective!(semimp::SemImply, par, model) - hessian!(semimp::SemImply, par, model) - return nothing -end - -function gradient_hessian!(semimp::SemImply, par, model) - gradient!(semimp::SemImply, par, model) - hessian!(semimp::SemImply, par, model) - return nothing -end - -function objective_gradient_hessian!(semimp::SemImply, par, model) - objective!(semimp::SemImply, par, model) - gradient!(semimp::SemImply, par, model) - hessian!(semimp::SemImply, par, model) - return nothing -end - 
############################################################################################ # Documentation ############################################################################################ """ - objective!(model::AbstractSem, parameters) + objective!(model::AbstractSem, params) -Returns the objective value at `parameters`. +Returns the objective value at `params`. The model object can be modified. # Implementation -To implement a new `SemImply` or `SemLossFunction` subtype, you need to add a method for - objective!(newtype::MyNewType, parameters, model::AbstractSemSingle) +To implement a new `SemImplied` or `SemLossFunction` subtype, you need to add a method for + objective!(newtype::MyNewType, params, model::AbstractSemSingle) To implement a new `AbstractSem` subtype, you need to add a method for - objective!(model::MyNewType, parameters) + objective!(model::MyNewType, params) """ function objective! end """ - gradient!(gradient, model::AbstractSem, parameters) + gradient!(gradient, model::AbstractSem, params) -Writes the gradient value at `parameters` to `gradient`. +Writes the gradient value at `params` to `gradient`. # Implementation -To implement a new `SemImply` or `SemLossFunction` type, you can add a method for - gradient!(newtype::MyNewType, parameters, model::AbstractSemSingle) +To implement a new `SemImplied` or `SemLossFunction` type, you can add a method for + gradient!(newtype::MyNewType, params, model::AbstractSemSingle) To implement a new `AbstractSem` subtype, you can add a method for - gradient!(gradient, model::MyNewType, parameters) + gradient!(gradient, model::MyNewType, params) """ function gradient! end """ - hessian!(hessian, model::AbstractSem, parameters) + hessian!(hessian, model::AbstractSem, params) -Writes the hessian value at `parameters` to `hessian`. +Writes the hessian value at `params` to `hessian`. 
# Implementation -To implement a new `SemImply` or `SemLossFunction` type, you can add a method for - hessian!(newtype::MyNewType, parameters, model::AbstractSemSingle) +To implement a new `SemImplied` or `SemLossFunction` type, you can add a method for + hessian!(newtype::MyNewType, params, model::AbstractSemSingle) To implement a new `AbstractSem` subtype, you can add a method for - hessian!(hessian, model::MyNewType, parameters) + hessian!(hessian, model::MyNewType, params) """ function hessian! end + +objective!(model::AbstractSem, params) = + evaluate!(objective_zero(model, params), nothing, nothing, model, params) +gradient!(gradient, model::AbstractSem, params) = + evaluate!(nothing, gradient, nothing, model, params) +hessian!(hessian, model::AbstractSem, params) = + evaluate!(nothing, nothing, hessian, model, params) +objective_gradient!(gradient, model::AbstractSem, params) = + evaluate!(objective_zero(model, params), gradient, nothing, model, params) +objective_hessian!(hessian, model::AbstractSem, params) = + evaluate!(objective_zero(model, params), nothing, hessian, model, params) +gradient_hessian!(gradient, hessian, model::AbstractSem, params) = + evaluate!(nothing, gradient, hessian, model, params) +objective_gradient_hessian!(gradient, hessian, model::AbstractSem, params) = + evaluate!(objective_zero(model, params), gradient, hessian, model, params) diff --git a/src/observed/EM.jl b/src/observed/EM.jl index 09dfbd82e..beac45ca8 100644 --- a/src/observed/EM.jl +++ b/src/observed/EM.jl @@ -29,39 +29,40 @@ function em_mvn( rtol_em = 1e-4, kwargs..., ) - n_obs, n_man = observed.n_obs, Int(observed.n_man) + nvars = nobserved_vars(observed) + nsamps = nsamples(observed) # preallocate stuff? 
- 𝔼x_pre = zeros(n_man) - 𝔼xxᵀ_pre = zeros(n_man, n_man) + 𝔼x_pre = zeros(nvars) + 𝔼xxᵀ_pre = zeros(nvars, nvars) ### precompute for full cases - if length(observed.patterns[1]) == observed.n_man - for row in observed.rows[1] - row = observed.data_rowwise[row] + fullpat = observed.patterns[1] + if nmissed_vars(fullpat) == 0 + for row in eachrow(fullpat.data) 𝔼x_pre += row 𝔼xxᵀ_pre += row * row' end end - # ess = 𝔼x, 𝔼xxᵀ, ismissing, missingRows, n_obs - # estepFn = (em_model, data) -> estep(em_model, data, EXsum, EXXsum, ismissing, missingRows, n_obs) + # ess = 𝔼x, 𝔼xxᵀ, ismissing, missingRows, nsamps + # estepFn = (em_model, data) -> estep(em_model, data, EXsum, EXXsum, ismissing, missingRows, nsamps) # initialize em_model = start_em(observed; kwargs...) - em_model_prev = EmMVNModel(zeros(n_man, n_man), zeros(n_man), false) + em_model_prev = EmMVNModel(zeros(nvars, nvars), zeros(nvars), false) iter = 1 done = false - 𝔼x = zeros(n_man) - 𝔼xxᵀ = zeros(n_man, n_man) + 𝔼x = zeros(nvars) + 𝔼xxᵀ = zeros(nvars, nvars) while !done em_mvn_Estep!(𝔼x, 𝔼xxᵀ, em_model, observed, 𝔼x_pre, 𝔼xxᵀ_pre) - em_mvn_Mstep!(em_model, n_obs, 𝔼x, 𝔼xxᵀ) + em_mvn_Mstep!(em_model, nsamps, 𝔼x, 𝔼xxᵀ) if iter > max_iter_em done = true - @warn "EM Algorithm for MVN missing data did not converge. Likelihood for FIML is not interpretable. + @warn "EM Algorithm for MVN missing data did not converge. Likelihood for FIML is not interpretable. Maybe try passing different starting values via 'start_em = ...' 
" elseif iter > 1 # done = isapprox(ll, ll_prev; rtol = rtol) @@ -96,21 +97,27 @@ function em_mvn_Estep!(𝔼x, 𝔼xxᵀ, em_model, observed, 𝔼x_pre, 𝔼xx Σ = em_model.Σ # Compute the expected sufficient statistics - for i in 2:length(observed.pattern_n_obs) + for pat in observed.patterns + (nmissed_vars(pat) == 0) && continue # skip full cases # observed and unobserved vars - u = observed.patterns_not[i] - o = observed.patterns[i] + u = pat.miss_mask + o = pat.measured_mask # precompute for pattern - V = Σ[u, u] - Σ[u, o] * (Σ[o, o] \ Σ[o, u]) + Σoo = Σ[o, o] + Σuo = Σ[u, o] + μu = μ[u] + μo = μ[o] + + V = Σ[u, u] - Σuo * (Σoo \ Σ[o, u]) # loop trough data - for row in observed.rows[i] - m = μ[u] + Σ[u, o] * (Σ[o, o] \ (observed.data_rowwise[row] - μ[o])) + for rowdata in eachrow(pat.data) + m = μu + Σuo * (Σoo \ (rowdata - μo)) 𝔼xᵢ[u] = m - 𝔼xᵢ[o] = observed.data_rowwise[row] + 𝔼xᵢ[o] = rowdata 𝔼xxᵀᵢ[u, u] = 𝔼xᵢ[u] * 𝔼xᵢ[u]' + V 𝔼xxᵀᵢ[o, o] = 𝔼xᵢ[o] * 𝔼xᵢ[o]' 𝔼xxᵀᵢ[o, u] = 𝔼xᵢ[o] * 𝔼xᵢ[u]' @@ -125,9 +132,9 @@ function em_mvn_Estep!(𝔼x, 𝔼xxᵀ, em_model, observed, 𝔼x_pre, 𝔼xx 𝔼xxᵀ .+= 𝔼xxᵀ_pre end -function em_mvn_Mstep!(em_model, n_obs, 𝔼x, 𝔼xxᵀ) - em_model.μ = 𝔼x / n_obs - Σ = Symmetric(𝔼xxᵀ / n_obs - em_model.μ * em_model.μ') +function em_mvn_Mstep!(em_model, nsamples, 𝔼x, 𝔼xxᵀ) + em_model.μ = 𝔼x / nsamples + Σ = Symmetric(𝔼xxᵀ / nsamples - em_model.μ * em_model.μ') # ridge Σ # while !isposdef(Σ) @@ -152,9 +159,10 @@ end # use μ and Σ of full cases function start_em_observed(observed::SemObservedMissing; kwargs...) 
- if (length(observed.patterns[1]) == observed.n_man) & (observed.pattern_n_obs[1] > 1) - μ = copy(observed.obs_mean[1]) - Σ = copy(Symmetric(observed.obs_cov[1])) + fullpat = observed.patterns[1] + if (nmissed_vars(fullpat) == 0) && (nobserved_vars(fullpat) > 1) + μ = copy(fullpat.measured_mean) + Σ = copy(Symmetric(fullpat.measured_cov)) if !isposdef(Σ) Σ = Matrix(Diagonal(Σ)) end @@ -166,11 +174,11 @@ end # use μ = O and Σ = I function start_em_simple(observed::SemObservedMissing; kwargs...) - n_man = Int(observed.n_man) - μ = zeros(n_man) - Σ = rand(n_man, n_man) + nvars = nobserved_vars(observed) + μ = zeros(nvars) + Σ = rand(nvars, nvars) Σ = Σ * Σ' - # Σ = Matrix(1.0I, n_man, n_man) + # Σ = Matrix(1.0I, nvars, nvars) return EmMVNModel(Σ, μ, false) end diff --git a/src/observed/abstract.jl b/src/observed/abstract.jl new file mode 100644 index 000000000..bb92ea12e --- /dev/null +++ b/src/observed/abstract.jl @@ -0,0 +1,153 @@ +""" + samples(observed::SemObservedData) + +Gets the matrix of observed data samples. +Rows are samples, columns are observed variables. + +## See Also +[`nsamples`](@ref), [`observed_vars`](@ref). +""" +samples(observed::SemObserved) = observed.data +nsamples(observed::SemObserved) = observed.nsamples + +observed_vars(observed::SemObserved) = observed.observed_vars + +############################################################################################ +### Additional functions +############################################################################################ + +# generate default observed variable names if none provided +default_observed_vars(nobserved_vars::Integer, prefix::Union{Symbol, AbstractString}) = + Symbol.(prefix, 1:nobserved_vars) + +# compute the permutation that subsets and reorders source elements +# to match the destination order. +# if multiple identical elements are present in the source, the last one is used. +# if one_to_one is true, checks that the source and destination have the same length. 
+function source_to_dest_perm( + src::AbstractVector, + dest::AbstractVector; + one_to_one::Bool = false, + entities::String = "elements", +) + if dest == src # exact match + return eachindex(dest) + else + one_to_one && + length(dest) != length(src) && + throw( + DimensionMismatch( + "The length of the new $entities order ($(length(dest))) " * + "does not match the number of $entities ($(length(src)))", + ), + ) + src_inds = Dict(el => i for (i, el) in enumerate(src)) + return [src_inds[el] for el in dest] + end +end + +# function to prepare input data shared by SemObserved implementations +# returns tuple of +# 1) the matrix of data +# 2) the observed variable symbols that match matrix columns +# 3) the permutation of the original observed_vars (nothing if no reordering) +# If observed_vars is not specified, the vars order is taken from the specification. +# If both observed_vars and specification are provided, the observed_vars are used to match +# the column of the user-provided data matrix, and observed_vars(specification) is used to +# reorder the columns of the data to match the speciation. +# If no variable names are provided at all, generates the symbols in the form +# Symbol(observed_var_prefix, i) for i=1:nobserved_vars. 
+function prepare_data( + data::Union{AbstractDataFrame, AbstractMatrix, NTuple{2, Integer}, Nothing}, + observed_vars::Union{AbstractVector, Nothing}, + spec::Union{SemSpecification, Nothing}; + observed_var_prefix::Union{Symbol, AbstractString}, +) + obs_vars = nothing + obs_vars_perm = nothing + if !isnothing(observed_vars) + obs_vars = Symbol.(observed_vars) + if !isnothing(spec) + obs_vars_spec = SEM.observed_vars(spec) + try + obs_vars_perm = source_to_dest_perm( + obs_vars, + obs_vars_spec, + one_to_one = false, + entities = "observed_vars", + ) + catch err + if isa(err, KeyError) + throw( + ArgumentError( + "observed_var \"$(err.key)\" from SEM specification is not listed in observed_vars argument", + ), + ) + else + rethrow(err) + end + end + # ignore trivial reorder + if obs_vars_perm == eachindex(obs_vars) + obs_vars_perm = nothing + end + end + elseif !isnothing(spec) + obs_vars = SEM.observed_vars(spec) + end + # observed vars in the order that matches the specification + obs_vars_reordered = isnothing(obs_vars_perm) ? obs_vars : obs_vars[obs_vars_perm] + + # subset the data, check that obs_vars matches data or guess the obs_vars + if data isa AbstractDataFrame + if !isnothing(obs_vars_reordered) # subset/reorder columns + data = data[:, obs_vars_reordered] + if obs_vars_reordered != obs_vars + @warn "The order of variables in observed_vars argument does not match the order of observed_vars(specification). The specification order is used." + end + else # default symbol names + obs_vars = obs_vars_reordered = Symbol.(names(data)) + end + data_mtx = Matrix(data) + elseif data isa AbstractMatrix + if !isnothing(obs_vars) + size(data, 2) == length(obs_vars) || DimensionMismatch( + "The number of columns in the data matrix ($(size(data, 2))) does not match the length of observed_vars ($(length(obs_vars))).", + ) + # reorder columns to match the spec + data_ordered = !isnothing(obs_vars_perm) ? 
data[:, obs_vars_perm] : data + else + obs_vars = + obs_vars_reordered = + default_observed_vars(size(data, 2), observed_var_prefix) + data_ordered = data + end + # make sure data_mtx is a dense matrix (required for methods like mean_and_cov()) + data_mtx = convert(Matrix, data_ordered) + elseif data isa NTuple{2, Integer} # given the dimensions of the data matrix, but no data itself + data_mtx = nothing + nobs_vars = data[2] + if isnothing(obs_vars) + obs_vars = + obs_vars_reordered = default_observed_vars(nobs_vars, observed_var_prefix) + elseif length(obs_vars) != nobs_vars + throw( + DimensionMismatch( + "The length of observed_vars ($(length(obs_vars))) does not match the data matrix columns ($(nobs_vars)).", + ), + ) + end + elseif isnothing(data) + data_mtx = nothing + if isnothing(obs_vars) + throw( + ArgumentError( + "No data, specification or observed_vars provided. Cannot infer observed_vars from provided inputs", + ), + ) + end + else + throw(ArgumentError("Unsupported data type: $(typeof(data))")) + end + return data_mtx, obs_vars_reordered, obs_vars_perm +end diff --git a/src/observed/covariance.jl b/src/observed/covariance.jl index 1b5de9fc2..221ef5ca3 100644 --- a/src/observed/covariance.jl +++ b/src/observed/covariance.jl @@ -1,125 +1,80 @@ """ -For observed covariance matrices and means. +Type alias for [`SemObservedData`](@ref) that has mean and covariance, but no actual data. -# Constructor +For instances of `SemObservedCovariance` [`samples`](@ref) returns `nothing`. +""" +const SemObservedCovariance{S} = SemObservedData{Nothing, S} +""" SemObservedCovariance(; specification, obs_cov, obs_colnames = nothing, meanstructure = false, obs_mean = nothing, - n_obs = nothing, + nsamples::Integer, kwargs...) 
-# Arguments -- `specification`: either a `RAMMatrices` or `ParameterTable` object (1) -- `obs_cov`: observed covariance matrix -- `obs_colnames::Vector{Symbol}`: column names of the covariance matrix -- `meanstructure::Bool`: does the model have a meanstructure? -- `obs_mean`: observed mean vector -- `n_obs::Number`: number of observed data points (necessary for fit statistics) - -# Extended help -## Interfaces -- `n_obs(::SemObservedCovariance)` -> number of observed data points -- `n_man(::SemObservedCovariance)` -> number of manifest variables +Construct [`SemObserved`](@ref) without providing the observations data, +but with the covariations (`obs_cov`) and the means (`obs_means`) of the observed variables. -- `obs_cov(::SemObservedCovariance)` -> observed covariance matrix -- `obs_mean(::SemObservedCovariance)` -> observed means +Returns [`SemObservedCovariance`](@ref) object. -## Implementation -Subtype of `SemObserved` - -## Remarks -(1) the `specification` argument can also be `nothing`, but this turns of checking whether -the observed data/covariance columns are in the correct order! As a result, you should only -use this if you are sure your covariance matrix is in the right format. 
- -## Additional keyword arguments: -- `spec_colnames::Vector{Symbol} = nothing`: overwrites column names of the specification object +# Arguments +- `obs_cov`: pre-computed covariations of the observed variables +- `obs_mean`: optional pre-computed means of the observed variables +- `observed_vars::AbstractVector`: IDs of the observed variables (rows and columns of the `obs_cov` matrix) +- `specification`: optional SEM specification ([`SemSpecification`](@ref)) +- `nsamples::Number`: number of samples (observed data points) used to compute `obs_cov` and `obs_means` + necessary for calculating fit statistics """ -struct SemObservedCovariance{B, C} <: SemObserved - obs_cov::B - obs_mean::C - n_man::Int - n_obs::Int -end - function SemObservedCovariance(; - specification, - obs_cov, - obs_colnames = nothing, - spec_colnames = nothing, - obs_mean = nothing, - meanstructure = false, - n_obs = nothing, + obs_cov::AbstractMatrix, + obs_mean::Union{AbstractVector, Nothing} = nothing, + observed_vars::Union{AbstractVector, Nothing} = nothing, + specification::Union{SemSpecification, Nothing} = nothing, + nsamples::Integer, + observed_var_prefix::Union{Symbol, AbstractString} = :obs, kwargs..., ) - if !meanstructure & !isnothing(obs_mean) - throw(ArgumentError("observed means were passed, but `meanstructure = false`")) - - elseif meanstructure & isnothing(obs_mean) - throw(ArgumentError("`meanstructure = true`, but no observed means were passed")) - end - - if isnothing(spec_colnames) - spec_colnames = get_colnames(specification) + nvars = size(obs_cov, 1) + size(obs_cov, 2) == nvars || throw( + DimensionMismatch( + "The covariance matrix should be square, $(size(obs_cov)) was found.", + ), + ) + S = eltype(obs_cov) + + if isnothing(obs_mean) + obs_mean = zeros(S, nvars) + else + length(obs_mean) == nvars || throw( + DimensionMismatch( + "The length of the mean vector $(length(obs_mean)) does not match the size of the covariance matrix $(size(obs_cov))", + ), + ) + S = 
promote_type(S, eltype(obs_mean)) end - if !isnothing(spec_colnames) & isnothing(obs_colnames) - throw(ArgumentError("no `obs_colnames` were specified")) + obs_cov = convert(Matrix{S}, obs_cov) + obs_mean = convert(Vector{S}, obs_mean) - elseif !isnothing(spec_colnames) & !(eltype(obs_colnames) <: Symbol) - throw(ArgumentError("please specify `obs_colnames` as a vector of Symbols")) + if !isnothing(observed_vars) + length(observed_vars) == nvars || throw( + DimensionMismatch( + "The length of the observed_vars $(length(observed_vars)) does not match the size of the covariance matrix $(size(obs_cov))", + ), + ) end - if !isnothing(spec_colnames) - obs_cov = reorder_obs_cov(obs_cov, spec_colnames, obs_colnames) - isnothing(obs_mean) || - (obs_mean = reorder_obs_mean(obs_mean, spec_colnames, obs_colnames)) - end - - return SemObservedCovariance(obs_cov, obs_mean, size(obs_cov, 1), n_obs) -end - -############################################################################################ -### Recommended methods -############################################################################################ - -n_obs(observed::SemObservedCovariance) = observed.n_obs -n_man(observed::SemObservedCovariance) = observed.n_man - -############################################################################################ -### additional methods -############################################################################################ - -obs_cov(observed::SemObservedCovariance) = observed.obs_cov -obs_mean(observed::SemObservedCovariance) = observed.obs_mean + _, obs_vars, obs_vars_perm = + prepare_data((nsamples, nvars), observed_vars, specification; observed_var_prefix) -############################################################################################ -### Additional functions -############################################################################################ - -# reorder covariance matrices -------------------------------------------------------------- 
-function reorder_obs_cov(obs_cov, spec_colnames, obs_colnames) - if spec_colnames == obs_colnames - return obs_cov - else - new_position = [findfirst(==(x), obs_colnames) for x in spec_colnames] - obs_cov = obs_cov[new_position, new_position] - return obs_cov + # reorder to match the specification + if !isnothing(obs_vars_perm) + obs_cov = obs_cov[obs_vars_perm, obs_vars_perm] + obs_mean = obs_mean[obs_vars_perm] end -end -# reorder means ---------------------------------------------------------------------------- - -function reorder_obs_mean(obs_mean, spec_colnames, obs_colnames) - if spec_colnames == obs_colnames - return obs_mean - else - new_position = [findfirst(==(x), obs_colnames) for x in spec_colnames] - obs_mean = obs_mean[new_position] - return obs_mean - end + return SemObservedData(nothing, obs_vars, obs_cov, obs_mean, nsamples) end diff --git a/src/observed/data.jl b/src/observed/data.jl index 0d9ad3a04..b6ddaa43d 100644 --- a/src/observed/data.jl +++ b/src/observed/data.jl @@ -4,142 +4,57 @@ For observed data without missings. # Constructor SemObservedData(; - specification, data, - meanstructure = false, - obs_colnames = nothing, + observed_vars = nothing, + specification = nothing, kwargs...) # Arguments -- `specification`: either a `RAMMatrices` or `ParameterTable` object (1) -- `data`: observed data -- `meanstructure::Bool`: does the model have a meanstructure? -- `obs_colnames::Vector{Symbol}`: column names of the data (if the object passed as data does not have column names, i.e. is not a data frame) +- `specification`: optional SEM specification ([`SemSpecification`](@ref)) +- `data`: observed data -- *DataFrame* or *Matrix* +- `observed_vars::Vector{Symbol}`: column names of the data (if the object passed as data does not have column names, i.e. 
is not a data frame) # Extended help ## Interfaces -- `n_obs(::SemObservedData)` -> number of observed data points -- `n_man(::SemObservedData)` -> number of manifest variables +- `nsamples(::SemObservedData)` -> number of observed data points +- `nobserved_vars(::SemObservedData)` -> number of observed (manifested) variables -- `get_data(::SemObservedData)` -> observed data +- `samples(::SemObservedData)` -> observed data - `obs_cov(::SemObservedData)` -> observed.obs_cov - `obs_mean(::SemObservedData)` -> observed.obs_mean -- `data_rowwise(::SemObservedData)` -> observed data, stored as vectors per observation ## Implementation Subtype of `SemObserved` - -## Remarks -(1) the `specification` argument can also be `nothing`, but this turns of checking whether -the observed data/covariance columns are in the correct order! As a result, you should only -use this if you are sure your observed data is in the right format. - -## Additional keyword arguments: -- `spec_colnames::Vector{Symbol} = nothing`: overwrites column names of the specification object -- `compute_covariance::Bool ) = true`: should the covariance of `data` be computed and stored? -- `rowwise::Bool = false`: should the data be stored also as vectors per observation """ -struct SemObservedData{A, B, C, R} <: SemObserved - data::A - obs_cov::B - obs_mean::C - n_man::Int - n_obs::Int - data_rowwise::R -end - -# error checks -function check_arguments_SemObservedData(kwargs...) 
- # data is a data frame, - +struct SemObservedData{D <: Union{Nothing, AbstractMatrix}, S <: Number} <: SemObserved + data::D + observed_vars::Vector{Symbol} + obs_cov::Matrix{S} + obs_mean::Vector{S} + nsamples::Int end function SemObservedData(; - specification, data, - obs_colnames = nothing, - spec_colnames = nothing, - meanstructure = false, - compute_covariance = true, - rowwise = false, + observed_vars::Union{AbstractVector, Nothing} = nothing, + specification::Union{SemSpecification, Nothing} = nothing, + observed_var_prefix::Union{Symbol, AbstractString} = :obs, kwargs..., ) - if isnothing(spec_colnames) - spec_colnames = get_colnames(specification) - end - - if !isnothing(spec_colnames) - if isnothing(obs_colnames) - try - data = data[:, spec_colnames] - catch - throw( - ArgumentError( - "Your `data` can not be indexed by symbols. " * - "Maybe you forgot to provide column names via the `obs_colnames = ...` argument.", - ), - ) - end - else - if data isa DataFrame - throw( - ArgumentError( - "You passed your data as a `DataFrame`, but also specified `obs_colnames`. " * - "Please make sure the column names of your data frame indicate the correct variables " * - "or pass your data in a different format.", - ), - ) - end - - if !(eltype(obs_colnames) <: Symbol) - throw(ArgumentError("please specify `obs_colnames` as a vector of Symbols")) - end - - data = reorder_data(data, spec_colnames, obs_colnames) - end - end + data, obs_vars, _ = + prepare_data(data, observed_vars, specification; observed_var_prefix) + obs_mean, obs_cov = mean_and_cov(data, 1) - if data isa DataFrame - data = Matrix(data) - end - - return SemObservedData( - data, - compute_covariance ? Statistics.cov(data) : nothing, - meanstructure ? vec(Statistics.mean(data, dims = 1)) : nothing, - size(data, 2), - size(data, 1), - rowwise ? 
[data[i, :] for i in axes(data, 1)] : nothing, - ) + return SemObservedData(data, obs_vars, obs_cov, vec(obs_mean), size(data, 1)) end ############################################################################################ ### Recommended methods ############################################################################################ -n_obs(observed::SemObservedData) = observed.n_obs -n_man(observed::SemObservedData) = observed.n_man - ############################################################################################ ### additional methods ############################################################################################ -get_data(observed::SemObservedData) = observed.data obs_cov(observed::SemObservedData) = observed.obs_cov obs_mean(observed::SemObservedData) = observed.obs_mean -data_rowwise(observed::SemObservedData) = observed.data_rowwise - -############################################################################################ -### Additional functions -############################################################################################ - -# reorder data ----------------------------------------------------------------------------- -function reorder_data(data::AbstractArray, spec_colnames, obs_colnames) - if spec_colnames == obs_colnames - return data - else - obs_positions = Dict(col => i for (i, col) in enumerate(obs_colnames)) - new_positions = [obs_positions[col] for col in spec_colnames] - return data[:, new_positions] - end -end diff --git a/src/observed/get_colnames.jl b/src/observed/get_colnames.jl deleted file mode 100644 index d620de659..000000000 --- a/src/observed/get_colnames.jl +++ /dev/null @@ -1,28 +0,0 @@ -# specification colnames -function get_colnames(specification::ParameterTable) - if !haskey(specification.variables, :sorted_vars) || - (length(specification.variables[:sorted_vars]) == 0) - colnames = specification.variables[:observed_vars] - else - is_obs = [ - var ∈ 
specification.variables[:observed_vars] for - var in specification.variables[:sorted_vars] - ] - colnames = specification.variables[:sorted_vars][is_obs] - end - return colnames -end - -function get_colnames(specification::RAMMatrices) - if isnothing(specification.colnames) - @warn "Your RAMMatrices do not contain column names. Please make sure the order of variables in your data is correct!" - return nothing - else - colnames = specification.colnames[specification.F_ind] - return colnames - end -end - -function get_colnames(specification::Nothing) - return nothing -end diff --git a/src/observed/missing.jl b/src/observed/missing.jl index 6cfd09391..cf699252e 100644 --- a/src/observed/missing.jl +++ b/src/observed/missing.jl @@ -9,76 +9,44 @@ mutable struct EmMVNModel{A, b, B} fitted::B end +# FIXME type unstable +obs_mean(em::EmMVNModel) = ifelse(em.fitted, em.μ, nothing) +obs_cov(em::EmMVNModel) = ifelse(em.fitted, em.Σ, nothing) + """ For observed data with missing values. # Constructor SemObservedMissing(; - specification, data, - obs_colnames = nothing, + observed_vars = nothing, + specification = nothing, kwargs...) # Arguments -- `specification`: either a `RAMMatrices` or `ParameterTable` object (1) +- `specification`: optional SEM model specification ([`SemSpecification`](@ref)) - `data`: observed data -- `obs_colnames::Vector{Symbol}`: column names of the data (if the object passed as data does not have column names, i.e. is not a data frame) +- `observed_vars::Vector{Symbol}`: column names of the data (if the object passed as data does not have column names, i.e. 
is not a data frame) # Extended help ## Interfaces -- `n_obs(::SemObservedMissing)` -> number of observed data points -- `n_man(::SemObservedMissing)` -> number of manifest variables - -- `get_data(::SemObservedMissing)` -> observed data -- `data_rowwise(::SemObservedMissing)` -> observed data as vector per observation, with missing values deleted +- `nsamples(::SemObservedMissing)` -> number of samples (data points) +- `nobserved_vars(::SemObservedMissing)` -> number of observed variables -- `patterns(::SemObservedMissing)` -> indices of non-missing variables per missing patterns -- `patterns_not(::SemObservedMissing)` -> indices of missing variables per missing pattern -- `rows(::SemObservedMissing)` -> row indices of observed data points that belong to each pattern -- `pattern_n_obs(::SemObservedMissing)` -> number of data points per pattern -- `pattern_nvar_obs(::SemObservedMissing)` -> number of non-missing observed variables per pattern -- `obs_mean(::SemObservedMissing)` -> observed mean per pattern -- `obs_cov(::SemObservedMissing)` -> observed covariance per pattern -- `em_model(::SemObservedMissing)` -> `EmMVNModel` that contains the covariance matrix and mean vector found via optimization maximization +- `samples(::SemObservedMissing)` -> data matrix (contains both measured and missing values) +- `em_model(::SemObservedMissing)` -> `EmMVNModel` that contains the covariance matrix and mean vector found via expectation maximization ## Implementation Subtype of `SemObserved` - -## Remarks -(1) the `specification` argument can also be `nothing`, but this turns of checking whether -the observed data/covariance columns are in the correct order! As a result, you should only -use this if you are sure your observed data is in the right format. 
- -## Additional keyword arguments: -- `spec_colnames::Vector{Symbol} = nothing`: overwrites column names of the specification object """ -mutable struct SemObservedMissing{ - A <: AbstractArray, - D <: AbstractFloat, - O <: AbstractFloat, - P <: Vector, - P2 <: Vector, - R <: Vector, - PD <: AbstractArray, - PO <: AbstractArray, - PVO <: AbstractArray, - A2 <: AbstractArray, - A3 <: AbstractArray, - S <: EmMVNModel, -} <: SemObserved - data::A - n_man::D - n_obs::O - patterns::P # missing patterns - patterns_not::P2 - rows::R # coresponding rows in data_rowwise - data_rowwise::PD # list of data - pattern_n_obs::PO # observed rows per pattern - pattern_nvar_obs::PVO # number of non-missing variables per pattern - obs_mean::A2 - obs_cov::A3 - em_model::S +struct SemObservedMissing{T <: Real, S <: Real, E <: EmMVNModel} <: SemObserved + data::Matrix{Union{T, Missing}} + observed_vars::Vector{Symbol} + nsamples::Int + patterns::Vector{SemObservedMissingPattern{T, S}} + + em_model::E end ############################################################################################ @@ -86,116 +54,39 @@ end ############################################################################################ function SemObservedMissing(; - specification, data, - obs_colnames = nothing, - spec_colnames = nothing, + observed_vars::Union{AbstractVector, Nothing} = nothing, + specification::Union{SemSpecification, Nothing} = nothing, + observed_var_prefix::Union{Symbol, AbstractString} = :obs, kwargs..., ) - if isnothing(spec_colnames) - spec_colnames = get_colnames(specification) - end - - if !isnothing(spec_colnames) - if isnothing(obs_colnames) - try - data = data[:, spec_colnames] - catch - throw( - ArgumentError( - "Your `data` can not be indexed by symbols. 
" * - "Maybe you forgot to provide column names via the `obs_colnames = ...` argument.", - ), - ) - end - else - if data isa DataFrame - throw( - ArgumentError( - "You passed your data as a `DataFrame`, but also specified `obs_colnames`. " * - "Please make sure the column names of your data frame indicate the correct variables " * - "or pass your data in a different format.", - ), - ) - end - - if !(eltype(obs_colnames) <: Symbol) - throw(ArgumentError("please specify `obs_colnames` as a vector of Symbols")) - end - - data = reorder_data(data, spec_colnames, obs_colnames) - end - end - - if data isa DataFrame - data = Matrix(data) - end - - # remove persons with only missings - keep = Vector{Int64}() - for i in 1:size(data, 1) - if any(.!ismissing.(data[i, :])) - push!(keep, i) - end - end - data = data[keep, :] - - n_obs, n_man = size(data) - - # compute and store the different missing patterns with their rowindices - missings = ismissing.(data) - patterns = [missings[i, :] for i in 1:size(missings, 1)] - - patterns_cart = findall.(!, patterns) - data_rowwise = [data[i, patterns_cart[i]] for i in 1:n_obs] - data_rowwise = convert.(Array{Float64}, data_rowwise) - - remember = Vector{BitArray{1}}() - rows = [Vector{Int64}(undef, 0) for i in 1:size(patterns, 1)] - for i in 1:size(patterns, 1) - unknown = true - for j in 1:size(remember, 1) - if patterns[i] == remember[j] - push!(rows[j], i) - unknown = false - end - end - if unknown - push!(remember, patterns[i]) - push!(rows[size(remember, 1)], i) + data, obs_vars, _ = + prepare_data(data, observed_vars, specification; observed_var_prefix) + nsamples, nobs_vars = size(data) + + # detect all different missing patterns with their row indices + pattern_to_rows = Dict{BitVector, Vector{Int}}() + for (i, datarow) in zip(axes(data, 1), eachrow(data)) + pattern = BitVector(.!ismissing.(datarow)) + if sum(pattern) > 0 # skip all-missing rows + pattern_rows = get!(() -> Vector{Int}(), pattern_to_rows, pattern) + 
push!(pattern_rows, i) end end - rows = rows[1:length(remember)] - n_patterns = size(rows, 1) + # process each pattern and sort from most to least number of observed vars + patterns = [ + SemObservedMissingPattern(pat, rows, data) for (pat, rows) in pairs(pattern_to_rows) + ] + sort!(patterns, by = nmissed_vars) - # sort by number of missings - sort_n_miss = sortperm(sum.(remember)) - remember = remember[sort_n_miss] - remember_cart = findall.(!, remember) - remember_cart_not = findall.(remember) - rows = rows[sort_n_miss] - - pattern_n_obs = size.(rows, 1) - pattern_nvar_obs = length.(remember_cart) - - cov_mean = [cov_and_mean(data_rowwise[rows]) for rows in rows] - obs_cov = [cov_mean[1] for cov_mean in cov_mean] - obs_mean = [cov_mean[2] for cov_mean in cov_mean] - - em_model = EmMVNModel(zeros(n_man, n_man), zeros(n_man), false) + # allocate EM model (but don't fit) + em_model = EmMVNModel(zeros(nobs_vars, nobs_vars), zeros(nobs_vars), false) return SemObservedMissing( - data, - Float64(n_man), - Float64(n_obs), - remember_cart, - remember_cart_not, - rows, - data_rowwise, - Float64.(pattern_n_obs), - Float64.(pattern_nvar_obs), - obs_mean, - obs_cov, + convert(Matrix{Union{nonmissingtype(eltype(data)), Missing}}, data), + obs_vars, + nsamples, + patterns, em_model, ) end @@ -204,20 +95,10 @@ end ### Recommended methods ############################################################################################ -n_obs(observed::SemObservedMissing) = observed.n_obs -n_man(observed::SemObservedMissing) = observed.n_man - ############################################################################################ ### Additional methods ############################################################################################ -get_data(observed::SemObservedMissing) = observed.data -patterns(observed::SemObservedMissing) = observed.patterns -patterns_not(observed::SemObservedMissing) = observed.patterns_not -rows(observed::SemObservedMissing) = observed.rows 
-data_rowwise(observed::SemObservedMissing) = observed.data_rowwise -pattern_n_obs(observed::SemObservedMissing) = observed.pattern_n_obs -pattern_nvar_obs(observed::SemObservedMissing) = observed.pattern_nvar_obs -obs_mean(observed::SemObservedMissing) = observed.obs_mean -obs_cov(observed::SemObservedMissing) = observed.obs_cov em_model(observed::SemObservedMissing) = observed.em_model +obs_mean(observed::SemObservedMissing) = obs_mean(em_model(observed)) +obs_cov(observed::SemObservedMissing) = obs_cov(em_model(observed)) diff --git a/src/observed/missing_pattern.jl b/src/observed/missing_pattern.jl new file mode 100644 index 000000000..6ac6a360b --- /dev/null +++ b/src/observed/missing_pattern.jl @@ -0,0 +1,45 @@ +# data associated with the observed variables that all share the same missingness pattern +# variables that have values within that pattern are termed "measured" +# variables that have no measurements are termed "missing" +struct SemObservedMissingPattern{T, S} + measured_mask::BitVector # measured vars mask + miss_mask::BitVector # missing vars mask + rows::Vector{Int} # rows in original data + data::Matrix{T} # non-missing submatrix of data + + measured_mean::Vector{S} # means of measured vars + measured_cov::Matrix{S} # covariance of measured vars +end + +function SemObservedMissingPattern( + measured_mask::BitVector, + rows::AbstractVector{<:Integer}, + data::AbstractMatrix, +) + T = nonmissingtype(eltype(data)) + + pat_data = convert(Matrix{T}, view(data, rows, measured_mask)) + if size(pat_data, 1) > 1 + pat_mean, pat_cov = mean_and_cov(pat_data, 1, corrected = false) + @assert size(pat_cov) == (size(pat_data, 2), size(pat_data, 2)) + else + pat_mean = reshape(pat_data[1, :], 1, :) + # 1x1 covariance matrix since it is not meant to be used + pat_cov = fill(zero(T), 1, 1) + end + + return SemObservedMissingPattern{T, eltype(pat_mean)}( + measured_mask, + .!measured_mask, + rows, + pat_data, + dropdims(pat_mean, dims = 1), + pat_cov, + ) +end + 
+nobserved_vars(pat::SemObservedMissingPattern) = length(pat.measured_mask) +nsamples(pat::SemObservedMissingPattern) = length(pat.rows) + +nmeasured_vars(pat::SemObservedMissingPattern) = length(pat.measured_mean) +nmissed_vars(pat::SemObservedMissingPattern) = nobserved_vars(pat) - nmeasured_vars(pat) diff --git a/src/diff/Empty.jl b/src/optimizer/Empty.jl similarity index 92% rename from src/diff/Empty.jl rename to src/optimizer/Empty.jl index 57fa9ee98..45a20db55 100644 --- a/src/diff/Empty.jl +++ b/src/optimizer/Empty.jl @@ -15,13 +15,13 @@ an optimizer part. Subtype of `SemOptimizer`. """ -struct SemOptimizerEmpty <: SemOptimizer end +struct SemOptimizerEmpty <: SemOptimizer{:Empty} end ############################################################################################ ### Constructor ############################################################################################ -# SemOptimizerEmpty(;kwargs...) = SemOptimizerEmpty() +SemOptimizer{:Empty}() = SemOptimizerEmpty() ############################################################################################ ### Recommended methods diff --git a/src/optimizer/NLopt.jl b/src/optimizer/NLopt.jl deleted file mode 100644 index ffe2ffed0..000000000 --- a/src/optimizer/NLopt.jl +++ /dev/null @@ -1,138 +0,0 @@ -############################################################################################ -### connect to NLopt.jl as backend -############################################################################################ - -# wrapper to define the objective -function sem_wrap_nlopt(par, G, model::AbstractSem) - need_gradient = length(G) != 0 - if need_gradient - return objective_gradient!(G, model, par) - else - return objective!(model, par) - end -end - -mutable struct NLoptResult - result::Any - problem::Any -end - -optimizer(res::NLoptResult) = res.problem.algorithm -n_iterations(res::NLoptResult) = res.problem.numevals -convergence(res::NLoptResult) = res.result[3] - -# construct SemFit 
from fitted NLopt object -function SemFit_NLopt(optimization_result, model::AbstractSem, start_val, opt) - return SemFit( - optimization_result[1], - optimization_result[2], - start_val, - model, - NLoptResult(optimization_result, opt), - ) -end - -# sem_fit method -function sem_fit( - model::Sem{O, I, L, D}; - start_val = start_val, - kwargs..., -) where {O, I, L, D <: SemOptimizerNLopt} - - # starting values - if !isa(start_val, Vector) - start_val = start_val(model; kwargs...) - end - - # construct the NLopt problem - opt = construct_NLopt_problem( - model.optimizer.algorithm, - model.optimizer.options, - length(start_val), - ) - set_NLopt_constraints!(opt, model.optimizer) - opt.min_objective = (par, G) -> sem_wrap_nlopt(par, G, model) - - if !isnothing(model.optimizer.local_algorithm) - opt_local = construct_NLopt_problem( - model.optimizer.local_algorithm, - model.optimizer.local_options, - length(start_val), - ) - opt.local_optimizer = opt_local - end - - # fit - result = NLopt.optimize(opt, start_val) - - return SemFit_NLopt(result, model, start_val, opt) -end - -function sem_fit( - model::SemEnsemble{N, T, V, D, S}; - start_val = start_val, - kwargs..., -) where {N, T, V, D <: SemOptimizerNLopt, S} - - # starting values - if !isa(start_val, Vector) - start_val = start_val(model; kwargs...) 
- end - - # construct the NLopt problem - opt = construct_NLopt_problem( - model.optimizer.algorithm, - model.optimizer.options, - length(start_val), - ) - set_NLopt_constraints!(opt, model.optimizer) - opt.min_objective = (par, G) -> sem_wrap_nlopt(par, G, model) - - if !isnothing(model.optimizer.local_algorithm) - opt_local = construct_NLopt_problem( - model.optimizer.local_algorithm, - model.optimizer.local_options, - length(start_val), - ) - opt.local_optimizer = opt_local - end - - # fit - result = NLopt.optimize(opt, start_val) - - return SemFit_NLopt(result, model, start_val, opt) -end - -############################################################################################ -### additional functions -############################################################################################ - -function construct_NLopt_problem(algorithm, options, npar) - opt = Opt(algorithm, npar) - - for key in keys(options) - setproperty!(opt, key, options[key]) - end - - return opt -end - -function set_NLopt_constraints!(opt, optimizer::SemOptimizerNLopt) - for con in optimizer.inequality_constraints - inequality_constraint!(opt::Opt, con.f, con.tol) - end - for con in optimizer.equality_constraints - equality_constraint!(opt::Opt, con.f, con.tol) - end -end - -############################################################################################ -# pretty printing -############################################################################################ - -function Base.show(io::IO, result::NLoptResult) - print(io, "Optimizer status: $(result.result[3]) \n") - print(io, "Minimum: $(round(result.result[1]; digits = 2)) \n") - print(io, "Algorithm: $(result.problem.algorithm) \n") - print(io, "No. 
evaluations: $(result.problem.numevals) \n") -end diff --git a/src/optimizer/abstract.jl b/src/optimizer/abstract.jl new file mode 100644 index 000000000..2487b7c52 --- /dev/null +++ b/src/optimizer/abstract.jl @@ -0,0 +1,122 @@ +""" + fit([optim::SemOptimizer], model::AbstractSem; + [engine::Symbol], start_val = start_val, kwargs...) + +Return the fitted `model`. + +# Arguments +- `optim`: [`SemOptimizer`](@ref) to use for fitting. + If omitted, a new optimizer is constructed as `SemOptimizer(; engine, kwargs...)`. +- `model`: `AbstractSem` to fit +- `engine`: the optimization engine to use, default is `:Optim` +- `start_val`: a vector or a dictionary of starting parameter values, + or function to compute them (1) +- `kwargs...`: keyword arguments, passed to optimization engine constructor and + `start_val` function + +(1) available functions are `start_fabin3`, `start_simple` and `start_partable`. +For more information, we refer to the individual documentations and +the online documentation on [Starting values](@ref). + +# Examples +```julia +fit( + my_model; + start_val = start_simple, + start_covariances_latent = 0.5) +``` +""" +function fit(optim::SemOptimizer, model::AbstractSem; start_val = nothing, kwargs...) + start_params = prepare_start_params(start_val, model; kwargs...) + @assert start_params isa AbstractVector + @assert length(start_params) == nparams(model) + + fit(optim, model, start_params; kwargs...) +end + +fit(model::AbstractSem; engine::Symbol = :Optim, start_val = nothing, kwargs...) = +fit(SemOptimizer(; engine, kwargs...), model; start_val, kwargs...) + +# fallback method +fit(optim::SemOptimizer, model::AbstractSem, start_params; kwargs...) = + error("Optimizer $(optim) support not implemented.") + +# FABIN3 is the default method for single models +prepare_start_params(start_val::Nothing, model::AbstractSemSingle; kwargs...) = + start_fabin3(model; kwargs...) 
+ +# simple algorithm is the default method for ensembles +prepare_start_params(start_val::Nothing, model::AbstractSem; kwargs...) = + start_simple(model; kwargs...) + +function prepare_start_params(start_val::AbstractVector, model::AbstractSem; kwargs...) + (length(start_val) == nparams(model)) || throw( + DimensionMismatch( + "The length of `start_val` vector ($(length(start_val))) does not match the number of model parameters ($(nparams(model))).", + ), + ) + return start_val +end + +function prepare_start_params(start_val::AbstractDict, model::AbstractSem; kwargs...) + return [start_val[param] for param in params(model)] # convert to a vector +end + +# get from the ParameterTable (potentially from a different model with match param names) +# TODO: define kwargs that instruct to get values from "estimate" and "fixed" +function prepare_start_params(start_val::ParameterTable, model::AbstractSem; kwargs...) + res = zeros(eltype(start_val.columns[:start]), nparams(model)) + param_indices = Dict(param => i for (i, param) in enumerate(params(model))) + + for (param, startval) in zip(start_val.columns[:param], start_val.columns[:start]) + (param == :const) && continue + par_ind = get(param_indices, param, nothing) + if !isnothing(par_ind) + isfinite(startval) && (res[par_ind] = startval) + else + throw( + ErrorException( + "Model parameter $(param) not found in the parameter table.", + ), + ) + end + end + return res +end + +# prepare a vector of model parameter bounds (BOUND=:lower or BOUND=:lower): +# use the user-specified "bounds" vector "as is" +function prepare_param_bounds( + ::Val{BOUND}, + bounds::AbstractVector{<:Number}, + model::AbstractSem; + default::Number, # unused for vector bounds + variance_default::Number, # unused for vector bounds +) where {BOUND} + length(bounds) == nparams(model) || throw( + DimensionMismatch( + "The length of `bounds` vector ($(length(bounds))) does not match the number of model parameters ($(nparams(model))).", + ), + ) + 
return bounds +end + +# prepare a vector of model parameter bounds +# given the "bounds" dictionary and default values +function prepare_param_bounds( + ::Val{BOUND}, + bounds::Union{AbstractDict, Nothing}, + model::AbstractSem; + default::Number, + variance_default::Number, +) where {BOUND} + varparams = Set(variance_params(model.implied.ram_matrices)) + res = [ + begin + def = in(p, varparams) ? variance_default : default + isnothing(bounds) ? def : get(bounds, p, def) + end for p in SEM.params(model) + ] + + return res +end diff --git a/src/optimizer/documentation.jl b/src/optimizer/documentation.jl deleted file mode 100644 index 83b4f7a98..000000000 --- a/src/optimizer/documentation.jl +++ /dev/null @@ -1,22 +0,0 @@ -""" - sem_fit(model::AbstractSem; start_val = start_val, kwargs...) - -Return the fitted `model`. - -# Arguments -- `model`: `AbstractSem` to fit -- `start_val`: vector of starting values or function to compute starting values (1) -- `kwargs...`: keyword arguments, passed to starting value functions - -(1) available options are `start_fabin3`, `start_simple` and `start_partable`. -For more information, we refer to the individual documentations and the online documentation on [Starting values](@ref). 
- -# Examples -```julia -sem_fit( - my_model; - start_val = start_simple, - start_covariances_latent = 0.5) -``` -""" -function sem_fit end diff --git a/src/optimizer/optim.jl b/src/optimizer/optim.jl index 68617fdb8..8f5404bc2 100644 --- a/src/optimizer/optim.jl +++ b/src/optimizer/optim.jl @@ -1,31 +1,89 @@ ## connect to Optim.jl as backend -function sem_wrap_optim(par, F, G, H, model::AbstractSem) - if !isnothing(F) - if !isnothing(G) - if !isnothing(H) - return objective_gradient_hessian!(G, H, model, par) - else - return objective_gradient!(G, model, par) - end - else - if !isnothing(H) - return objective_hessian!(H, model, par) - else - return objective!(model, par) - end - end - else - if !isnothing(G) - if !isnothing(H) - gradient_hessian!(G, H, model, par) - else - gradient!(G, model, par) - end - end - end - return nothing + +############################################################################################ +### Types and Constructor +############################################################################################ +""" + SemOptimizerOptim{A, B} <: SemOptimizer{:Optim} + +Connects to `Optim.jl` as the optimization backend. + +# Constructor + + SemOptimizerOptim(; + algorithm = LBFGS(), + options = Optim.Options(;f_tol = 1e-10, x_tol = 1.5e-8), + kwargs...) + +# Arguments +- `algorithm`: optimization algorithm. +- `options::Optim.Options`: options for the optimization algorithm + +# Usage +All algorithms and options from the Optim.jl library are available, for more information see +the Optim.jl online documentation. 
+ +# Examples +```julia +my_optimizer = SemOptimizerOptim() + +# hessian based optimization with backtracking linesearch and modified initial step size +using Optim, LineSearches + +my_newton_optimizer = SemOptimizerOptim( + algorithm = Newton( + ;linesearch = BackTracking(order=3), + alphaguess = InitialHagerZhang() + ) +) +``` + +# Extended help + +## Constrained optimization + +When using the `Fminbox` or `SAMIN` constrained optimization algorithms, +the vector or dictionary of lower and upper bounds for each model parameter can be specified +via `lower_bounds` and `upper_bounds` keyword arguments. +Alternatively, the `lower_bound` and `upper_bound` keyword arguments can be used to specify +the default bound for all non-variance model parameters, +and the `variance_lower_bound` and `variance_upper_bound` keyword -- +for the variance parameters (the diagonal of the *S* matrix). + +## Interfaces +- `algorithm(::SemOptimizerOptim)` +- `options(::SemOptimizerOptim)` + +## Implementation + +Subtype of `SemOptimizer`. +""" +mutable struct SemOptimizerOptim{A, B} <: SemOptimizer{:Optim} + algorithm::A + options::B end +SemOptimizer{:Optim}(args...; kwargs...) = SemOptimizerOptim(args...; kwargs...) + +SemOptimizerOptim(; + algorithm = LBFGS(), + options = Optim.Options(; f_tol = 1e-10, x_tol = 1.5e-8), + kwargs..., +) = SemOptimizerOptim(algorithm, options) + +############################################################################################ +### Recommended methods +############################################################################################ + +update_observed(optimizer::SemOptimizerOptim, observed::SemObserved; kwargs...) 
= optimizer + +############################################################################################ +### additional methods +############################################################################################ + +algorithm(optimizer::SemOptimizerOptim) = optimizer.algorithm +options(optimizer::SemOptimizerOptim) = optimizer.options + function SemFit( optimization_result::Optim.MultivariateOptimizationResults, model::AbstractSem, @@ -44,38 +102,50 @@ optimizer(res::Optim.MultivariateOptimizationResults) = Optim.summary(res) n_iterations(res::Optim.MultivariateOptimizationResults) = Optim.iterations(res) convergence(res::Optim.MultivariateOptimizationResults) = Optim.converged(res) -function sem_fit( - model::AbstractSemSingle{O, I, L, D}; - start_val = start_val, - kwargs..., -) where {O, I, L, D <: SemOptimizerOptim} - if !isa(start_val, Vector) - start_val = start_val(model; kwargs...) - end - - result = Optim.optimize( - Optim.only_fgh!((F, G, H, par) -> sem_wrap_optim(par, F, G, H, model)), - start_val, - model.optimizer.algorithm, - model.optimizer.options, - ) - return SemFit(result, model, start_val) -end - -function sem_fit( - model::SemEnsemble{N, T, V, D, S}; - start_val = start_val, +function fit( + optim::SemOptimizerOptim, + model::AbstractSem, + start_params::AbstractVector; + lower_bounds::Union{AbstractVector, AbstractDict, Nothing} = nothing, + upper_bounds::Union{AbstractVector, AbstractDict, Nothing} = nothing, + lower_bound = -Inf, + upper_bound = Inf, + variance_lower_bound::Number = 0.0, + variance_upper_bound::Number = Inf, kwargs..., -) where {N, T, V, D <: SemOptimizerOptim, S} - if !isa(start_val, Vector) - start_val = start_val(model; kwargs...) 
+) + # setup lower/upper bounds if the algorithm supports it + if optim.algorithm isa Optim.Fminbox || optim.algorithm isa Optim.SAMIN + lbounds = prepare_param_bounds( + Val(:lower), + lower_bounds, + model, + default = lower_bound, + variance_default = variance_lower_bound, + ) + ubounds = prepare_param_bounds( + Val(:upper), + upper_bounds, + model, + default = upper_bound, + variance_default = variance_upper_bound, + ) + start_params = clamp.(start_params, lbounds, ubounds) + result = Optim.optimize( + Optim.only_fgh!((F, G, H, par) -> evaluate!(F, G, H, model, par)), + lbounds, + ubounds, + start_params, + optim.algorithm, + optim.options, + ) + else + result = Optim.optimize( + Optim.only_fgh!((F, G, H, par) -> evaluate!(F, G, H, model, par)), + start_params, + optim.algorithm, + optim.options, + ) end - - result = Optim.optimize( - Optim.only_fgh!((F, G, H, par) -> sem_wrap_optim(par, F, G, H, model)), - start_val, - model.optimizer.algorithm, - model.optimizer.options, - ) - return SemFit(result, model, start_val) + return SemFit(result, model, start_params) end diff --git a/src/package_extensions/SEMNLOptExt.jl b/src/package_extensions/SEMNLOptExt.jl new file mode 100644 index 000000000..69721ac94 --- /dev/null +++ b/src/package_extensions/SEMNLOptExt.jl @@ -0,0 +1,69 @@ +""" +Connects to `NLopt.jl` as the optimization backend. +Only usable if `NLopt.jl` is loaded in the current Julia session! + +# Constructor + + SemOptimizerNLopt(; + algorithm = :LD_LBFGS, + options = Dict{Symbol, Any}(), + local_algorithm = nothing, + local_options = Dict{Symbol, Any}(), + equality_constraints = Vector{NLoptConstraint}(), + inequality_constraints = Vector{NLoptConstraint}(), + kwargs...) + +# Arguments +- `algorithm`: optimization algorithm. 
+- `options::Dict{Symbol, Any}`: options for the optimization algorithm +- `local_algorithm`: local optimization algorithm +- `local_options::Dict{Symbol, Any}`: options for the local optimization algorithm +- `equality_constraints::Vector{NLoptConstraint}`: vector of equality constraints +- `inequality_constraints::Vector{NLoptConstraint}`: vector of inequality constraints + +# Example +```julia +my_optimizer = SemOptimizerNLopt() + +# constrained optimization with augmented lagrangian +my_constrained_optimizer = SemOptimizerNLopt(; + algorithm = :AUGLAG, + local_algorithm = :LD_LBFGS, + local_options = Dict(:ftol_rel => 1e-6), + inequality_constraints = NLoptConstraint(;f = my_constraint, tol = 0.0), +) +``` + +# Usage +All algorithms and options from the NLopt library are available, for more information see +the NLopt.jl package and the NLopt online documentation. +For information on how to use inequality and equality constraints, +see [Constrained optimization](@ref) in our online documentation. + +# Extended help + +## Interfaces +- `algorithm(::SemOptimizerNLopt)` +- `local_algorithm(::SemOptimizerNLopt)` +- `options(::SemOptimizerNLopt)` +- `local_options(::SemOptimizerNLopt)` +- `equality_constraints(::SemOptimizerNLopt)` +- `inequality_constraints(::SemOptimizerNLopt)` + +## Implementation + +Subtype of `SemOptimizer`. +""" +struct SemOptimizerNLopt{A, A2, B, B2, C} <: SemOptimizer{:NLopt} + algorithm::A + local_algorithm::A2 + options::B + local_options::B2 + equality_constraints::C + inequality_constraints::C +end + +Base.@kwdef struct NLoptConstraint + f::Any + tol = 0.0 +end diff --git a/src/package_extensions/SEMProximalOptExt.jl b/src/package_extensions/SEMProximalOptExt.jl new file mode 100644 index 000000000..5d4007504 --- /dev/null +++ b/src/package_extensions/SEMProximalOptExt.jl @@ -0,0 +1,21 @@ +""" +Connects to `ProximalAlgorithms.jl` as the optimization backend. 
+ +# Constructor + + SemOptimizerProximal(; + algorithm = ProximalAlgorithms.PANOC(), + operator_g, + operator_h = nothing, + kwargs..., + +# Arguments +- `algorithm`: optimization algorithm. +- `operator_g`: gradient of the objective function +- `operator_h`: optional hessian of the objective function +""" +mutable struct SemOptimizerProximal{A, B, C} <: SemOptimizer{:Proximal} + algorithm::A + operator_g::B + operator_h::C +end diff --git a/src/types.jl b/src/types.jl index 803bc733a..64a4acbac 100644 --- a/src/types.jl +++ b/src/types.jl @@ -4,12 +4,38 @@ "Most abstract supertype for all SEMs" abstract type AbstractSem end -"Supertype for all single SEMs, e.g. SEMs that have at least the fields `observed`, `imply`, `loss` and `optimizer`" -abstract type AbstractSemSingle{O, I, L, D} <: AbstractSem end +"Supertype for all single SEMs, e.g. SEMs that have at least the fields `observed`, `implied`, `loss`" +abstract type AbstractSemSingle{O, I, L} <: AbstractSem end "Supertype for all collections of multiple SEMs" abstract type AbstractSemCollection <: AbstractSem end +"Meanstructure trait for `SemImplied` subtypes" +abstract type MeanStruct end +"Indicates that `SemImplied` subtype supports mean structure" +struct HasMeanStruct <: MeanStruct end +"Indicates that `SemImplied` subtype does not support mean structure" +struct NoMeanStruct <: MeanStruct end + +# default implementation +MeanStruct(::Type{T}) where {T} = + hasfield(T, :meanstruct) ? fieldtype(T, :meanstruct) : + error("Objects of type $T do not support MeanStruct trait") + +MeanStruct(semobj) = MeanStruct(typeof(semobj)) + +"Hessian Evaluation trait for `SemImplied` and `SemLossFunction` subtypes" +abstract type HessianEval end +struct ApproxHessian <: HessianEval end +struct ExactHessian <: HessianEval end + +# default implementation +HessianEval(::Type{T}) where {T} = + hasfield(T, :hessianeval) ? 
fieldtype(T, :hessianeval) : + error("Objects of type $T do not support HessianEval trait") + +HessianEval(semobj) = HessianEval(typeof(semobj)) + "Supertype for all loss functions of SEMs. If you want to implement a custom loss function, it should be a subtype of `SemLossFunction`." abstract type SemLossFunction end @@ -58,7 +84,18 @@ Supertype of all objects that can serve as the `optimizer` field of a SEM. Connects the SEM to its optimization backend and controls options like the optimization algorithm. If you want to connect the SEM package to a new optimization backend, you should implement a subtype of SemOptimizer. """ -abstract type SemOptimizer end +abstract type SemOptimizer{E} end + +engine(::Type{SemOptimizer{E}}) where {E} = E +engine(optimizer::SemOptimizer) = engine(typeof(optimizer)) + +SemOptimizer(args...; engine::Symbol = :Optim, kwargs...) = + SemOptimizer{engine}(args...; kwargs...) + +# fallback optimizer constructor +function SemOptimizer{E}(args...; kwargs...) where {E} + throw(ErrorException("$E optimizer is not supported.")) +end """ Supertype of all objects that can serve as the observed field of a SEM. @@ -68,130 +105,142 @@ If you have a special kind of data, e.g. ordinal data, you should implement a su abstract type SemObserved end """ -Supertype of all objects that can serve as the imply field of a SEM. +Supertype of all objects that can serve as the implied field of a SEM. Computed model-implied values that should be compared with the observed data to find parameter estimates, e. g. the model implied covariance or mean. -If you would like to implement a different notation, e.g. LISREL, you should implement a subtype of SemImply. +If you would like to implement a different notation, e.g. LISREL, you should implement a subtype of SemImplied. """ -abstract type SemImply end +abstract type SemImplied end -"Subtype of SemImply for all objects that can serve as the imply field of a SEM and use some form of symbolic precomputation." 
-abstract type SemImplySymbolic <: SemImply end +"Subtype of SemImplied for all objects that can serve as the implied field of a SEM and use some form of symbolic precomputation." +abstract type SemImpliedSymbolic <: SemImplied end """ - Sem(;observed = SemObservedData, imply = RAM, loss = SemML, optimizer = SemOptimizerOptim, kwargs...) + Sem(;observed = SemObservedData, implied = RAM, loss = SemML, kwargs...) Constructor for the basic `Sem` type. -All additional kwargs are passed down to the constructors for the observed, imply, loss and optimizer fields. +All additional kwargs are passed down to the constructors for the observed, implied, and loss fields. # Arguments - `observed`: object of subtype `SemObserved` or a constructor. -- `imply`: object of subtype `SemImply` or a constructor. +- `implied`: object of subtype `SemImplied` or a constructor. - `loss`: object of subtype `SemLossFunction`s or constructor; or a tuple of such. -- `optimizer`: object of subtype `SemOptimizer` or a constructor. Returns a Sem with fields - `observed::SemObserved`: Stores observed data, sample statistics, etc. See also [`SemObserved`](@ref). -- `imply::SemImply`: Computes model implied statistics, like Σ, μ, etc. See also [`SemImply`](@ref). +- `implied::SemImplied`: Computes model implied statistics, like Σ, μ, etc. See also [`SemImplied`](@ref). - `loss::SemLoss`: Computes the objective and gradient of a sum of loss functions. See also [`SemLoss`](@ref). -- `optimizer::SemOptimizer`: Connects the model to the optimizer. See also [`SemOptimizer`](@ref). 
""" -mutable struct Sem{O <: SemObserved, I <: SemImply, L <: SemLoss, D <: SemOptimizer} <: - AbstractSemSingle{O, I, L, D} +mutable struct Sem{O <: SemObserved, I <: SemImplied, L <: SemLoss} <: + AbstractSemSingle{O, I, L} observed::O - imply::I + implied::I loss::L - optimizer::D end ############################################################################################ # automatic differentiation ############################################################################################ """ - SemFiniteDiff(;observed = SemObservedData, imply = RAM, loss = SemML, optimizer = SemOptimizerOptim, kwargs...) + SemFiniteDiff(;observed = SemObservedData, implied = RAM, loss = SemML, kwargs...) -Constructor for `SemFiniteDiff`. -All additional kwargs are passed down to the constructors for the observed, imply, loss and optimizer fields. +A wrapper around [`Sem`](@ref) that substitutes dedicated evaluation of gradient and hessian with +finite difference approximation. # Arguments - `observed`: object of subtype `SemObserved` or a constructor. -- `imply`: object of subtype `SemImply` or a constructor. +- `implied`: object of subtype `SemImplied` or a constructor. - `loss`: object of subtype `SemLossFunction`s or constructor; or a tuple of such. -- `optimizer`: object of subtype `SemOptimizer` or a constructor. Returns a Sem with fields - `observed::SemObserved`: Stores observed data, sample statistics, etc. See also [`SemObserved`](@ref). -- `imply::SemImply`: Computes model implied statistics, like Σ, μ, etc. See also [`SemImply`](@ref). +- `implied::SemImplied`: Computes model implied statistics, like Σ, μ, etc. See also [`SemImplied`](@ref). - `loss::SemLoss`: Computes the objective and gradient of a sum of loss functions. See also [`SemLoss`](@ref). -- `optimizer::SemOptimizer`: Connects the model to the optimizer. See also [`SemOptimizer`](@ref). 
""" -struct SemFiniteDiff{O <: SemObserved, I <: SemImply, L <: SemLoss, D <: SemOptimizer} <: - AbstractSemSingle{O, I, L, D} +struct SemFiniteDiff{O <: SemObserved, I <: SemImplied, L <: SemLoss} <: + AbstractSemSingle{O, I, L} observed::O - imply::I + implied::I loss::L - optimizer::D end ############################################################################################ # ensemble models ############################################################################################ """ - SemEnsemble(models..., optimizer = SemOptimizerOptim, weights = nothing, kwargs...) + (1) SemEnsemble(models..., weights = nothing, kwargs...) + + (2) SemEnsemble(;specification, data, groups, column = :group, kwargs...) -Constructor for ensemble models. +Constructor for ensemble models. (2) can be used to conveniently specify multigroup models. # Arguments - `models...`: `AbstractSem`s. -- `optimizer`: object of subtype `SemOptimizer` or a constructor. - `weights::Vector`: Weights for each model. Defaults to the number of observed data points. +- `specification::EnsembleParameterTable`: Model specification. +- `data::DataFrame`: Observed data. Must contain a `column` of type `Vector{Symbol}` that contains the group. +- `groups::Vector{Symbol}`: Group names. +- `column::Symbol`: Name of the column in `data` that contains the group. -All additional kwargs are passed down to the constructor for the optimizer field. +All additional kwargs are passed down to the model parts. Returns a SemEnsemble with fields - `n::Int`: Number of models. - `sems::Tuple`: `AbstractSem`s. - `weights::Vector`: Weights for each model. -- `optimizer::SemOptimizer`: Connects the model to the optimizer. See also [`SemOptimizer`](@ref). -- `identifier::Dict`: Stores parameter labels and their position. +- `param_labels::Vector`: Stores parameter labels and their position. 
""" -struct SemEnsemble{N, T <: Tuple, V <: AbstractVector, D, I} <: AbstractSemCollection +struct SemEnsemble{N, T <: Tuple, V <: AbstractVector, I} <: AbstractSemCollection n::N sems::T weights::V - optimizer::D - identifier::I + param_labels::I end -function SemEnsemble(models...; optimizer = SemOptimizerOptim, weights = nothing, kwargs...) +# constructor from multiple models +function SemEnsemble(models...; weights = nothing, kwargs...) n = length(models) - npar = n_par(models[1]) # default weights if isnothing(weights) - nobs_total = sum(n_obs, models) - weights = [n_obs(model) / nobs_total for model in models] + nsamples_total = sum(nsamples, models) + weights = [nsamples(model) / nsamples_total for model in models] end - # check identifier equality - id = identifier(models[1]) + # check parameters equality + param_labels = SEM.param_labels(models[1]) for model in models - if id != identifier(model) - throw(ErrorException("The identifier of your models do not match. \n + if param_labels != SEM.param_labels(model) + throw(ErrorException("The parameters of your models do not match. \n Maybe you tried to specify models of an ensemble via ParameterTables. \n In that case, you may use RAMMatrices instead.")) end end - # optimizer - if !isa(optimizer, SemOptimizer) - optimizer = optimizer(; kwargs...) - end + return SemEnsemble(n, models, weights, param_labels) +end - return SemEnsemble(n, models, weights, optimizer, id) +# constructor from EnsembleParameterTable and data set +function SemEnsemble(; specification, data, groups, column = :group, kwargs...) 
+ if specification isa EnsembleParameterTable + specification = convert(Dict{Symbol, RAMMatrices}, specification) + end + models = [] + for group in groups + ram_matrices = specification[group] + data_group = select(filter(r -> r[column] == group, data), Not(column)) + if iszero(nrow(data_group)) + error("Your data does not contain any observations from group `$(group)`.") + end + model = Sem(; specification = ram_matrices, data = data_group, kwargs...) + push!(models, model) + end + return SemEnsemble(models...; weights = nothing, kwargs...) end +param_labels(ensemble::SemEnsemble) = ensemble.param_labels + """ n_models(ensemble::SemEnsemble) -> Integer @@ -210,40 +259,10 @@ models(ensemble::SemEnsemble) = ensemble.sems Returns the weights of an ensemble model. """ weights(ensemble::SemEnsemble) = ensemble.weights -""" - optimizer(ensemble::SemEnsemble) -> SemOptimizer -Returns the optimizer part of an ensemble model. """ -optimizer(ensemble::SemEnsemble) = ensemble.optimizer - -############################################################################################ -# additional methods -############################################################################################ +Base type for all SEM specifications. """ - observed(model::AbstractSemSingle) -> SemObserved +abstract type SemSpecification end -Returns the observed part of a model. -""" -observed(model::AbstractSemSingle) = model.observed - -""" - imply(model::AbstractSemSingle) -> SemImply - -Returns the imply part of a model. -""" -imply(model::AbstractSemSingle) = model.imply - -""" - loss(model::AbstractSemSingle) -> SemLoss - -Returns the loss part of a model. -""" -loss(model::AbstractSemSingle) = model.loss - -""" - optimizer(model::AbstractSemSingle) -> SemOptimizer - -Returns the optimizer part of a model. 
-""" -optimizer(model::AbstractSemSingle) = model.optimizer +abstract type AbstractParameterTable <: SemSpecification end diff --git a/test/Project.toml b/test/Project.toml index c5124c659..3cf1e50e3 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -6,10 +6,14 @@ JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899" LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3" LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +NLopt = "76087f3c-5699-56af-9a33-bf431cd00edd" Optim = "429524aa-4258-5aef-a3af-852621145aeb" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +ProximalAlgorithms = "140ffc9f-1907-541a-a177-7475e0a401e9" +ProximalOperators = "a725b495-10eb-56fe-b38b-717eba820537" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/test/examples/examples.jl b/test/examples/examples.jl index a1e0f2c28..e088ffa92 100644 --- a/test/examples/examples.jl +++ b/test/examples/examples.jl @@ -9,3 +9,6 @@ end @safetestset "Multigroup" begin include("multigroup/multigroup.jl") end +@safetestset "Proximal" begin + include("proximal/proximal.jl") +end diff --git a/test/examples/helper.jl b/test/examples/helper.jl index 3bb4e217a..4ff9bd507 100644 --- a/test/examples/helper.jl +++ b/test/examples/helper.jl @@ -1,61 +1,68 @@ -function test_gradient(model, parameters; rtol = 1e-10, atol = 0) - true_grad = - FiniteDiff.finite_difference_gradient(Base.Fix1(objective!, model), parameters) - gradient = similar(parameters) +using LinearAlgebra: norm + +function is_extended_tests() + return lowercase(get(ENV, "JULIA_EXTENDED_TESTS", "false")) == "true" +end + +function test_gradient(model, params; rtol = 1e-10, atol = 0) + @test nparams(model) == 
length(params) + + true_grad = FiniteDiff.finite_difference_gradient(Base.Fix1(objective!, model), params) + gradient = similar(params) # F and G fill!(gradient, NaN) - gradient!(gradient, model, parameters) + gradient!(gradient, model, params) @test gradient ≈ true_grad rtol = rtol atol = atol # only G fill!(gradient, NaN) - objective_gradient!(gradient, model, parameters) + objective_gradient!(gradient, model, params) @test gradient ≈ true_grad rtol = rtol atol = atol end -function test_hessian(model, parameters; rtol = 1e-4, atol = 0) +function test_hessian(model, params; rtol = 1e-4, atol = 0) true_hessian = - FiniteDiff.finite_difference_hessian(Base.Fix1(objective!, model), parameters) - hessian = similar(parameters, size(true_hessian)) - gradient = similar(parameters) + FiniteDiff.finite_difference_hessian(Base.Fix1(objective!, model), params) + hessian = similar(params, size(true_hessian)) + gradient = similar(params) # H fill!(hessian, NaN) - hessian!(hessian, model, parameters) + hessian!(hessian, model, params) @test hessian ≈ true_hessian rtol = rtol atol = atol # F and H fill!(hessian, NaN) - objective_hessian!(hessian, model, parameters) + objective_hessian!(hessian, model, params) @test hessian ≈ true_hessian rtol = rtol atol = atol # G and H fill!(hessian, NaN) - gradient_hessian!(gradient, hessian, model, parameters) + gradient_hessian!(gradient, hessian, model, params) @test hessian ≈ true_hessian rtol = rtol atol = atol # F, G and H fill!(hessian, NaN) - objective_gradient_hessian!(gradient, hessian, model, parameters) + objective_gradient_hessian!(gradient, hessian, model, params) @test hessian ≈ true_hessian rtol = rtol atol = atol end fitmeasure_names_ml = Dict( :AIC => "aic", :BIC => "bic", - :df => "df", + :dof => "df", :χ² => "chisq", :p_value => "pvalue", - :n_par => "npar", + :nparams => "npar", :RMSEA => "rmsea", ) fitmeasure_names_ls = Dict( - :df => "df", + :dof => "df", :χ² => "chisq", :p_value => "pvalue", - :n_par => "npar", + 
:nparams => "npar", :RMSEA => "rmsea", ) @@ -72,264 +79,63 @@ function test_fitmeasures( end end -function compare_estimates( +function test_estimates( partable::ParameterTable, partable_lav; rtol = 1e-10, atol = 0, col = :estimate, lav_col = :est, + lav_group = nothing, + skip::Bool = false, ) - correct = [] - - for i in findall(partable.columns[:free]) - from = partable.columns[:from][i] - to = partable.columns[:to][i] - type = partable.columns[:parameter_type][i] - estimate = partable.columns[col][i] - - if from == Symbol("1") - lav_ind = - findall((partable_lav.lhs .== String(to)) .& (partable_lav.op .== "~1")) - - if length(lav_ind) == 0 - throw( - ErrorException( - "Parameter from: $from, to: $to, type: $type, could not be found in the lavaan solution", - ), - ) - elseif length(lav_ind) > 1 - throw( - ErrorException( - "At least one parameter was found twice in the lavaan solution", - ), - ) - else - is_correct = isapprox( - estimate, - partable_lav[:, lav_col][lav_ind[1]]; - rtol = rtol, - atol = atol, - ) - push!(correct, is_correct) - end - - else - if type == :↔ - type = "~~" - elseif type == :→ - if (from ∈ partable.variables[:latent_vars]) & - (to ∈ partable.variables[:observed_vars]) - type = "=~" - else - type = "~" - from, to = to, from - end - end - - if type == "~~" - lav_ind = findall( - ( - ( - (partable_lav.lhs .== String(from)) .& - (partable_lav.rhs .== String(to)) - ) .| ( - (partable_lav.lhs .== String(to)) .& - (partable_lav.rhs .== String(from)) - ) - ) .& (partable_lav.op .== type), - ) - - if length(lav_ind) == 0 - throw( - ErrorException( - "Parameter from: $from, to: $to, type: $type, could not be found in the lavaan solution", - ), - ) - elseif length(lav_ind) > 1 - throw( - ErrorException( - "At least one parameter was found twice in the lavaan solution", - ), - ) - else - is_correct = isapprox( - estimate, - partable_lav[:, lav_col][lav_ind[1]]; - rtol = rtol, - atol = atol, - ) - push!(correct, is_correct) - end - - else - lav_ind 
= findall( - (partable_lav.lhs .== String(from)) .& - (partable_lav.rhs .== String(to)) .& - (partable_lav.op .== type), - ) - - if length(lav_ind) == 0 - throw( - ErrorException( - "Parameter from: $from, to: $to, type: $type, could not be found in the lavaan solution", - ), - ) - elseif length(lav_ind) > 1 - throw( - ErrorException( - "At least one parameter was found twice in the lavaan solution", - ), - ) - else - is_correct = isapprox( - estimate, - partable_lav[:, lav_col][lav_ind[1]]; - rtol = rtol, - atol = atol, - ) - push!(correct, is_correct) - end - end - end + actual = StructuralEquationModels.params(partable, col) + expected = StructuralEquationModels.lavaan_params( + partable_lav, + partable, + lav_col, + lav_group, + ) + @test !any(isnan, actual) + @test !any(isnan, expected) + + if skip # workaround skip=false not supported in earlier versions + @test actual ≈ expected rtol = rtol atol = atol norm = Base.Fix2(norm, Inf) skip = + skip + else + @test actual ≈ expected rtol = rtol atol = atol norm = Base.Fix2(norm, Inf) end - - return all(correct) end -function compare_estimates( +function test_estimates( ens_partable::EnsembleParameterTable, partable_lav; rtol = 1e-10, atol = 0, col = :estimate, lav_col = :est, - lav_groups, + lav_groups::AbstractDict, + skip::Bool = false, ) - correct = [] - - for key in keys(ens_partable.tables) - group = lav_groups[key] - partable = ens_partable.tables[key] - - for i in findall(partable.columns[:free]) - from = partable.columns[:from][i] - to = partable.columns[:to][i] - type = partable.columns[:parameter_type][i] - estimate = partable.columns[col][i] - - if from == Symbol("1") - lav_ind = findall( - (partable_lav.lhs .== String(to)) .& - (partable_lav.op .== "~1") .& - (partable_lav.group .== group), - ) - - if length(lav_ind) == 0 - throw( - ErrorException( - "Mean parameter of variable $to could not be found in the lavaan solution", - ), - ) - elseif length(lav_ind) > 1 - throw( - ErrorException( - "At least 
one parameter was found twice in the lavaan solution", - ), - ) - else - is_correct = isapprox( - estimate, - partable_lav[:, lav_col][lav_ind[1]]; - rtol = rtol, - atol = atol, - ) - push!(correct, is_correct) - end - - else - if type == :↔ - type = "~~" - elseif type == :→ - if (from ∈ partable.variables[:latent_vars]) & - (to ∈ partable.variables[:observed_vars]) - type = "=~" - else - type = "~" - from, to = to, from - end - end - - if type == "~~" - lav_ind = findall( - ( - ( - (partable_lav.lhs .== String(from)) .& - (partable_lav.rhs .== String(to)) - ) .| ( - (partable_lav.lhs .== String(to)) .& - (partable_lav.rhs .== String(from)) - ) - ) .& - (partable_lav.op .== type) .& - (partable_lav.group .== group), - ) - - if length(lav_ind) == 0 - throw( - ErrorException( - "Parameter from: $from, to: $to, type: $type, could not be found in the lavaan solution", - ), - ) - elseif length(lav_ind) > 1 - throw( - ErrorException( - "At least one parameter was found twice in the lavaan solution", - ), - ) - else - is_correct = isapprox( - estimate, - partable_lav[:, lav_col][lav_ind[1]]; - rtol = rtol, - atol = atol, - ) - push!(correct, is_correct) - end - - else - lav_ind = findall( - (partable_lav.lhs .== String(from)) .& - (partable_lav.rhs .== String(to)) .& - (partable_lav.op .== type) .& - (partable_lav.group .== group), - ) - - if length(lav_ind) == 0 - throw( - ErrorException( - "Parameter $from $type $to could not be found in the lavaan solution", - ), - ) - elseif length(lav_ind) > 1 - throw( - ErrorException( - "At least one parameter was found twice in the lavaan solution", - ), - ) - else - is_correct = isapprox( - estimate, - partable_lav[:, lav_col][lav_ind[1]]; - rtol = rtol, - atol = atol, - ) - push!(correct, is_correct) - end - end - end - end + actual = fill(NaN, nparams(ens_partable)) + expected = fill(NaN, nparams(ens_partable)) + for (key, partable) in pairs(ens_partable.tables) + StructuralEquationModels.params!(actual, partable, col) + 
StructuralEquationModels.lavaan_params!( + expected, + partable_lav, + partable, + lav_col, + lav_groups[key], + ) + end + @test !any(isnan, actual) + @test !any(isnan, expected) + + if skip # workaround skip=false not supported in earlier versions + @test actual ≈ expected rtol = rtol atol = atol norm = Base.Fix2(norm, Inf) skip = + skip + else + @test actual ≈ expected rtol = rtol atol = atol norm = Base.Fix2(norm, Inf) end - - return all(correct) end diff --git a/test/examples/multigroup/build_models.jl b/test/examples/multigroup/build_models.jl index 9b97300df..f6a7a230d 100644 --- a/test/examples/multigroup/build_models.jl +++ b/test/examples/multigroup/build_models.jl @@ -1,23 +1,44 @@ +const SEM = StructuralEquationModels + ############################################################################################ # ML estimation ############################################################################################ -model_g1 = Sem(specification = specification_g1, data = dat_g1, imply = RAMSymbolic) +model_g1 = Sem(specification = specification_g1, data = dat_g1, implied = RAMSymbolic) -model_g2 = Sem(specification = specification_g2, data = dat_g2, imply = RAM) +model_g2 = Sem(specification = specification_g2, data = dat_g2, implied = RAM) -model_ml_multigroup = SemEnsemble(model_g1, model_g2; optimizer = semoptimizer) +@test SEM.param_labels(model_g1.implied.ram_matrices) == SEM.param_labels(model_g2.implied.ram_matrices) + +# test the different constructors +model_ml_multigroup = SemEnsemble(model_g1, model_g2) +model_ml_multigroup2 = SemEnsemble( + specification = partable, + data = dat, + column = :school, + groups = [:Pasteur, :Grant_White], + loss = SemML, +) # gradients @testset "ml_gradients_multigroup" begin test_gradient(model_ml_multigroup, start_test; atol = 1e-9) + test_gradient(model_ml_multigroup2, start_test; atol = 1e-9) end # fit @testset "ml_solution_multigroup" begin - solution = sem_fit(model_ml_multigroup) + solution = 
fit(semoptimizer, model_ml_multigroup) + update_estimate!(partable, solution) + test_estimates( + partable, + solution_lav[:parameter_estimates_ml]; + atol = 1e-4, + lav_groups = Dict(:Pasteur => 1, :Grant_White => 2), + ) + solution = fit(semoptimizer, model_ml_multigroup2) update_estimate!(partable, solution) - @test compare_estimates( + test_estimates( partable, solution_lav[:parameter_estimates_ml]; atol = 1e-4, @@ -26,16 +47,32 @@ end end @testset "fitmeasures/se_ml" begin - solution_ml = sem_fit(model_ml_multigroup) + solution_ml = fit(model_ml_multigroup) test_fitmeasures( fit_measures(solution_ml), solution_lav[:fitmeasures_ml]; rtol = 1e-2, atol = 1e-7, ) + update_se_hessian!(partable, solution_ml) + test_estimates( + partable, + solution_lav[:parameter_estimates_ml]; + atol = 1e-3, + col = :se, + lav_col = :se, + lav_groups = Dict(:Pasteur => 1, :Grant_White => 2), + ) + solution_ml = fit(model_ml_multigroup2) + test_fitmeasures( + fit_measures(solution_ml), + solution_lav[:fitmeasures_ml]; + rtol = 1e-2, + atol = 1e-7, + ) update_se_hessian!(partable, solution_ml) - @test compare_estimates( + test_estimates( partable, solution_lav[:parameter_estimates_ml]; atol = 1e-3, @@ -49,16 +86,16 @@ end # ML estimation - sorted ############################################################################################ -partable_s = sort(partable) +partable_s = sort_vars(partable) -specification_s = RAMMatrices(partable_s) +specification_s = convert(Dict{Symbol, RAMMatrices}, partable_s) specification_g1_s = specification_s[:Pasteur] specification_g2_s = specification_s[:Grant_White] -model_g1 = Sem(specification = specification_g1_s, data = dat_g1, imply = RAMSymbolic) +model_g1 = Sem(specification = specification_g1_s, data = dat_g1, implied = RAMSymbolic) -model_g2 = Sem(specification = specification_g2_s, data = dat_g2, imply = RAM) +model_g2 = Sem(specification = specification_g2_s, data = dat_g2, implied = RAM) model_ml_multigroup = SemEnsemble(model_g1, 
model_g2; optimizer = semoptimizer) @@ -70,15 +107,15 @@ end grad = similar(start_test) gradient!(grad, model_ml_multigroup, rand(36)) grad_fd = FiniteDiff.finite_difference_gradient( - x -> objective!(model_ml_multigroup, x), + Base.Fix1(SEM.objective, model_ml_multigroup), start_test, ) # fit @testset "ml_solution_multigroup | sorted" begin - solution = sem_fit(model_ml_multigroup) + solution = fit(model_ml_multigroup) update_estimate!(partable_s, solution) - @test compare_estimates( + test_estimates( partable_s, solution_lav[:parameter_estimates_ml]; atol = 1e-4, @@ -87,7 +124,7 @@ grad_fd = FiniteDiff.finite_difference_gradient( end @testset "fitmeasures/se_ml | sorted" begin - solution_ml = sem_fit(model_ml_multigroup) + solution_ml = fit(model_ml_multigroup) test_fitmeasures( fit_measures(solution_ml), solution_lav[:fitmeasures_ml]; @@ -96,7 +133,7 @@ end ) update_se_hessian!(partable_s, solution_ml) - @test compare_estimates( + test_estimates( partable_s, solution_lav[:parameter_estimates_ml]; atol = 1e-3, @@ -107,14 +144,18 @@ end end @testset "sorted | LowerTriangular A" begin - @test imply(model_ml_multigroup.sems[2]).A isa LowerTriangular + @test implied(model_ml_multigroup.sems[2]).A isa LowerTriangular end ############################################################################################ # ML estimation - user defined loss function ############################################################################################ -struct UserSemML <: SemLossFunction end +struct UserSemML <: SemLossFunction + hessianeval::ExactHessian + + UserSemML() = new(ExactHessian()) +end ############################################################################################ ### functors @@ -122,8 +163,8 @@ struct UserSemML <: SemLossFunction end using LinearAlgebra: isposdef, logdet, tr, inv -function SEM.objective!(semml::UserSemML, parameters, model::AbstractSem) - Σ = imply(model).Σ +function SEM.objective(ml::UserSemML, model::AbstractSem, params) + Σ 
= implied(model).Σ Σₒ = SEM.obs_cov(observed(model)) if !isposdef(Σ) return Inf @@ -133,12 +174,12 @@ function SEM.objective!(semml::UserSemML, parameters, model::AbstractSem) end # models -model_g1 = Sem(specification = specification_g1, data = dat_g1, imply = RAMSymbolic) +model_g1 = Sem(specification = specification_g1, data = dat_g1, implied = RAMSymbolic) model_g2 = SemFiniteDiff( specification = specification_g2, data = dat_g2, - imply = RAMSymbolic, + implied = RAMSymbolic, loss = UserSemML(), ) @@ -150,9 +191,9 @@ end # fit @testset "solution_user_defined_loss" begin - solution = sem_fit(model_ml_multigroup) + solution = fit(model_ml_multigroup) update_estimate!(partable, solution) - @test compare_estimates( + test_estimates( partable, solution_lav[:parameter_estimates_ml]; atol = 1e-4, @@ -164,11 +205,19 @@ end # GLS estimation ############################################################################################ -model_ls_g1 = - Sem(specification = specification_g1, data = dat_g1, imply = RAMSymbolic, loss = SemWLS) +model_ls_g1 = Sem( + specification = specification_g1, + data = dat_g1, + implied = RAMSymbolic, + loss = SemWLS, +) -model_ls_g2 = - Sem(specification = specification_g2, data = dat_g2, imply = RAMSymbolic, loss = SemWLS) +model_ls_g2 = Sem( + specification = specification_g2, + data = dat_g2, + implied = RAMSymbolic, + loss = SemWLS, +) model_ls_multigroup = SemEnsemble(model_ls_g1, model_ls_g2; optimizer = semoptimizer) @@ -177,9 +226,9 @@ model_ls_multigroup = SemEnsemble(model_ls_g1, model_ls_g2; optimizer = semoptim end @testset "ls_solution_multigroup" begin - solution = sem_fit(model_ls_multigroup) + solution = fit(model_ls_multigroup) update_estimate!(partable, solution) - @test compare_estimates( + test_estimates( partable, solution_lav[:parameter_estimates_ls]; atol = 1e-4, @@ -188,7 +237,7 @@ end end @testset "fitmeasures/se_ls" begin - solution_ls = sem_fit(model_ls_multigroup) + solution_ls = fit(model_ls_multigroup) 
test_fitmeasures( fit_measures(solution_ls), solution_lav[:fitmeasures_ls]; @@ -197,8 +246,8 @@ end atol = 1e-5, ) - update_se_hessian!(partable, solution_ls) - @test compare_estimates( + @suppress update_se_hessian!(partable, solution_ls) + test_estimates( partable, solution_lav[:parameter_estimates_ls]; atol = 1e-2, @@ -218,8 +267,7 @@ if !isnothing(specification_miss_g1) observed = SemObservedMissing, loss = SemFIML, data = dat_miss_g1, - imply = RAM, - optimizer = SemOptimizerEmpty(), + implied = RAM, meanstructure = true, ) @@ -228,12 +276,20 @@ if !isnothing(specification_miss_g1) observed = SemObservedMissing, loss = SemFIML, data = dat_miss_g2, - imply = RAM, - optimizer = SemOptimizerEmpty(), + implied = RAM, meanstructure = true, ) - model_ml_multigroup = SemEnsemble(model_g1, model_g2; optimizer = semoptimizer) + model_ml_multigroup = SemEnsemble(model_g1, model_g2) + model_ml_multigroup2 = SemEnsemble( + specification = partable_miss, + data = dat_missing, + column = :school, + groups = [:Pasteur, :Grant_White], + loss = SemFIML, + observed = SemObservedMissing, + meanstructure = true, + ) ############################################################################################ ### test gradients @@ -261,12 +317,21 @@ if !isnothing(specification_miss_g1) @testset "fiml_gradients_multigroup" begin test_gradient(model_ml_multigroup, start_test; atol = 1e-7) + test_gradient(model_ml_multigroup2, start_test; atol = 1e-7) end @testset "fiml_solution_multigroup" begin - solution = sem_fit(model_ml_multigroup) + solution = fit(semoptimizer, model_ml_multigroup) update_estimate!(partable_miss, solution) - @test compare_estimates( + test_estimates( + partable_miss, + solution_lav[:parameter_estimates_fiml]; + atol = 1e-4, + lav_groups = Dict(:Pasteur => 1, :Grant_White => 2), + ) + solution = fit(semoptimizer, model_ml_multigroup2) + update_estimate!(partable_miss, solution) + test_estimates( partable_miss, solution_lav[:parameter_estimates_fiml]; atol = 
1e-4, @@ -275,16 +340,32 @@ if !isnothing(specification_miss_g1) end @testset "fitmeasures/se_fiml" begin - solution = sem_fit(model_ml_multigroup) + solution = fit(semoptimizer, model_ml_multigroup) test_fitmeasures( fit_measures(solution), solution_lav[:fitmeasures_fiml]; rtol = 1e-3, atol = 0, ) + update_se_hessian!(partable_miss, solution) + test_estimates( + partable_miss, + solution_lav[:parameter_estimates_fiml]; + atol = 1e-3, + col = :se, + lav_col = :se, + lav_groups = Dict(:Pasteur => 1, :Grant_White => 2), + ) + solution = fit(semoptimizer, model_ml_multigroup2) + test_fitmeasures( + fit_measures(solution), + solution_lav[:fitmeasures_fiml]; + rtol = 1e-3, + atol = 0, + ) update_se_hessian!(partable_miss, solution) - @test compare_estimates( + test_estimates( partable_miss, solution_lav[:parameter_estimates_fiml]; atol = 1e-3, diff --git a/test/examples/multigroup/multigroup.jl b/test/examples/multigroup/multigroup.jl index 759c24eda..239bf713c 100644 --- a/test/examples/multigroup/multigroup.jl +++ b/test/examples/multigroup/multigroup.jl @@ -1,4 +1,4 @@ -using StructuralEquationModels, Test, FiniteDiff +using StructuralEquationModels, Test, FiniteDiff, Suppressor using LinearAlgebra: diagind, LowerTriangular const SEM = StructuralEquationModels @@ -15,6 +15,9 @@ dat_g2 = dat[dat.school.=="Grant-White", :] dat_miss_g1 = dat_missing[dat_missing.school.=="Pasteur", :] dat_miss_g2 = dat_missing[dat_missing.school.=="Grant-White", :] +dat.school = ifelse.(dat.school .== "Pasteur", :Pasteur, :Grant_White) +dat_missing.school = ifelse.(dat_missing.school .== "Pasteur", :Pasteur, :Grant_White) + ############################################################################################ ### specification - RAMMatrices ############################################################################################ @@ -56,23 +59,20 @@ specification_g1 = RAMMatrices(; A = A, S = S1, F = F, - parameters = x, - colnames = [:x1, :x2, :x3, :x4, :x5, :x6, :x7, :x8, :x9, 
:visual, :textual, :speed], + param_labels = x, + vars = [:x1, :x2, :x3, :x4, :x5, :x6, :x7, :x8, :x9, :visual, :textual, :speed], ) specification_g2 = RAMMatrices(; A = A, S = S2, F = F, - parameters = x, - colnames = [:x1, :x2, :x3, :x4, :x5, :x6, :x7, :x8, :x9, :visual, :textual, :speed], + param_labels = x, + vars = [:x1, :x2, :x3, :x4, :x5, :x6, :x7, :x8, :x9, :visual, :textual, :speed], ) -partable = EnsembleParameterTable( - specification_g1, - specification_g2; - groups = [:Pasteur, :Grant_White], -) +partable = + EnsembleParameterTable(:Pasteur => specification_g1, :Grant_White => specification_g2) specification_miss_g1 = nothing specification_miss_g2 = nothing @@ -86,7 +86,7 @@ start_test = [ fill(0.05, 3) fill(0.01, 3) ] -semoptimizer = SemOptimizerOptim +semoptimizer = SemOptimizerOptim() @testset "RAMMatrices | constructor | Optim" begin include("build_models.jl") @@ -111,14 +111,14 @@ graph = @StenoGraph begin _(latent_vars) ⇔ _(latent_vars) end -partable = EnsembleParameterTable(; - graph = graph, +partable = EnsembleParameterTable( + graph; observed_vars = observed_vars, latent_vars = latent_vars, groups = [:Pasteur, :Grant_White], ) -specification = RAMMatrices(partable) +specification = convert(Dict{Symbol, RAMMatrices}, partable) specification_g1 = specification[:Pasteur] specification_g2 = specification[:Grant_White] @@ -137,17 +137,17 @@ graph = @StenoGraph begin _(observed_vars) ↔ _(observed_vars) _(latent_vars) ⇔ _(latent_vars) - Symbol("1") → _(observed_vars) + Symbol(1) → _(observed_vars) end -partable_miss = EnsembleParameterTable(; - graph = graph, +partable_miss = EnsembleParameterTable( + graph; observed_vars = observed_vars, latent_vars = latent_vars, groups = [:Pasteur, :Grant_White], ) -specification_miss = RAMMatrices(partable_miss) +specification_miss = convert(Dict{Symbol, RAMMatrices}, partable_miss) specification_miss_g1 = specification_miss[:Pasteur] specification_miss_g2 = specification_miss[:Grant_White] @@ -169,7 +169,7 @@ 
start_test = [ 0.01 0.05 ] -semoptimizer = SemOptimizerOptim +semoptimizer = SemOptimizerOptim() @testset "Graph → Partable → RAMMatrices | constructor | Optim" begin include("build_models.jl") diff --git a/test/examples/political_democracy/by_parts.jl b/test/examples/political_democracy/by_parts.jl index c071d9e00..3397b5f0a 100644 --- a/test/examples/political_democracy/by_parts.jl +++ b/test/examples/political_democracy/by_parts.jl @@ -5,17 +5,17 @@ # observed --------------------------------------------------------------------------------- observed = SemObservedData(specification = spec, data = dat) -# imply -imply_ram = RAM(specification = spec) +# implied +implied_ram = RAM(specification = spec) -imply_ram_sym = RAMSymbolic(specification = spec) +implied_ram_sym = RAMSymbolic(specification = spec) # loss functions --------------------------------------------------------------------------- ml = SemML(observed = observed) wls = SemWLS(observed = observed) -ridge = SemRidge(α_ridge = 0.001, which_ridge = 16:20, n_par = 31) +ridge = SemRidge(α_ridge = 0.001, which_ridge = 16:20, nparams = 31) constant = SemConstant(constant_loss = 3.465) @@ -25,23 +25,22 @@ loss_ml = SemLoss(ml) loss_wls = SemLoss(wls) # optimizer ------------------------------------------------------------------------------------- -optimizer_obj = semoptimizer() +optimizer_obj = SemOptimizer(engine = opt_engine) # models ----------------------------------------------------------------------------------- -model_ml = Sem(observed, imply_ram, loss_ml, optimizer_obj) +model_ml = Sem(observed, implied_ram, loss_ml) -model_ls_sym = - Sem(observed, RAMSymbolic(specification = spec, vech = true), loss_wls, optimizer_obj) +model_ls_sym = Sem(observed, RAMSymbolic(specification = spec, vech = true), loss_wls) -model_ml_sym = Sem(observed, imply_ram_sym, loss_ml, optimizer_obj) +model_ml_sym = Sem(observed, implied_ram_sym, loss_ml) -model_ridge = Sem(observed, imply_ram, SemLoss(ml, ridge), optimizer_obj) 
+model_ridge = Sem(observed, implied_ram, SemLoss(ml, ridge)) -model_constant = Sem(observed, imply_ram, SemLoss(ml, constant), optimizer_obj) +model_constant = Sem(observed, implied_ram, SemLoss(ml, constant)) model_ml_weighted = - Sem(observed, imply_ram, SemLoss(ml; loss_weights = [n_obs(model_ml)]), optimizer_obj) + Sem(observed, implied_ram, SemLoss(ml; loss_weights = [nsamples(model_ml)])) ############################################################################################ ### test gradients @@ -71,18 +70,18 @@ solution_names = Symbol.("parameter_estimates_" .* ["ml", "ls", "ml", "ml"]) for (model, name, solution_name) in zip(models, model_names, solution_names) try @testset "$(name)_solution" begin - solution = sem_fit(model) + solution = fit(optimizer_obj, model) update_estimate!(partable, solution) - @test compare_estimates(partable, solution_lav[solution_name]; atol = 1e-2) + test_estimates(partable, solution_lav[solution_name]; atol = 1e-2) end catch end end @testset "ridge_solution" begin - solution_ridge = sem_fit(model_ridge) - solution_ml = sem_fit(model_ml) - # solution_ridge_id = sem_fit(model_ridge_id) + solution_ridge = fit(optimizer_obj, model_ridge) + solution_ml = fit(optimizer_obj, model_ml) + # solution_ridge_id = fit(optimizer_obj, model_ridge_id) @test solution_ridge.minimum < solution_ml.minimum + 1 end @@ -98,10 +97,10 @@ end end @testset "ml_solution_weighted" begin - solution_ml = sem_fit(model_ml) - solution_ml_weighted = sem_fit(model_ml_weighted) + solution_ml = fit(optimizer_obj, model_ml) + solution_ml_weighted = fit(optimizer_obj, model_ml_weighted) @test solution(solution_ml) ≈ solution(solution_ml_weighted) rtol = 1e-3 - @test n_obs(model_ml) * StructuralEquationModels.minimum(solution_ml) ≈ + @test nsamples(model_ml) * StructuralEquationModels.minimum(solution_ml) ≈ StructuralEquationModels.minimum(solution_ml_weighted) rtol = 1e-6 end @@ -110,11 +109,11 @@ end 
############################################################################################ @testset "fitmeasures/se_ml" begin - solution_ml = sem_fit(model_ml) + solution_ml = fit(optimizer_obj, model_ml) test_fitmeasures(fit_measures(solution_ml), solution_lav[:fitmeasures_ml]; atol = 1e-3) update_se_hessian!(partable, solution_ml) - @test compare_estimates( + test_estimates( partable, solution_lav[:parameter_estimates_ml]; atol = 1e-3, @@ -124,7 +123,7 @@ end end @testset "fitmeasures/se_ls" begin - solution_ls = sem_fit(model_ls_sym) + solution_ls = fit(optimizer_obj, model_ls_sym) fm = fit_measures(solution_ls) test_fitmeasures( fm, @@ -134,8 +133,8 @@ end ) @test (fm[:AIC] === missing) & (fm[:BIC] === missing) & (fm[:minus2ll] === missing) - update_se_hessian!(partable, solution_ls) - @test compare_estimates( + @suppress update_se_hessian!(partable, solution_ls) + test_estimates( partable, solution_lav[:parameter_estimates_ls]; atol = 1e-2, @@ -148,24 +147,25 @@ end ### test hessians ############################################################################################ -if semoptimizer == SemOptimizerOptim +if opt_engine == :Optim using Optim, LineSearches - optimizer_obj = SemOptimizerOptim( + optimizer_obj = SemOptimizer( + engine = opt_engine, algorithm = Newton(; linesearch = BackTracking(order = 3), alphaguess = InitialHagerZhang(), ), ) - imply_sym_hessian_vech = RAMSymbolic(specification = spec, vech = true, hessian = true) + implied_sym_hessian_vech = + RAMSymbolic(specification = spec, vech = true, hessian = true) - imply_sym_hessian = RAMSymbolic(specification = spec, hessian = true) + implied_sym_hessian = RAMSymbolic(specification = spec, hessian = true) - model_ls = Sem(observed, imply_sym_hessian_vech, loss_wls, optimizer_obj) + model_ls = Sem(observed, implied_sym_hessian_vech, loss_wls) - model_ml = - Sem(observed, imply_sym_hessian, loss_ml, SemOptimizerOptim(algorithm = Newton())) + model_ml = Sem(observed, implied_sym_hessian, 
loss_ml) @testset "ml_hessians" begin test_hessian(model_ml, start_test; atol = 1e-4) @@ -176,23 +176,20 @@ if semoptimizer == SemOptimizerOptim end @testset "ml_solution_hessian" begin - solution = sem_fit(model_ml) + solution = fit(optimizer_obj, model_ml) update_estimate!(partable, solution) - @test compare_estimates( - partable, - solution_lav[:parameter_estimates_ml]; - atol = 1e-3, - ) + test_estimates(partable, solution_lav[:parameter_estimates_ml]; atol = 1e-2) end @testset "ls_solution_hessian" begin - solution = sem_fit(model_ls) + solution = fit(optimizer_obj, model_ls) update_estimate!(partable, solution) - @test compare_estimates( + test_estimates( partable, solution_lav[:parameter_estimates_ls]; atol = 1e-3, - ) skip = true + skip = true, + ) end end @@ -203,10 +200,10 @@ end # observed --------------------------------------------------------------------------------- observed = SemObservedData(specification = spec_mean, data = dat, meanstructure = true) -# imply -imply_ram = RAM(specification = spec_mean, meanstructure = true) +# implied +implied_ram = RAM(specification = spec_mean, meanstructure = true) -imply_ram_sym = RAMSymbolic(specification = spec_mean, meanstructure = true) +implied_ram_sym = RAMSymbolic(specification = spec_mean, meanstructure = true) # loss functions --------------------------------------------------------------------------- ml = SemML(observed = observed, meanstructure = true) @@ -219,19 +216,18 @@ loss_ml = SemLoss(ml) loss_wls = SemLoss(wls) # optimizer ------------------------------------------------------------------------------------- -optimizer_obj = semoptimizer() +optimizer_obj = SemOptimizer(engine = opt_engine) # models ----------------------------------------------------------------------------------- -model_ml = Sem(observed, imply_ram, loss_ml, optimizer_obj) +model_ml = Sem(observed, implied_ram, loss_ml) model_ls = Sem( observed, RAMSymbolic(specification = spec_mean, meanstructure = true, vech = true), 
loss_wls, - optimizer_obj, ) -model_ml_sym = Sem(observed, imply_ram_sym, loss_ml, optimizer_obj) +model_ml_sym = Sem(observed, implied_ram_sym, loss_ml) ############################################################################################ ### test gradients @@ -258,9 +254,9 @@ solution_names = Symbol.("parameter_estimates_" .* ["ml", "ls", "ml"] .* "_mean" for (model, name, solution_name) in zip(models, model_names, solution_names) try @testset "$(name)_solution_mean" begin - solution = sem_fit(model) + solution = fit(optimizer_obj, model) update_estimate!(partable_mean, solution) - @test compare_estimates(partable_mean, solution_lav[solution_name]; atol = 1e-2) + test_estimates(partable_mean, solution_lav[solution_name]; atol = 1e-2) end catch end @@ -271,7 +267,7 @@ end ############################################################################################ @testset "fitmeasures/se_ml_mean" begin - solution_ml = sem_fit(model_ml) + solution_ml = fit(optimizer_obj, model_ml) test_fitmeasures( fit_measures(solution_ml), solution_lav[:fitmeasures_ml_mean]; @@ -279,7 +275,7 @@ end ) update_se_hessian!(partable_mean, solution_ml) - @test compare_estimates( + test_estimates( partable_mean, solution_lav[:parameter_estimates_ml_mean]; atol = 0.002, @@ -289,7 +285,7 @@ end end @testset "fitmeasures/se_ls_mean" begin - solution_ls = sem_fit(model_ls) + solution_ls = fit(optimizer_obj, model_ls) fm = fit_measures(solution_ls) test_fitmeasures( fm, @@ -299,8 +295,8 @@ end ) @test (fm[:AIC] === missing) & (fm[:BIC] === missing) & (fm[:minus2ll] === missing) - update_se_hessian!(partable_mean, solution_ls) - @test compare_estimates( + @suppress update_se_hessian!(partable_mean, solution_ls) + test_estimates( partable_mean, solution_lav[:parameter_estimates_ls_mean]; atol = 1e-2, @@ -319,9 +315,9 @@ fiml = SemFIML(observed = observed, specification = spec_mean) loss_fiml = SemLoss(fiml) -model_ml = Sem(observed, imply_ram, loss_fiml, optimizer_obj) +model_ml = 
Sem(observed, implied_ram, loss_fiml) -model_ml_sym = Sem(observed, imply_ram_sym, loss_fiml, optimizer_obj) +model_ml_sym = Sem(observed, implied_ram_sym, loss_fiml) ############################################################################################ ### test gradients @@ -340,23 +336,15 @@ end ############################################################################################ @testset "fiml_solution" begin - solution = sem_fit(model_ml) + solution = fit(optimizer_obj, model_ml) update_estimate!(partable_mean, solution) - @test compare_estimates( - partable_mean, - solution_lav[:parameter_estimates_fiml]; - atol = 1e-2, - ) + test_estimates(partable_mean, solution_lav[:parameter_estimates_fiml]; atol = 1e-2) end @testset "fiml_solution_symbolic" begin - solution = sem_fit(model_ml_sym) + solution = fit(optimizer_obj, model_ml_sym) update_estimate!(partable_mean, solution) - @test compare_estimates( - partable_mean, - solution_lav[:parameter_estimates_fiml]; - atol = 1e-2, - ) + test_estimates(partable_mean, solution_lav[:parameter_estimates_fiml]; atol = 1e-2) end ############################################################################################ @@ -364,7 +352,7 @@ end ############################################################################################ @testset "fitmeasures/se_fiml" begin - solution_ml = sem_fit(model_ml) + solution_ml = fit(optimizer_obj, model_ml) test_fitmeasures( fit_measures(solution_ml), solution_lav[:fitmeasures_fiml]; @@ -372,7 +360,7 @@ end ) update_se_hessian!(partable_mean, solution_ml) - @test compare_estimates( + test_estimates( partable_mean, solution_lav[:parameter_estimates_fiml]; atol = 1e-3, diff --git a/test/examples/political_democracy/constraints.jl b/test/examples/political_democracy/constraints.jl index e5cd96ab9..cc1b0874d 100644 --- a/test/examples/political_democracy/constraints.jl +++ b/test/examples/political_democracy/constraints.jl @@ -1,4 +1,5 @@ # NLopt constraints 
------------------------------------------------------------------------ +using NLopt # 1.5*x1 == x2 (aka 1.5*x1 - x2 == 0) #= function eq_constraint(x, grad) @@ -20,43 +21,36 @@ function ineq_constraint(x, grad) 0.6 - x[30] * x[31] end -constrained_optimizer = SemOptimizerNLopt(; +constrained_optimizer = SemOptimizer(; + engine = :NLopt, algorithm = :AUGLAG, local_algorithm = :LD_LBFGS, options = Dict(:xtol_rel => 1e-4), - # equality_constraints = NLoptConstraint(;f = eq_constraint, tol = 1e-14), - inequality_constraints = NLoptConstraint(; f = ineq_constraint, tol = 1e-8), + # equality_constraints = (f = eq_constraint, tol = 1e-14), + inequality_constraints = (f = ineq_constraint, tol = 0.0), ) -model_ml_constrained = - Sem(specification = spec, data = dat, optimizer = constrained_optimizer) - -solution_constrained = sem_fit(model_ml_constrained) +@test constrained_optimizer isa SemOptimizer{:NLopt} # NLopt option setting --------------------------------------------------------------------- -model_ml_maxeval = Sem( - specification = spec, - data = dat, - optimizer = SemOptimizerNLopt, - options = Dict(:maxeval => 10), -) - ############################################################################################ ### test solution ############################################################################################ @testset "ml_solution_maxeval" begin - solution_maxeval = sem_fit(model_ml_maxeval) + solution_maxeval = fit(model_ml, engine = :NLopt, options = Dict(:maxeval => 10)) + @test solution_maxeval.optimization_result.problem.numevals == 10 @test solution_maxeval.optimization_result.result[3] == :MAXEVAL_REACHED end @testset "ml_solution_constrained" begin - solution_constrained = sem_fit(model_ml_constrained) + solution_constrained = fit(constrained_optimizer, model_ml) + @test solution_constrained.solution[31] * solution_constrained.solution[30] >= (0.6 - 1e-8) @test all(abs.(solution_constrained.solution) .< 10) - @test 
solution_constrained.optimization_result.result[3] == :FTOL_REACHED skip = true - @test abs(solution_constrained.minimum - 21.21) < 0.01 + @test solution_constrained.optimization_result.result[3] == :FTOL_REACHED + @test solution_constrained.minimum <= 21.21 + 0.01 end diff --git a/test/examples/political_democracy/constructor.jl b/test/examples/political_democracy/constructor.jl index 4ca1994bd..7a8adc72e 100644 --- a/test/examples/political_democracy/constructor.jl +++ b/test/examples/political_democracy/constructor.jl @@ -1,30 +1,26 @@ using Statistics: cov, mean +using Random, NLopt ############################################################################################ ### models w.o. meanstructure ############################################################################################ -model_ml = Sem(specification = spec, data = dat, optimizer = semoptimizer) +semoptimizer = SemOptimizer(engine = opt_engine) + +model_ml = Sem(specification = spec, data = dat) +@test SEM.param_labels(model_ml.implied.ram_matrices) == SEM.param_labels(spec) model_ml_cov = Sem( specification = spec, observed = SemObservedCovariance, obs_cov = cov(Matrix(dat)), obs_colnames = Symbol.(names(dat)), - optimizer = semoptimizer, - n_obs = 75, + nsamples = 75, ) -model_ls_sym = Sem( - specification = spec, - data = dat, - imply = RAMSymbolic, - loss = SemWLS, - optimizer = semoptimizer, -) +model_ls_sym = Sem(specification = spec, data = dat, implied = RAMSymbolic, loss = SemWLS) -model_ml_sym = - Sem(specification = spec, data = dat, imply = RAMSymbolic, optimizer = semoptimizer) +model_ml_sym = Sem(specification = spec, data = dat, implied = RAMSymbolic) model_ridge = Sem( specification = spec, @@ -32,7 +28,6 @@ model_ridge = Sem( loss = (SemML, SemRidge), α_ridge = 0.001, which_ridge = 16:20, - optimizer = semoptimizer, ) model_constant = Sem( @@ -40,15 +35,10 @@ model_constant = Sem( data = dat, loss = (SemML, SemConstant), constant_loss = 3.465, - optimizer = semoptimizer, 
) -model_ml_weighted = Sem( - specification = partable, - data = dat, - loss_weights = (n_obs(model_ml),), - optimizer = semoptimizer, -) +model_ml_weighted = + Sem(specification = partable, data = dat, loss_weights = (nsamples(model_ml),)) ############################################################################################ ### test gradients @@ -85,18 +75,18 @@ solution_names = Symbol.("parameter_estimates_" .* ["ml", "ml", "ls", "ml", "ml" for (model, name, solution_name) in zip(models, model_names, solution_names) try @testset "$(name)_solution" begin - solution = sem_fit(model) + solution = fit(semoptimizer, model) update_estimate!(partable, solution) - @test compare_estimates(partable, solution_lav[solution_name]; atol = 1e-2) + test_estimates(partable, solution_lav[solution_name]; atol = 1e-2) end catch end end @testset "ridge_solution" begin - solution_ridge = sem_fit(model_ridge) - solution_ml = sem_fit(model_ml) - # solution_ridge_id = sem_fit(model_ridge_id) + solution_ridge = fit(semoptimizer, model_ridge) + solution_ml = fit(semoptimizer, model_ml) + # solution_ridge_id = fit(semoptimizer, model_ridge_id) @test abs(solution_ridge.minimum - solution_ml.minimum) < 1 end @@ -112,11 +102,11 @@ end end @testset "ml_solution_weighted" begin - solution_ml = sem_fit(model_ml) - solution_ml_weighted = sem_fit(model_ml_weighted) + solution_ml = fit(semoptimizer, model_ml) + solution_ml_weighted = fit(semoptimizer, model_ml_weighted) @test isapprox(solution(solution_ml), solution(solution_ml_weighted), rtol = 1e-3) @test isapprox( - n_obs(model_ml) * StructuralEquationModels.minimum(solution_ml), + nsamples(model_ml) * StructuralEquationModels.minimum(solution_ml), StructuralEquationModels.minimum(solution_ml_weighted), rtol = 1e-6, ) @@ -127,11 +117,11 @@ end ############################################################################################ @testset "fitmeasures/se_ml" begin - solution_ml = sem_fit(model_ml) + solution_ml = fit(semoptimizer, 
model_ml) test_fitmeasures(fit_measures(solution_ml), solution_lav[:fitmeasures_ml]; atol = 1e-3) update_se_hessian!(partable, solution_ml) - @test compare_estimates( + test_estimates( partable, solution_lav[:parameter_estimates_ml]; atol = 1e-3, @@ -141,7 +131,7 @@ end end @testset "fitmeasures/se_ls" begin - solution_ls = sem_fit(model_ls_sym) + solution_ls = fit(semoptimizer, model_ls_sym) fm = fit_measures(solution_ls) test_fitmeasures( fm, @@ -151,8 +141,8 @@ end ) @test ismissing(fm[:AIC]) && ismissing(fm[:BIC]) && ismissing(fm[:minus2ll]) - update_se_hessian!(partable, solution_ls) - @test compare_estimates( + @suppress update_se_hessian!(partable, solution_ls) + test_estimates( partable, solution_lav[:parameter_estimates_ls]; atol = 1e-2, @@ -161,17 +151,55 @@ end ) end +############################################################################################ +### data simulation +############################################################################################ + +@testset "data_simulation_wo_mean" begin + # parameters to recover + params = start_simple( + model_ml; + start_loadings = 0.5, + start_regressions = 0.5, + start_variances_observed = 0.5, + start_variances_latent = 1.0, + start_covariances_observed = 0.2, + ) + # set seed for simulation + Random.seed!(83472834) + colnames = Symbol.(names(example_data("political_democracy"))) + # simulate data + model_ml_new = replace_observed( + model_ml, + data = rand(model_ml, params, 1_000_000), + specification = spec, + obs_colnames = colnames, + ) + model_ml_sym_new = replace_observed( + model_ml_sym, + data = rand(model_ml_sym, params, 1_000_000), + specification = spec, + obs_colnames = colnames, + ) + # fit models + sol_ml = solution(fit(semoptimizer, model_ml_new)) + sol_ml_sym = solution(fit(semoptimizer, model_ml_sym_new)) + # check solution + @test maximum(abs.(sol_ml - params)) < 0.01 + @test maximum(abs.(sol_ml_sym - params)) < 0.01 +end + 
############################################################################################ ### test hessians ############################################################################################ -if semoptimizer == SemOptimizerOptim +if opt_engine == :Optim using Optim, LineSearches model_ls = Sem( specification = spec, data = dat, - imply = RAMSymbolic, + implied = RAMSymbolic, loss = SemWLS, hessian = true, algorithm = Newton(; @@ -183,7 +211,7 @@ if semoptimizer == SemOptimizerOptim model_ml = Sem( specification = spec, data = dat, - imply = RAMSymbolic, + implied = RAMSymbolic, hessian = true, algorithm = Newton(), ) @@ -197,24 +225,21 @@ if semoptimizer == SemOptimizerOptim end @testset "ml_solution_hessian" begin - solution = sem_fit(model_ml) + solution = fit(semoptimizer, model_ml) update_estimate!(partable, solution) - @test compare_estimates( - partable, - solution_lav[:parameter_estimates_ml]; - atol = 1e-3, - ) + test_estimates(partable, solution_lav[:parameter_estimates_ml]; atol = 1e-2) end @testset "ls_solution_hessian" begin - solution = sem_fit(model_ls) + solution = fit(semoptimizer, model_ls) update_estimate!(partable, solution) - @test compare_estimates( + test_estimates( partable, solution_lav[:parameter_estimates_ls]; atol = 0.002, rtol = 0.0, - ) skip = true + skip = true, + ) end end @@ -226,18 +251,12 @@ end model_ls = Sem( specification = spec_mean, data = dat, - imply = RAMSymbolic, + implied = RAMSymbolic, loss = SemWLS, meanstructure = true, - optimizer = semoptimizer, ) -model_ml = Sem( - specification = spec_mean, - data = dat, - meanstructure = true, - optimizer = semoptimizer, -) +model_ml = Sem(specification = spec_mean, data = dat, meanstructure = true) model_ml_cov = Sem( specification = spec_mean, @@ -246,18 +265,11 @@ model_ml_cov = Sem( obs_mean = vcat(mean(Matrix(dat), dims = 1)...), obs_colnames = Symbol.(names(dat)), meanstructure = true, - optimizer = semoptimizer, - n_obs = 75, + nsamples = 75, ) -model_ml_sym = 
Sem( - specification = spec_mean, - data = dat, - imply = RAMSymbolic, - meanstructure = true, - start_val = start_test_mean, - optimizer = semoptimizer, -) +model_ml_sym = + Sem(specification = spec_mean, data = dat, implied = RAMSymbolic, meanstructure = true) ############################################################################################ ### test gradients @@ -284,9 +296,9 @@ solution_names = Symbol.("parameter_estimates_" .* ["ml", "ml", "ls", "ml"] .* " for (model, name, solution_name) in zip(models, model_names, solution_names) try @testset "$(name)_solution_mean" begin - solution = sem_fit(model) + solution = fit(semoptimizer, model) update_estimate!(partable_mean, solution) - @test compare_estimates(partable_mean, solution_lav[solution_name]; atol = 1e-2) + test_estimates(partable_mean, solution_lav[solution_name]; atol = 1e-2) end catch end @@ -297,7 +309,7 @@ end ############################################################################################ @testset "fitmeasures/se_ml_mean" begin - solution_ml = sem_fit(model_ml) + solution_ml = fit(semoptimizer, model_ml) test_fitmeasures( fit_measures(solution_ml), solution_lav[:fitmeasures_ml_mean]; @@ -305,7 +317,7 @@ end ) update_se_hessian!(partable_mean, solution_ml) - @test compare_estimates( + test_estimates( partable_mean, solution_lav[:parameter_estimates_ml_mean]; atol = 0.002, @@ -315,7 +327,7 @@ end end @testset "fitmeasures/se_ls_mean" begin - solution_ls = sem_fit(model_ls) + solution_ls = fit(semoptimizer, model_ls) fm = fit_measures(solution_ls) test_fitmeasures( fm, @@ -325,8 +337,8 @@ end ) @test ismissing(fm[:AIC]) && ismissing(fm[:BIC]) && ismissing(fm[:minus2ll]) - update_se_hessian!(partable_mean, solution_ls) - @test compare_estimates( + @suppress update_se_hessian!(partable_mean, solution_ls) + test_estimates( partable_mean, solution_lav[:parameter_estimates_ls_mean]; atol = 1e-2, @@ -335,6 +347,47 @@ end ) end 
+############################################################################################ +### data simulation +############################################################################################ + +@testset "data_simulation_with_mean" begin + # parameters to recover + params = start_simple( + model_ml; + start_loadings = 0.5, + start_regressions = 0.5, + start_variances_observed = 0.5, + start_variances_latent = 1.0, + start_covariances_observed = 0.2, + start_means = 0.5, + ) + # set seed for simulation + Random.seed!(83472834) + colnames = Symbol.(names(example_data("political_democracy"))) + # simulate data + model_ml_new = replace_observed( + model_ml, + data = rand(model_ml, params, 1_000_000), + specification = spec, + obs_colnames = colnames, + meanstructure = true, + ) + model_ml_sym_new = replace_observed( + model_ml_sym, + data = rand(model_ml_sym, params, 1_000_000), + specification = spec, + obs_colnames = colnames, + meanstructure = true, + ) + # fit models + sol_ml = solution(fit(semoptimizer, model_ml_new)) + sol_ml_sym = solution(fit(semoptimizer, model_ml_sym_new)) + # check solution + @test maximum(abs.(sol_ml - params)) < 0.01 + @test maximum(abs.(sol_ml_sym - params)) < 0.01 +end + ############################################################################################ ### fiml ############################################################################################ @@ -345,7 +398,6 @@ model_ml = Sem( data = dat_missing, observed = SemObservedMissing, loss = SemFIML, - optimizer = semoptimizer, meanstructure = true, ) @@ -353,10 +405,8 @@ model_ml_sym = Sem( specification = spec_mean, data = dat_missing, observed = SemObservedMissing, - imply = RAMSymbolic, + implied = RAMSymbolic, loss = SemFIML, - start_val = start_test_mean, - optimizer = semoptimizer, meanstructure = true, ) @@ -377,23 +427,15 @@ end ############################################################################################ @testset "fiml_solution" 
begin - solution = sem_fit(model_ml) + solution = fit(semoptimizer, model_ml) update_estimate!(partable_mean, solution) - @test compare_estimates( - partable_mean, - solution_lav[:parameter_estimates_fiml]; - atol = 1e-2, - ) + test_estimates(partable_mean, solution_lav[:parameter_estimates_fiml]; atol = 1e-2) end @testset "fiml_solution_symbolic" begin - solution = sem_fit(model_ml_sym) + solution = fit(semoptimizer, model_ml_sym) update_estimate!(partable_mean, solution) - @test compare_estimates( - partable_mean, - solution_lav[:parameter_estimates_fiml]; - atol = 1e-2, - ) + test_estimates(partable_mean, solution_lav[:parameter_estimates_fiml]; atol = 1e-2) end ############################################################################################ @@ -401,7 +443,7 @@ end ############################################################################################ @testset "fitmeasures/se_fiml" begin - solution_ml = sem_fit(model_ml) + solution_ml = fit(semoptimizer, model_ml) test_fitmeasures( fit_measures(solution_ml), solution_lav[:fitmeasures_fiml]; @@ -409,7 +451,7 @@ end ) update_se_hessian!(partable_mean, solution_ml) - @test compare_estimates( + test_estimates( partable_mean, solution_lav[:parameter_estimates_fiml]; atol = 0.002, diff --git a/test/examples/political_democracy/political_democracy.jl b/test/examples/political_democracy/political_democracy.jl index 389800745..ad06e0fcd 100644 --- a/test/examples/political_democracy/political_democracy.jl +++ b/test/examples/political_democracy/political_democracy.jl @@ -1,4 +1,6 @@ -using StructuralEquationModels, Test, FiniteDiff +using StructuralEquationModels, Test, Suppressor, FiniteDiff + +SEM = StructuralEquationModels include( joinpath( @@ -75,30 +77,15 @@ spec = RAMMatrices(; A = A, S = S, F = F, - parameters = x, - colnames = [ - :x1, - :x2, - :x3, - :y1, - :y2, - :y3, - :y4, - :y5, - :y6, - :y7, - :y8, - :ind60, - :dem60, - :dem65, - ], + param_labels = x, + vars = [:x1, :x2, :x3, :y1, :y2, 
:y3, :y4, :y5, :y6, :y7, :y8, :ind60, :dem60, :dem65], ) partable = ParameterTable(spec) -# w. meanstructure ------------------------------------------------------------------------- +@test SEM.param_labels(spec) == SEM.param_labels(partable) -x = Symbol.("x" .* string.(1:38)) +# w. meanstructure ------------------------------------------------------------------------- M = [:x32; :x33; :x34; :x35; :x36; :x37; :x38; :x35; :x36; :x37; :x38; 0.0; 0.0; 0.0] @@ -107,46 +94,34 @@ spec_mean = RAMMatrices(; S = S, F = F, M = M, - parameters = x, - colnames = [ - :x1, - :x2, - :x3, - :y1, - :y2, - :y3, - :y4, - :y5, - :y6, - :y7, - :y8, - :ind60, - :dem60, - :dem65, - ], + param_labels = [SEM.param_labels(spec); Symbol.("x", string.(32:38))], + vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8, :ind60, :dem60, :dem65], ) partable_mean = ParameterTable(spec_mean) +@test SEM.param_labels(partable_mean) == SEM.param_labels(spec_mean) + start_test = [fill(1.0, 11); fill(0.05, 3); fill(0.05, 6); fill(0.5, 8); fill(0.05, 3)] start_test_mean = [fill(1.0, 11); fill(0.05, 3); fill(0.05, 6); fill(0.5, 8); fill(0.05, 3); fill(0.1, 7)] -semoptimizer = SemOptimizerOptim +opt_engine = :Optim @testset "RAMMatrices | constructor | Optim" begin include("constructor.jl") end -semoptimizer = SemOptimizerNLopt + +opt_engine = :NLopt @testset "RAMMatrices | constructor | NLopt" begin include("constructor.jl") end -if !haskey(ENV, "JULIA_EXTENDED_TESTS") || ENV["JULIA_EXTENDED_TESTS"] == "true" - semoptimizer = SemOptimizerOptim +if is_extended_tests() + opt_engine = :Optim @testset "RAMMatrices | parts | Optim" begin include("by_parts.jl") end - semoptimizer = SemOptimizerNLopt + opt_engine = :NLopt @testset "RAMMatrices | parts | NLopt" begin include("by_parts.jl") end @@ -163,24 +138,26 @@ end spec = ParameterTable(spec) spec_mean = ParameterTable(spec_mean) +@test SEM.param_labels(spec) == SEM.param_labels(partable) + partable = spec partable_mean = spec_mean -semoptimizer = 
SemOptimizerOptim +opt_engine = :Optim @testset "RAMMatrices → ParameterTable | constructor | Optim" begin include("constructor.jl") end -semoptimizer = SemOptimizerNLopt +opt_engine = :NLopt @testset "RAMMatrices → ParameterTable | constructor | NLopt" begin include("constructor.jl") end -if !haskey(ENV, "JULIA_EXTENDED_TESTS") || ENV["JULIA_EXTENDED_TESTS"] == "true" - semoptimizer = SemOptimizerOptim +if is_extended_tests() + opt_engine = :Optim @testset "RAMMatrices → ParameterTable | parts | Optim" begin include("by_parts.jl") end - semoptimizer = SemOptimizerNLopt + opt_engine = :NLopt @testset "RAMMatrices → ParameterTable | parts | NLopt" begin include("by_parts.jl") end @@ -212,10 +189,9 @@ graph = @StenoGraph begin y8 ↔ y4 + y6 end -spec = - ParameterTable(latent_vars = latent_vars, observed_vars = observed_vars, graph = graph) +spec = ParameterTable(graph, latent_vars = latent_vars, observed_vars = observed_vars) -sort!(spec) +sort_vars!(spec) partable = spec @@ -240,14 +216,13 @@ graph = @StenoGraph begin y3 ↔ y7 y8 ↔ y4 + y6 # means - Symbol("1") → _(mean_labels) .* _(observed_vars) - Symbol("1") → fixed(0) * ind60 + Symbol(1) → _(mean_labels) .* _(observed_vars) + Symbol(1) → fixed(0) * ind60 end -spec_mean = - ParameterTable(latent_vars = latent_vars, observed_vars = observed_vars, graph = graph) +spec_mean = ParameterTable(graph, latent_vars = latent_vars, observed_vars = observed_vars) -sort!(spec_mean) +sort_vars!(spec_mean) partable_mean = spec_mean @@ -255,21 +230,21 @@ start_test = [fill(0.5, 8); fill(0.05, 3); fill(1.0, 11); fill(0.05, 9)] start_test_mean = [fill(0.5, 8); fill(0.05, 3); fill(1.0, 11); fill(0.05, 3); fill(0.05, 13)] -semoptimizer = SemOptimizerOptim +opt_engine = :Optim @testset "Graph → ParameterTable | constructor | Optim" begin include("constructor.jl") end -semoptimizer = SemOptimizerNLopt +opt_engine = :NLopt @testset "Graph → ParameterTable | constructor | NLopt" begin include("constructor.jl") end -if !haskey(ENV, 
"JULIA_EXTENDED_TESTS") || ENV["JULIA_EXTENDED_TESTS"] == "true" - semoptimizer = SemOptimizerOptim +if is_extended_tests() + opt_engine = :Optim @testset "Graph → ParameterTable | parts | Optim" begin include("by_parts.jl") end - semoptimizer = SemOptimizerNLopt + opt_engine = :NLopt @testset "Graph → ParameterTable | parts | NLopt" begin include("by_parts.jl") end diff --git a/test/examples/proximal/l0.jl b/test/examples/proximal/l0.jl new file mode 100644 index 000000000..374f8e58a --- /dev/null +++ b/test/examples/proximal/l0.jl @@ -0,0 +1,67 @@ +using StructuralEquationModels, Test, ProximalAlgorithms, ProximalOperators + +# load data +dat = example_data("political_democracy") + +############################################################################ +### define models +############################################################################ + +observed_vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8] +latent_vars = [:ind60, :dem60, :dem65] + +graph = @StenoGraph begin + ind60 → fixed(1) * x1 + x2 + x3 + dem60 → fixed(1) * y1 + y2 + y3 + y4 + dem65 → fixed(1) * y5 + y6 + y7 + y8 + + dem60 ← ind60 + dem65 ← dem60 + dem65 ← ind60 + + _(observed_vars) ↔ _(observed_vars) + _(latent_vars) ↔ _(latent_vars) + + y1 ↔ label(:cov_15) * y5 + y2 ↔ label(:cov_24) * y4 + label(:cov_26) * y6 + y3 ↔ label(:cov_37) * y7 + y4 ↔ label(:cov_48) * y8 + y6 ↔ label(:cov_68) * y8 +end + +partable = ParameterTable(graph; latent_vars = latent_vars, observed_vars = observed_vars) + +ram_mat = RAMMatrices(partable) + +model = Sem(specification = partable, data = dat, loss = SemML) + +sem_fit = fit(model) + +# use l0 from ProximalSEM +# regularized +prox_operator = + SlicedSeparableSum((NormL0(0.0), NormL0(0.02)), ([vcat(1:15, 21:31)], [12:20])) + +model_prox = Sem(specification = partable, data = dat, loss = SemML) + +fit_prox = fit(model_prox, engine = :Proximal, operator_g = prox_operator) + +@testset "l0 | solution_unregularized" begin + @test 
fit_prox.optimization_result.result[:iterations] < 1000 + @test maximum(abs.(solution(sem_fit) - solution(fit_prox))) < 0.002 +end + +# regularized +prox_operator = SlicedSeparableSum((NormL0(0.0), NormL0(100.0)), ([1:30], [31])) + +model_prox = Sem(specification = partable, data = dat, loss = SemML) + +fit_prox = fit(model_prox, engine = :Proximal, operator_g = prox_operator) + +@testset "l0 | solution_regularized" begin + @test fit_prox.optimization_result.result[:iterations] < 1000 + @test solution(fit_prox)[31] == 0.0 + @test abs( + StructuralEquationModels.minimum(fit_prox) - StructuralEquationModels.minimum(sem_fit), + ) < 1.0 +end diff --git a/test/examples/proximal/lasso.jl b/test/examples/proximal/lasso.jl new file mode 100644 index 000000000..beb5cf529 --- /dev/null +++ b/test/examples/proximal/lasso.jl @@ -0,0 +1,64 @@ +using StructuralEquationModels, Test, ProximalAlgorithms, ProximalOperators + +# load data +dat = example_data("political_democracy") + +############################################################################ +### define models +############################################################################ + +observed_vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8] +latent_vars = [:ind60, :dem60, :dem65] + +graph = @StenoGraph begin + ind60 → fixed(1) * x1 + x2 + x3 + dem60 → fixed(1) * y1 + y2 + y3 + y4 + dem65 → fixed(1) * y5 + y6 + y7 + y8 + + dem60 ← ind60 + dem65 ← dem60 + dem65 ← ind60 + + _(observed_vars) ↔ _(observed_vars) + _(latent_vars) ↔ _(latent_vars) + + y1 ↔ label(:cov_15) * y5 + y2 ↔ label(:cov_24) * y4 + label(:cov_26) * y6 + y3 ↔ label(:cov_37) * y7 + y4 ↔ label(:cov_48) * y8 + y6 ↔ label(:cov_68) * y8 +end + +partable = ParameterTable(graph, latent_vars = latent_vars, observed_vars = observed_vars) + +ram_mat = RAMMatrices(partable) + +model = Sem(specification = partable, data = dat, loss = SemML) + +sem_fit = fit(model) + +# use lasso from ProximalSEM +λ = zeros(31) + +model_prox = Sem(specification 
= partable, data = dat, loss = SemML) + +fit_prox = fit(model_prox, engine = :Proximal, operator_g = NormL1(λ)) + +@testset "lasso | solution_unregularized" begin + @test fit_prox.optimization_result.result[:iterations] < 1000 + @test maximum(abs.(solution(sem_fit) - solution(fit_prox))) < 0.002 +end + +λ = zeros(31); +λ[16:20] .= 0.02; + +model_prox = Sem(specification = partable, data = dat, loss = SemML) + +fit_prox = fit(model_prox, engine = :Proximal, operator_g = NormL1(λ)) + +@testset "lasso | solution_regularized" begin + @test fit_prox.optimization_result.result[:iterations] < 1000 + @test all(solution(fit_prox)[16:20] .< solution(sem_fit)[16:20]) + @test StructuralEquationModels.minimum(fit_prox) - + StructuralEquationModels.minimum(sem_fit) < 0.03 +end diff --git a/test/examples/proximal/proximal.jl b/test/examples/proximal/proximal.jl new file mode 100644 index 000000000..40e72a1ef --- /dev/null +++ b/test/examples/proximal/proximal.jl @@ -0,0 +1,9 @@ +@testset "Ridge" begin + include("ridge.jl") +end +@testset "Lasso" begin + include("lasso.jl") +end +@testset "L0" begin + include("l0.jl") +end diff --git a/test/examples/proximal/ridge.jl b/test/examples/proximal/ridge.jl new file mode 100644 index 000000000..fd7ae113d --- /dev/null +++ b/test/examples/proximal/ridge.jl @@ -0,0 +1,61 @@ +using StructuralEquationModels, Test, ProximalAlgorithms, ProximalOperators, Suppressor + +# load data +dat = example_data("political_democracy") + +############################################################################ +### define models +############################################################################ + +observed_vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8] +latent_vars = [:ind60, :dem60, :dem65] + +graph = @StenoGraph begin + ind60 → fixed(1) * x1 + x2 + x3 + dem60 → fixed(1) * y1 + y2 + y3 + y4 + dem65 → fixed(1) * y5 + y6 + y7 + y8 + + dem60 ← ind60 + dem65 ← dem60 + dem65 ← ind60 + + _(observed_vars) ↔ _(observed_vars) + 
_(latent_vars) ↔ _(latent_vars) + + y1 ↔ label(:cov_15) * y5 + y2 ↔ label(:cov_24) * y4 + label(:cov_26) * y6 + y3 ↔ label(:cov_37) * y7 + y4 ↔ label(:cov_48) * y8 + y6 ↔ label(:cov_68) * y8 +end + +partable = ParameterTable(graph, latent_vars = latent_vars, observed_vars = observed_vars) + +ram_mat = RAMMatrices(partable) + +model = Sem(specification = partable, data = dat, loss = SemML) + +sem_fit = fit(model) + +# use ridge from StructuralEquationModels +model_ridge = Sem( + specification = partable, + data = dat, + loss = (SemML, SemRidge), + α_ridge = 0.02, + which_ridge = 16:20, +) + +solution_ridge = fit(model_ridge) + +# use ridge from ProximalSEM; SqrNormL2 uses λ/2 as penalty +λ = zeros(31); +λ[16:20] .= 0.04; + +model_prox = Sem(specification = partable, data = dat, loss = SemML) + +solution_prox = @suppress fit(model_prox, engine = :Proximal, operator_g = SqrNormL2(λ)) + +@testset "ridge_solution" begin + @test isapprox(solution_prox.solution, solution_ridge.solution; rtol = 1e-3) +end diff --git a/test/examples/recover_parameters/recover_parameters_twofact.jl b/test/examples/recover_parameters/recover_parameters_twofact.jl index 68e44ce20..a3e426cbc 100644 --- a/test/examples/recover_parameters/recover_parameters_twofact.jl +++ b/test/examples/recover_parameters/recover_parameters_twofact.jl @@ -40,7 +40,7 @@ A = [ 0 0 0 0 0 0 0 0 ] -ram_matrices = RAMMatrices(; A = A, S = S, F = F, parameters = x, colnames = nothing) +ram_matrices = RAMMatrices(; A = A, S = S, F = F, param_labels = x, vars = nothing) true_val = [ repeat([1], 8) @@ -53,25 +53,26 @@ start = [ repeat([0.5], 4) ] -imply_ml = RAMSymbolic(; specification = ram_matrices, start_val = start) +implied_ml = RAMSymbolic(; specification = ram_matrices, start_val = start) -imply_ml.Σ_function(imply_ml.Σ, true_val) +implied_ml.Σ_function(implied_ml.Σ, true_val) -true_dist = MultivariateNormal(imply_ml.Σ) +true_dist = MultivariateNormal(implied_ml.Σ) Random.seed!(1234) -x = transpose(rand(true_dist, 
100000)) +x = transpose(rand(true_dist, 100_000)) semobserved = SemObservedData(data = x, specification = nothing) -loss_ml = SemLoss(SemML(; observed = semobserved, n_par = length(start))) +loss_ml = SemLoss(SemML(; observed = semobserved, nparams = length(start))) + +model_ml = Sem(semobserved, implied_ml, loss_ml) +objective!(model_ml, true_val) optimizer = SemOptimizerOptim( BFGS(; linesearch = BackTracking(order = 3), alphaguess = InitialHagerZhang()),# m = 100), Optim.Options(; f_tol = 1e-10, x_tol = 1.5e-8), ) -model_ml = Sem(semobserved, imply_ml, loss_ml, optimizer) -objective!(model_ml, true_val) -solution_ml = sem_fit(model_ml) +solution_ml = fit(optimizer, model_ml) @test true_val ≈ solution(solution_ml) atol = 0.05 diff --git a/test/runtests.jl b/test/runtests.jl index c3b15475f..28d2142b1 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -11,6 +11,3 @@ end @time @safetestset "Example Models" begin include("examples/examples.jl") end - -if !haskey(ENV, "JULIA_EXTENDED_TESTS") || ENV["JULIA_EXTENDED_TESTS"] == "true" -end diff --git a/test/unit_tests/StatsAPI.jl b/test/unit_tests/StatsAPI.jl new file mode 100644 index 000000000..8648fc363 --- /dev/null +++ b/test/unit_tests/StatsAPI.jl @@ -0,0 +1,29 @@ +using StructuralEquationModels +graph = @StenoGraph begin + a → b +end +partable = ParameterTable(graph, observed_vars = [:a, :b], latent_vars = Symbol[]) +update_partable!(partable, :estimate, param_labels(partable), [3.1415]) +data = randn(100, 2) +model = Sem( + specification = partable, + data = data +) +model_fit = fit(model) + +@testset "params" begin + out = [NaN] + StructuralEquationModels.params!(out, partable) + @test params(partable) == out == [3.1415] == coef(partable) +end +@testset "param_labels" begin + @test param_labels(partable) == [:θ_1] == coefnames(partable) +end + +@testset "nobs" begin + @test nobs(model) == nsamples(model) +end + +@testset "coeftable" begin + @test_throws "StructuralEquationModels does not support" 
coeftable(model) +end \ No newline at end of file diff --git a/test/unit_tests/bootstrap.jl b/test/unit_tests/bootstrap.jl index f30092865..a2d5b6832 100644 --- a/test/unit_tests/bootstrap.jl +++ b/test/unit_tests/bootstrap.jl @@ -1,4 +1,4 @@ -solution_ml = sem_fit(model_ml) +solution_ml = fit(model_ml) bs = se_bootstrap(solution_ml; n_boot = 20) update_se_hessian!(partable, solution_ml) diff --git a/test/unit_tests/data_input_formats.jl b/test/unit_tests/data_input_formats.jl index 485cf82d2..183b067f5 100644 --- a/test/unit_tests/data_input_formats.jl +++ b/test/unit_tests/data_input_formats.jl @@ -1,428 +1,496 @@ -using StructuralEquationModels, Test, Statistics -using StructuralEquationModels: obs_cov, obs_mean, get_data +using StructuralEquationModels, Test, Statistics, Suppressor + ### model specification -------------------------------------------------------------------- -spec = ParameterTable(nothing) -spec.variables[:observed_vars] = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8] -spec.variables[:latent_vars] = [:ind60, :dem60, :dem65] +spec = ParameterTable( + observed_vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8], + latent_vars = [:ind60, :dem60, :dem65], +) + +# specification with non-existent observed var z1 +wrong_spec = ParameterTable( + observed_vars = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8, :z1], + latent_vars = [:ind60, :dem60, :dem65], +) ### data ----------------------------------------------------------------------------------- dat = example_data("political_democracy") dat_missing = example_data("political_democracy_missing")[:, names(dat)] +@assert Symbol.(names(dat)) == observed_vars(spec) + dat_matrix = Matrix(dat) dat_missing_matrix = Matrix(dat_missing) dat_cov = Statistics.cov(dat_matrix) dat_mean = vcat(Statistics.mean(dat_matrix, dims = 1)...) 
-############################################################################################ -### tests - SemObservedData -############################################################################################ - -# w.o. means ------------------------------------------------------------------------------- - -# errors -@test_throws ArgumentError( - "You passed your data as a `DataFrame`, but also specified `obs_colnames`. " * - "Please make sure the column names of your data frame indicate the correct variables " * - "or pass your data in a different format.", -) begin - SemObservedData(specification = spec, data = dat, obs_colnames = Symbol.(names(dat))) -end - -@test_throws ArgumentError( - "Your `data` can not be indexed by symbols. " * - "Maybe you forgot to provide column names via the `obs_colnames = ...` argument.", -) begin - SemObservedData(specification = spec, data = dat_matrix) -end - -@test_throws ArgumentError("please specify `obs_colnames` as a vector of Symbols") begin - SemObservedData(specification = spec, data = dat_matrix, obs_colnames = names(dat)) -end - -@test_throws UndefKeywordError(:data) SemObservedData(specification = spec) - -@test_throws UndefKeywordError(:specification) SemObservedData(data = dat_matrix) - -# should work -observed = SemObservedData(specification = spec, data = dat) - -observed_nospec = SemObservedData(specification = nothing, data = dat_matrix) - -observed_matrix = SemObservedData( - specification = spec, - data = dat_matrix, - obs_colnames = Symbol.(names(dat)), -) - -all_equal_cov = - (obs_cov(observed) == obs_cov(observed_nospec)) & - (obs_cov(observed) == obs_cov(observed_matrix)) - -all_equal_data = - (get_data(observed) == get_data(observed_nospec)) & - (get_data(observed) == get_data(observed_matrix)) - -@testset "unit tests | SemObservedData | input formats" begin - @test all_equal_cov - @test all_equal_data -end - # shuffle variables new_order = [3, 2, 7, 8, 5, 6, 9, 11, 1, 10, 4] -shuffle_names = 
Symbol.(names(dat))[new_order] - -shuffle_dat = dat[:, new_order] - -shuffle_dat_matrix = dat_matrix[:, new_order] - -observed_shuffle = SemObservedData(specification = spec, data = shuffle_dat) - -observed_matrix_shuffle = SemObservedData( - specification = spec, - data = shuffle_dat_matrix, - obs_colnames = shuffle_names, -) - -all_equal_cov_suffled = - (obs_cov(observed) == obs_cov(observed_shuffle)) & - (obs_cov(observed) == obs_cov(observed_matrix_shuffle)) - -all_equal_data_suffled = - (get_data(observed) == get_data(observed_shuffle)) & - (get_data(observed) == get_data(observed_matrix_shuffle)) - -@testset "unit tests | SemObservedData | input formats shuffled " begin - @test all_equal_cov_suffled - @test all_equal_data_suffled -end - -# with means ------------------------------------------------------------------------------- - -# errors -@test_throws ArgumentError( - "You passed your data as a `DataFrame`, but also specified `obs_colnames`. " * - "Please make sure the column names of your data frame indicate the correct variables " * - "or pass your data in a different format.", -) begin - SemObservedData( - specification = spec, - data = dat, - obs_colnames = Symbol.(names(dat)), - meanstructure = true, - ) -end - -@test_throws ArgumentError( - "Your `data` can not be indexed by symbols. 
" * - "Maybe you forgot to provide column names via the `obs_colnames = ...` argument.", -) begin - SemObservedData(specification = spec, data = dat_matrix, meanstructure = true) -end - -@test_throws ArgumentError("please specify `obs_colnames` as a vector of Symbols") begin - SemObservedData( - specification = spec, - data = dat_matrix, - obs_colnames = names(dat), - meanstructure = true, - ) -end - -@test_throws UndefKeywordError(:data) SemObservedData( - specification = spec, - meanstructure = true, -) - -@test_throws UndefKeywordError(:specification) SemObservedData( - data = dat_matrix, - meanstructure = true, -) - -# should work -observed = SemObservedData(specification = spec, data = dat, meanstructure = true) - -observed_nospec = - SemObservedData(specification = nothing, data = dat_matrix, meanstructure = true) +shuffle_names = names(dat)[new_order] -observed_matrix = SemObservedData( - specification = spec, - data = dat_matrix, - obs_colnames = Symbol.(names(dat)), - meanstructure = true, +shuffle_spec = ParameterTable( + observed_vars = Symbol.(shuffle_names), + latent_vars = [:ind60, :dem60, :dem65], ) -all_equal_mean = - (obs_mean(observed) == obs_mean(observed_nospec)) & - (obs_mean(observed) == obs_mean(observed_matrix)) - -@testset "unit tests | SemObservedData | input formats - means" begin - @test all_equal_mean -end - -# shuffle variables -new_order = [3, 2, 7, 8, 5, 6, 9, 11, 1, 10, 4] - -shuffle_names = Symbol.(names(dat))[new_order] - shuffle_dat = dat[:, new_order] +shuffle_dat_missing = dat_missing[:, new_order] shuffle_dat_matrix = dat_matrix[:, new_order] +shuffle_dat_missing_matrix = dat_missing_matrix[:, new_order] -observed_shuffle = - SemObservedData(specification = spec, data = shuffle_dat, meanstructure = true) - -observed_matrix_shuffle = SemObservedData( - specification = spec, - data = shuffle_dat_matrix, - obs_colnames = shuffle_names, - meanstructure = true, +shuffle_dat_cov = cov(shuffle_dat_matrix) +shuffle_dat_mean = 
vec(mean(shuffle_dat_matrix, dims = 1)) + +# common tests for SemObserved subtypes +function test_observed( + observed::SemObserved, + dat, + dat_matrix, + dat_cov, + dat_mean; + meanstructure::Bool, + approx_cov::Bool = false, ) - -all_equal_mean_suffled = - (obs_mean(observed) == obs_mean(observed_shuffle)) & - (obs_mean(observed) == obs_mean(observed_matrix_shuffle)) - -@testset "unit tests | SemObservedData | input formats shuffled - mean" begin - @test all_equal_mean_suffled + if !isnothing(dat) + @test @inferred(nsamples(observed)) == size(dat, 1) + @test @inferred(nobserved_vars(observed)) == size(dat, 2) + @test @inferred(observed_vars(observed)) == Symbol.(names(dat)) + end + + if !isnothing(dat_matrix) + @test @inferred(nsamples(observed)) == size(dat_matrix, 1) + + if any(ismissing, dat_matrix) + @test isequal(@inferred(samples(observed)), dat_matrix) + else + @test @inferred(samples(observed)) == dat_matrix + end + end + + if !isnothing(dat_cov) + if any(ismissing, dat_cov) + @test isequal(@inferred(obs_cov(observed)), dat_cov) + else + if approx_cov + @test @inferred(obs_cov(observed)) ≈ dat_cov + else + @test @inferred(obs_cov(observed)) == dat_cov + end + end + end + + # FIXME actually, SemObserved should not use meanstructure and always provide obs_mean() + # since meanstructure belongs to the implied part of a SEM model + if meanstructure + if !isnothing(dat_mean) + if any(ismissing, dat_mean) + @test isequal(@inferred(obs_mean(observed)), dat_mean) + else + @test @inferred(obs_mean(observed)) == dat_mean + end + else + # FIXME @inferred is broken for EM cov/mean since it may return nothing if EM was not run + @test @inferred(obs_mean(observed)) isa AbstractVector{Float64} broken = true # EM-based means + end + else + @test @inferred(obs_mean(observed)) === nothing skip = true + end end ############################################################################################ -### tests - SemObservedCovariance 
-############################################################################################ - -# w.o. means ------------------------------------------------------------------------------- - -# errors - -@test_throws ArgumentError("observed means were passed, but `meanstructure = false`") begin - SemObservedCovariance( - specification = nothing, - obs_cov = dat_cov, - obs_mean = dat_mean, - n_obs = 75, - ) -end - -@test_throws UndefKeywordError(:specification) SemObservedCovariance(obs_cov = dat_cov) - -@test_throws ArgumentError("no `obs_colnames` were specified") begin - SemObservedCovariance(specification = spec, obs_cov = dat_cov, n_obs = 75) -end - -@test_throws ArgumentError("please specify `obs_colnames` as a vector of Symbols") begin - SemObservedCovariance( - specification = spec, - obs_cov = dat_cov, - obs_colnames = names(dat), - n_obs = 75, - ) -end - -# should work -observed = SemObservedCovariance( - specification = spec, - obs_cov = dat_cov, - obs_colnames = obs_colnames = Symbol.(names(dat)), - n_obs = 75, -) - -observed_nospec = - SemObservedCovariance(specification = nothing, obs_cov = dat_cov, n_obs = 75) - -all_equal_cov = (obs_cov(observed) == obs_cov(observed_nospec)) - -@testset "unit tests | SemObservedCovariance | input formats" begin - @test all_equal_cov - @test n_obs(observed) == 75 - @test n_obs(observed_nospec) == 75 -end - -# shuffle variables -new_order = [3, 2, 7, 8, 5, 6, 9, 11, 1, 10, 4] - -shuffle_names = Symbol.(names(dat))[new_order] - -shuffle_dat_matrix = dat_matrix[:, new_order] - -shuffle_dat_cov = Statistics.cov(shuffle_dat_matrix) - -observed_shuffle = SemObservedCovariance( - specification = spec, - obs_cov = shuffle_dat_cov, - obs_colnames = shuffle_names, - n_obs = 75, -) - -all_equal_cov_suffled = (obs_cov(observed) ≈ obs_cov(observed_shuffle)) - -@testset "unit tests | SemObservedCovariance | input formats shuffled " begin - @test all_equal_cov_suffled -end - -# with means 
------------------------------------------------------------------------------- +@testset "SemObservedData" begin -# errors -@test_throws ArgumentError("`meanstructure = true`, but no observed means were passed") begin - SemObservedCovariance( + # errors + obs_data_redundant = SemObservedData( specification = spec, - obs_cov = dat_cov, - meanstructure = true, - n_obs = 75, - ) -end - -@test_throws UndefKeywordError SemObservedCovariance( - data = dat_matrix, - meanstructure = true, -) - -@test_throws UndefKeywordError SemObservedCovariance( - obs_cov = dat_cov, - meanstructure = true, -) - -@test_throws ArgumentError("`meanstructure = true`, but no observed means were passed") begin - SemObservedCovariance( - specification = spec, - obs_cov = dat_cov, - obs_colnames = Symbol.(names(dat)), - meanstructure = true, - n_obs = 75, + data = dat, + observed_vars = Symbol.(names(dat)), ) -end - -# should work -observed = SemObservedCovariance( - specification = spec, - obs_cov = dat_cov, - obs_mean = dat_mean, - obs_colnames = Symbol.(names(dat)), - n_obs = 75, - meanstructure = true, -) - -observed_nospec = SemObservedCovariance( - specification = nothing, - obs_cov = dat_cov, - obs_mean = dat_mean, - meanstructure = true, - n_obs = 75, -) - -all_equal_mean = (obs_mean(observed) == obs_mean(observed_nospec)) - -@testset "unit tests | SemObservedCovariance | input formats - means" begin - @test all_equal_mean -end - -# shuffle variables -new_order = [3, 2, 7, 8, 5, 6, 9, 11, 1, 10, 4] - -shuffle_names = Symbol.(names(dat))[new_order] - -shuffle_dat = dat[:, new_order] - -shuffle_dat_matrix = dat_matrix[:, new_order] - -shuffle_dat_cov = Statistics.cov(shuffle_dat_matrix) -shuffle_dat_mean = vcat(Statistics.mean(shuffle_dat_matrix, dims = 1)...) 
+ @test observed_vars(obs_data_redundant) == Symbol.(names(dat)) + @test observed_vars(obs_data_redundant) == observed_vars(spec) + + obs_data_spec = SemObservedData(specification = spec, data = dat_matrix) + @test observed_vars(obs_data_spec) == observed_vars(spec) + + obs_data_strnames = + SemObservedData(specification = spec, data = dat_matrix, observed_vars = names(dat)) + @test observed_vars(obs_data_strnames) == Symbol.(names(dat)) + + @test_throws UndefKeywordError(:data) SemObservedData(specification = spec) + + obs_data_nonames = SemObservedData(data = dat_matrix) + @test observed_vars(obs_data_nonames) == Symbol.("obs", 1:size(dat_matrix, 2)) + + obs_data_nonames2 = + SemObservedData(data = dat_matrix, observed_var_prefix = "observed_") + @test observed_vars(obs_data_nonames2) == Symbol.("observed_", 1:size(dat_matrix, 2)) + + @testset "meanstructure=$meanstructure" for meanstructure in (false, true) + observed = SemObservedData(specification = spec, data = dat; meanstructure) + + test_observed(observed, dat, dat_matrix, dat_cov, dat_mean; meanstructure) + + observed_nospec = + SemObservedData(specification = nothing, data = dat_matrix; meanstructure) + + test_observed( + observed_nospec, + nothing, + dat_matrix, + dat_cov, + dat_mean; + meanstructure, + ) + + observed_matrix = SemObservedData( + specification = spec, + data = dat_matrix, + observed_vars = Symbol.(names(dat)); + meanstructure, + ) + + test_observed(observed_matrix, dat, dat_matrix, dat_cov, dat_mean; meanstructure) + + # detect non-existing column + @test_throws "ArgumentError: column name \"z1\"" SemObservedData( + specification = wrong_spec, + data = shuffle_dat, + ) + + # detect non-existing observed_var + @test_throws "ArgumentError: observed_var \"z1\"" SemObservedData( + specification = wrong_spec, + data = shuffle_dat_matrix, + observed_vars = shuffle_names, + ) + + # cannot infer observed_vars + @test_throws "No data, specification or observed_vars provided" SemObservedData( + 
data = nothing, + ) + + if false # FIXME data = nothing is for simulation studies + # no data, just observed_vars + observed_nodata = + SemObservedData(data = nothing, observed_vars = Symbol.(names(dat))) + @test observed_nodata isa SemObservedData + @test @inferred(samples(observed_nodata)) === nothing + @test observed_vars(observed_nodata) == Symbol.(names(dat)) + end + + @test_warn "The order of variables in observed_vars" SemObservedData( + specification = spec, + data = shuffle_dat, + observed_vars = shuffle_names, + ) + + # spec takes precedence in obs_vars order + observed_spec = @suppress SemObservedData( + specification = spec, + data = shuffle_dat, + observed_vars = shuffle_names, + ) + + test_observed( + observed_spec, + dat, + dat_matrix, + dat_cov, + meanstructure ? dat_mean : nothing; + meanstructure, + ) + + observed_shuffle = + SemObservedData(specification = shuffle_spec, data = shuffle_dat; meanstructure) + + test_observed( + observed_shuffle, + shuffle_dat, + shuffle_dat_matrix, + shuffle_dat_cov, + meanstructure ? shuffle_dat_mean : nothing; + meanstructure, + ) + + observed_matrix_shuffle = SemObservedData( + specification = shuffle_spec, + data = shuffle_dat_matrix, + observed_vars = shuffle_names; + meanstructure, + ) + + test_observed( + observed_matrix_shuffle, + shuffle_dat, + shuffle_dat_matrix, + shuffle_dat_cov, + meanstructure ? 
shuffle_dat_mean : nothing; + meanstructure, + ) + end # meanstructure +end # SemObservedData -observed_shuffle = SemObservedCovariance( - specification = spec, - obs_cov = shuffle_dat_cov, - obs_mean = shuffle_dat_mean, - obs_colnames = shuffle_names, - n_obs = 75, - meanstructure = true, -) - -all_equal_mean_suffled = (obs_mean(observed) == obs_mean(observed_shuffle)) +############################################################################################ -@testset "unit tests | SemObservedCovariance | input formats shuffled - mean" begin - @test all_equal_mean_suffled -end +@testset "SemObservedCovariance" begin + + # errors + + @test_throws UndefKeywordError(:nsamples) SemObservedCovariance(obs_cov = dat_cov) + + @testset "meanstructure=$meanstructure" for meanstructure in (false, true) + + # errors + @test_throws UndefKeywordError SemObservedCovariance( + obs_cov = dat_cov; + meanstructure, + ) + + @test_throws UndefKeywordError SemObservedCovariance( + data = dat_matrix; + meanstructure, + ) + + # default vars + observed_nonames = SemObservedCovariance( + obs_cov = dat_cov, + obs_mean = meanstructure ? dat_mean : nothing, + nsamples = size(dat, 1), + ) + @test observed_vars(observed_nonames) == Symbol.("obs", 1:size(dat_cov, 2)) + + @test_throws DimensionMismatch SemObservedCovariance( + obs_cov = dat_cov, + observed_vars = Symbol.("obs", 1:(size(dat_cov, 2)+1)), + nsamples = size(dat, 1), + ) + + observed = SemObservedCovariance( + specification = spec, + obs_cov = dat_cov, + obs_mean = meanstructure ? dat_mean : nothing, + observed_vars = Symbol.(names(dat)), + nsamples = size(dat, 1), + meanstructure = meanstructure, + ) + + test_observed( + observed, + dat, + nothing, + dat_cov, + dat_mean; + meanstructure, + approx_cov = true, + ) + + @test @inferred(samples(observed)) === nothing + + observed_nospec = SemObservedCovariance( + specification = nothing, + obs_cov = dat_cov, + obs_mean = meanstructure ? 
dat_mean : nothing, + nsamples = size(dat, 1); + meanstructure, + ) + + test_observed( + observed_nospec, + nothing, + nothing, + dat_cov, + dat_mean; + meanstructure, + approx_cov = true, + ) + + @test @inferred(samples(observed_nospec)) === nothing + + # detect non-existing observed_var + @test_throws "ArgumentError: observed_var \"z1\"" SemObservedCovariance( + specification = wrong_spec, + obs_cov = shuffle_dat_cov, + observed_vars = shuffle_names, + nsamples = size(dat, 1), + ) + + # spec takes precedence in obs_vars order + observed_spec = SemObservedCovariance( + specification = spec, + obs_cov = shuffle_dat_cov, + obs_mean = meanstructure ? shuffle_dat_mean : nothing, + observed_vars = shuffle_names, + nsamples = size(dat, 1), + ) + + test_observed( + observed_spec, + dat, + nothing, + dat_cov, + meanstructure ? dat_mean : nothing; + meanstructure, + approx_cov = true, + ) + + observed_shuffle = SemObservedCovariance( + specification = shuffle_spec, + obs_cov = shuffle_dat_cov, + obs_mean = meanstructure ? shuffle_dat_mean : nothing, + observed_vars = shuffle_names, + nsamples = size(dat, 1); + meanstructure, + ) + + test_observed( + observed_shuffle, + shuffle_dat, + nothing, + shuffle_dat_cov, + meanstructure ? shuffle_dat_mean : nothing; + meanstructure, + approx_cov = true, + ) + end # meanstructure +end # SemObservedCovariance ############################################################################################ -### tests - SemObservedMissing -############################################################################################ -# errors -@test_throws ArgumentError( - "You passed your data as a `DataFrame`, but also specified `obs_colnames`. 
" * - "Please make sure the column names of your data frame indicate the correct variables " * - "or pass your data in a different format.", -) begin - SemObservedMissing( +@testset "SemObservedMissing" begin + + # errors + observed_redundant_names = SemObservedMissing( specification = spec, data = dat_missing, - obs_colnames = Symbol.(names(dat)), + observed_vars = Symbol.(names(dat)), ) -end + @test observed_vars(observed_redundant_names) == Symbol.(names(dat)) -@test_throws ArgumentError( - "Your `data` can not be indexed by symbols. " * - "Maybe you forgot to provide column names via the `obs_colnames = ...` argument.", -) begin - SemObservedMissing(specification = spec, data = dat_missing_matrix) -end + observed_spec_only = SemObservedMissing(specification = spec, data = dat_missing_matrix) + @test observed_vars(observed_spec_only) == observed_vars(spec) -@test_throws ArgumentError("please specify `obs_colnames` as a vector of Symbols") begin - SemObservedMissing( + observed_str_colnames = SemObservedMissing( specification = spec, data = dat_missing_matrix, - obs_colnames = names(dat), + observed_vars = names(dat), ) -end - -@test_throws UndefKeywordError(:data) SemObservedMissing(specification = spec) - -@test_throws UndefKeywordError(:specification) SemObservedMissing(data = dat_missing_matrix) - -# should work -observed = SemObservedMissing(specification = spec, data = dat_missing) - -observed_nospec = SemObservedMissing(specification = nothing, data = dat_missing_matrix) - -observed_matrix = SemObservedMissing( - specification = spec, - data = dat_missing_matrix, - obs_colnames = Symbol.(names(dat)), -) - -all_equal_data = - isequal(get_data(observed), get_data(observed_nospec)) & - isequal(get_data(observed), get_data(observed_matrix)) - -@testset "unit tests | SemObservedMissing | input formats" begin - @test all_equal_data -end - -# shuffle variables -new_order = [3, 2, 7, 8, 5, 6, 9, 11, 1, 10, 4] - -shuffle_names = Symbol.(names(dat))[new_order] - 
-shuffle_dat_missing = dat_missing[:, new_order]
-
-shuffle_dat_missing_matrix = dat_missing_matrix[:, new_order]
-
-observed_shuffle = SemObservedMissing(specification = spec, data = shuffle_dat_missing)
-
-observed_matrix_shuffle = SemObservedMissing(
-    specification = spec,
-    data = shuffle_dat_missing_matrix,
-    obs_colnames = shuffle_names,
-)
-
-all_equal_data_shuffled =
-    isequal(get_data(observed), get_data(observed_shuffle)) &
-    isequal(get_data(observed), get_data(observed_matrix_shuffle))
-
-@testset "unit tests | SemObservedMissing | input formats shuffled " begin
-    @test all_equal_data_suffled
-end
+    @test observed_vars(observed_str_colnames) == Symbol.(names(dat))
+
+    @test_throws UndefKeywordError(:data) SemObservedMissing(specification = spec)
+
+    observed_no_names = SemObservedMissing(data = dat_missing_matrix)
+    @test observed_vars(observed_no_names) == Symbol.(:obs, 1:size(dat_missing_matrix, 2))
+
+    observed_no_names2 =
+        SemObservedMissing(data = dat_missing_matrix, observed_var_prefix = "observed_")
+    # compare against dat_missing_matrix (the data actually passed above), not dat_matrix
+    @test observed_vars(observed_no_names2) ==
+          Symbol.("observed_", 1:size(dat_missing_matrix, 2))
+
+    @testset "meanstructure=$meanstructure" for meanstructure in (false, true)
+        observed =
+            SemObservedMissing(specification = spec, data = dat_missing; meanstructure)
+
+        test_observed(
+            observed,
+            dat_missing,
+            dat_missing_matrix,
+            nothing,
+            nothing;
+            meanstructure,
+        )
+
+        @test @inferred(length(observed.patterns)) == 55
+        @test sum(@inferred(nsamples(pat)) for pat in observed.patterns) ==
+              size(dat_missing, 1)
+        @test all(nsamples(pat) <= size(dat_missing, 2) for pat in observed.patterns)
+
+        observed_nospec = SemObservedMissing(
+            specification = nothing,
+            data = dat_missing_matrix;
+            meanstructure,
+        )
+
+        test_observed(
+            observed_nospec,
+            nothing,
+            dat_missing_matrix,
+            nothing,
+            nothing;
+            meanstructure,
+        )
+
+        observed_matrix = SemObservedMissing(
+            specification = spec,
+            data = dat_missing_matrix,
+            observed_vars = Symbol.(names(dat)),
+ ) + + test_observed( + observed_matrix, + dat_missing, + dat_missing_matrix, + nothing, + nothing; + meanstructure, + ) + + # detect non-existing column + @test_throws "ArgumentError: column name \"z1\"" SemObservedMissing( + specification = wrong_spec, + data = shuffle_dat, + ) + + # detect non-existing observed_var + @test_throws "ArgumentError: observed_var \"z1\"" SemObservedMissing( + specification = wrong_spec, + data = shuffle_dat_missing_matrix, + observed_vars = shuffle_names, + ) + + # spec takes precedence in obs_vars order + observed_spec = @suppress SemObservedMissing( + specification = spec, + observed_vars = shuffle_names, + data = shuffle_dat_missing, + ) + + test_observed( + observed_spec, + dat_missing, + dat_missing_matrix, + nothing, + nothing; + meanstructure, + ) + + observed_shuffle = + SemObservedMissing(specification = shuffle_spec, data = shuffle_dat_missing) + + test_observed( + observed_shuffle, + shuffle_dat_missing, + shuffle_dat_missing_matrix, + nothing, + nothing; + meanstructure, + ) + + observed_matrix_shuffle = SemObservedMissing( + specification = shuffle_spec, + data = shuffle_dat_missing_matrix, + observed_vars = shuffle_names, + ) + + test_observed( + observed_matrix_shuffle, + shuffle_dat_missing, + shuffle_dat_missing_matrix, + nothing, + nothing; + meanstructure, + ) + end # meanstructure +end # SemObservedMissing diff --git a/test/unit_tests/matrix_helpers.jl b/test/unit_tests/matrix_helpers.jl new file mode 100644 index 000000000..b2f32f31a --- /dev/null +++ b/test/unit_tests/matrix_helpers.jl @@ -0,0 +1,49 @@ +using StructuralEquationModels, Test, Random, SparseArrays, LinearAlgebra +using StructuralEquationModels: + CommutationMatrix, transpose_linear_indices, duplication_matrix, elimination_matrix + +Random.seed!(73721) + +n = 4 +m = 5 + +@testset "Commutation matrix" begin + # transpose linear indices + A = rand(n, m) + @test reshape(A[transpose_linear_indices(n, m)], m, n) == A' + # commutation matrix 
multiplication + K = CommutationMatrix(n) + # test K array interface methods + @test size(K) == (n^2, n^2) + @test size(K, 1) == n^2 + @test length(K) == n^4 + nn_linind = LinearIndices((n, n)) + @test K[nn_linind[3, 2], nn_linind[2, 3]] == 1 + @test K[nn_linind[3, 2], nn_linind[3, 2]] == 0 + + B = rand(n, n) + @test_throws DimensionMismatch K * rand(n, m) + @test K * vec(B) == vec(B') + C = sprand(n, n, 0.5) + @test K * vec(C) == vec(C') + # lmul! + D = sprand(n^2, n^2, 0.1) + E = copy(D) + F = Matrix(E) + lmul!(K, D) + @test D == K * E + @test Matrix(D) == K * F +end + +@testset "Duplication / elimination matrix" begin + A = rand(m, m) + A = A * A' + + # dupication + D = duplication_matrix(m) + @test D * A[tril(trues(size(A)))] == vec(A) + + # elimination + E = elimination_matrix(m) + @test E * vec(A) == A[tril(trues(size(A)))] +end diff --git a/test/unit_tests/model.jl b/test/unit_tests/model.jl new file mode 100644 index 000000000..2bf5dedaf --- /dev/null +++ b/test/unit_tests/model.jl @@ -0,0 +1,75 @@ +using StructuralEquationModels, Test, Statistics + +dat = example_data("political_democracy") +dat_missing = example_data("political_democracy_missing")[:, names(dat)] + +obs_vars = [Symbol.("x", 1:3); Symbol.("y", 1:8)] +lat_vars = [:ind60, :dem60, :dem65] + +graph = @StenoGraph begin + # loadings + ind60 → fixed(1) * x1 + x2 + x3 + dem60 → fixed(1) * y1 + y2 + y3 + y4 + dem65 → fixed(1) * y5 + y6 + y7 + y8 + # latent regressions + label(:a) * dem60 ← ind60 + dem65 ← dem60 + dem65 ← ind60 + # variances + _(obs_vars) ↔ _(obs_vars) + _(lat_vars) ↔ _(lat_vars) + # covariances + y1 ↔ y5 + y2 ↔ y4 + y6 + y3 ↔ y7 + y8 ↔ y4 + y6 +end + + +ram_matrices = + RAMMatrices(ParameterTable(graph, observed_vars = obs_vars, latent_vars = lat_vars)) + +obs = SemObservedData(specification = ram_matrices, data = dat) + +function test_vars_api(semobj, spec::SemSpecification) + @test @inferred(nobserved_vars(semobj)) == nobserved_vars(spec) + @test observed_vars(semobj) == 
observed_vars(spec) + + @test @inferred(nlatent_vars(semobj)) == nlatent_vars(spec) + @test latent_vars(semobj) == latent_vars(spec) + + @test @inferred(nvars(semobj)) == nvars(spec) + @test vars(semobj) == vars(spec) +end + +function test_params_api(semobj, spec::SemSpecification) + @test @inferred(nparams(semobj)) == nparams(spec) + @test @inferred(param_labels(semobj)) == param_labels(spec) +end + +@testset "Sem(implied=$impliedtype, loss=$losstype)" for impliedtype in (RAM, RAMSymbolic), + losstype in (SemML, SemWLS) + + model = Sem( + specification = ram_matrices, + observed = obs, + implied = impliedtype, + loss = losstype, + ) + + @test model isa Sem + @test @inferred(implied(model)) isa impliedtype + @test @inferred(observed(model)) isa SemObserved + + test_vars_api(model, ram_matrices) + test_params_api(model, ram_matrices) + + test_vars_api(implied(model), ram_matrices) + test_params_api(implied(model), ram_matrices) + + @test @inferred(loss(model)) isa SemLoss + semloss = loss(model).functions[1] + @test semloss isa losstype + + @test @inferred(nsamples(model)) == nsamples(obs) +end diff --git a/test/unit_tests/sorting.jl b/test/unit_tests/sorting.jl index e573c6d22..3c61e13c4 100644 --- a/test/unit_tests/sorting.jl +++ b/test/unit_tests/sorting.jl @@ -1,17 +1,17 @@ ############################################################################ -### test sorting +### test variables sorting ############################################################################ -sort!(partable) +sort_vars!(partable) model_ml_sorted = Sem(specification = partable, data = dat) @testset "graph sorting" begin - @test model_ml_sorted.imply.I_A isa LowerTriangular + @test model_ml_sorted.implied.I_A isa LowerTriangular end @testset "ml_solution_sorted" begin - solution_ml_sorted = sem_fit(model_ml_sorted) + solution_ml_sorted = fit(model_ml_sorted) update_estimate!(partable, solution_ml_sorted) - @test SEM.compare_estimates(par_ml, partable, 0.01) + @test 
test_estimates(par_ml, partable, 0.01) end diff --git a/test/unit_tests/specification.jl b/test/unit_tests/specification.jl index 0bfc0de2d..b69230d7f 100644 --- a/test/unit_tests/specification.jl +++ b/test/unit_tests/specification.jl @@ -1,29 +1,141 @@ -@testset "ParameterTable - RAMMatrices conversion" begin - partable = ParameterTable(ram_matrices) - @test ram_matrices == RAMMatrices(partable) -end +using StructuralEquationModels -@test get_identifier_indices([:x2, :x10, :x28], model_ml) == [2, 10, 28] +obs_vars = Symbol.("x", 1:9) +lat_vars = [:visual, :textual, :speed] -@testset "get_identifier_indices" begin - pars = [:θ_1, :θ_7, :θ_21] - @test get_identifier_indices(pars, model_ml) == get_identifier_indices(pars, partable) - @test get_identifier_indices(pars, model_ml) == - get_identifier_indices(pars, RAMMatrices(partable)) +graph = @StenoGraph begin + # measurement model + visual → fixed(1.0) * x1 + fixed(0.5) * x2 + fixed(0.6) * x3 + textual → fixed(1.0) * x4 + x5 + label(:a₁) * x6 + speed → fixed(1.0) * x7 + fixed(1.0) * x8 + label(:λ₉) * x9 + # variances and covariances + _(obs_vars) ↔ _(obs_vars) + _(lat_vars) ↔ _(lat_vars) + visual ↔ textual + speed + textual ↔ speed end -# from docstrings: -parameter_indices = get_identifier_indices([:λ₁, λ₂], my_fitted_sem) -values = solution(my_fitted_sem)[parameter_indices] - -graph = @StenoGraph begin +ens_graph = @StenoGraph begin # measurement model visual → fixed(1.0, 1.0) * x1 + fixed(0.5, 0.5) * x2 + fixed(0.6, 0.8) * x3 textual → fixed(1.0, 1.0) * x4 + x5 + label(:a₁, :a₂) * x6 speed → fixed(1.0, 1.0) * x7 + fixed(1.0, NaN) * x8 + label(:λ₉, :λ₉) * x9 # variances and covariances - _(observed_vars) ↔ _(observed_vars) - _(latent_vars) ↔ _(latent_vars) + _(obs_vars) ↔ _(obs_vars) + _(lat_vars) ↔ _(lat_vars) visual ↔ textual + speed textual ↔ speed end + +fixed_and_labeled_graph = @StenoGraph begin + # measurement model + visual → fixed(1.0) * label(:λ) * x1 +end + +@testset "ParameterTable" begin + @testset 
"from StenoGraph" begin + @test_throws UndefKeywordError(:observed_vars) ParameterTable(graph) + @test_throws UndefKeywordError(:latent_vars) ParameterTable( + graph, + observed_vars = obs_vars, + ) + @test_throws ArgumentError("It is not allowed to label fixed parameters.") ParameterTable( + fixed_and_labeled_graph, + observed_vars = obs_vars, + latent_vars = lat_vars, + ) + partable = @inferred( + ParameterTable(graph, observed_vars = obs_vars, latent_vars = lat_vars) + ) + + @test partable isa ParameterTable + + # vars API + @test observed_vars(partable) == obs_vars + @test nobserved_vars(partable) == length(obs_vars) + @test latent_vars(partable) == lat_vars + @test nlatent_vars(partable) == length(lat_vars) + @test nvars(partable) == length(obs_vars) + length(lat_vars) + @test issetequal(vars(partable), [obs_vars; lat_vars]) + + # param_labels API + @test param_labels(partable) == [[:θ_1, :a₁, :λ₉]; Symbol.("θ_", 2:16)] + @test nparams(partable) == 18 + + # don't allow constructing ParameterTable from a graph for an ensemble + @test_throws ArgumentError ParameterTable( + ens_graph, + observed_vars = obs_vars, + latent_vars = lat_vars, + ) + end + + @testset "from RAMMatrices" begin + partable_orig = + ParameterTable(graph, observed_vars = obs_vars, latent_vars = lat_vars) + ram_matrices = RAMMatrices(partable_orig) + + partable = @inferred(ParameterTable(ram_matrices)) + @test partable isa ParameterTable + @test issetequal(keys(partable.columns), keys(partable_orig.columns)) + # FIXME nrow()? 
+        @test length(partable.columns[:from]) == length(partable_orig.columns[:from])
+        @test partable == partable_orig broken = true
+    end
+end
+
+@testset "EnsembleParameterTable" begin
+    groups = [:Pasteur, :Grant_White]
+    @test_throws UndefKeywordError(:observed_vars) EnsembleParameterTable(ens_graph)
+    @test_throws UndefKeywordError(:latent_vars) EnsembleParameterTable(
+        ens_graph,
+        observed_vars = obs_vars,
+    )
+    @test_throws UndefKeywordError(:groups) EnsembleParameterTable(
+        ens_graph,
+        observed_vars = obs_vars,
+        latent_vars = lat_vars,
+    )
+
+    enspartable = @inferred(
+        EnsembleParameterTable(
+            ens_graph,
+            observed_vars = obs_vars,
+            latent_vars = lat_vars,
+            groups = groups,
+        )
+    )
+    @test enspartable isa EnsembleParameterTable
+
+    @test nobserved_vars(enspartable) == length(obs_vars) broken = true
+    @test observed_vars(enspartable) == obs_vars broken = true
+    @test nlatent_vars(enspartable) == length(lat_vars) broken = true
+    @test latent_vars(enspartable) == lat_vars broken = true
+    @test nvars(enspartable) == length(obs_vars) + length(lat_vars) broken = true
+    @test issetequal(vars(enspartable), [obs_vars; lat_vars]) broken = true
+
+    @test nparams(enspartable) == 36
+    @test issetequal(
+        param_labels(enspartable),
+        [Symbol.("gPasteur_", 1:16); Symbol.("gGrant_White_", 1:17); [:a₁, :a₂, :λ₉]],
+    )
+end
+
+@testset "RAMMatrices" begin
+    partable = ParameterTable(graph, observed_vars = obs_vars, latent_vars = lat_vars)
+
+    ram_matrices = @inferred(RAMMatrices(partable))
+    @test ram_matrices isa RAMMatrices
+
+    # vars API
+    @test nobserved_vars(ram_matrices) == length(obs_vars)
+    @test observed_vars(ram_matrices) == obs_vars
+    @test nlatent_vars(ram_matrices) == length(lat_vars)
+    @test latent_vars(ram_matrices) == lat_vars
+    @test nvars(ram_matrices) == length(obs_vars) + length(lat_vars)
+    @test issetequal(vars(ram_matrices), [obs_vars; lat_vars])
+
+    # param_labels API
+    @test nparams(ram_matrices) == nparams(partable)
+    @test
param_labels(ram_matrices) == param_labels(partable) +end diff --git a/test/unit_tests/unit_tests.jl b/test/unit_tests/unit_tests.jl index 87fdde2f1..7189addd4 100644 --- a/test/unit_tests/unit_tests.jl +++ b/test/unit_tests/unit_tests.jl @@ -1,9 +1,35 @@ using Test, SafeTestsets -@safetestset "Multithreading" begin - include("multithreading.jl") -end +# Define available test sets +available_tests = Dict( + "multithreading" => "Multithreading", + "matrix_helpers" => "Matrix algebra helper functions", + "data_input_formats" => "SemObserved", + "specification" => "SemSpecification", + "model" => "Sem model", + "StatsAPI" => "StatsAPI" +) + +# Determine which tests to run based on command-line arguments +selected_tests = isempty(ARGS) ? collect(keys(available_tests)) : ARGS -@safetestset "SemObs" begin - include("data_input_formats.jl") +@testset "All Tests" begin + for file in selected_tests + if haskey(available_tests, file) + let file_ = file, test_name = available_tests[file] + # Compute the literal values + test_sym = Symbol(file_) + file_jl = file_ * ".jl" + # Build the expression with no free variables: + ex = quote + @safetestset $(Symbol(test_sym)) = $test_name begin + include($file_jl) + end + end + eval(ex) + end + else + @warn "Test file '$file' not found in available tests. Skipping." + end + end end diff --git a/test/unit_tests/unit_tests_interactive.jl b/test/unit_tests/unit_tests_interactive.jl new file mode 100644 index 000000000..cf082fa60 --- /dev/null +++ b/test/unit_tests/unit_tests_interactive.jl @@ -0,0 +1,10 @@ +# requires: TestEnv to be installed globally, and the StructuralEquationModels package `]dev`ed +# example: julia test/unit_tests/unit_tests_interactive.jl matrix_helpers + +try + import TestEnv + TestEnv.activate("StructuralEquationModels") +catch e + @warn "Error initializing Test Env" exception=(e, catch_backtrace()) +end +include("unit_tests.jl") \ No newline at end of file