diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml index 5088d5b98d..6e98b31898 100644 --- a/.github/workflows/Benchmarking.yml +++ b/.github/workflows/Benchmarking.yml @@ -101,16 +101,12 @@ jobs: echo "" echo "## Benchmarks @ ${head_sha}" echo "" - echo "### Performance" + echo "**Performance Ratio:** gradient time divided by log-density time." echo "" - echo "Performance Ratio:" - echo "Ratio of time to compute gradient and time to compute log-density." - echo "Warning: results are very approximate! See [benchmark notes](https://github.com/TuringLang/DynamicPPL.jl/tree/main/benchmarks#interpreting-results) for more context." + echo "For very small models these ratios are noisy across runs and machines; raw primal and gradient timings are more reliable. The benchmarks are aimed at DynamicPPL developers and mainly catch obvious allocation or type-stability regressions. See [benchmark notes](https://github.com/TuringLang/DynamicPPL.jl/tree/main/benchmarks#interpreting-results) for details." echo "" cat head/results.md echo "" - echo "Rows marked \`*\` have \`t(logdensity)\` below about 100 ns; their ratios can be dominated by timer floor, fixed overhead, and run-to-run variation. For those rows, raw \`t(grad)\` is more meaningful than \`t(grad)/t(logdensity)\`." - echo "" if [[ "$main_status" == "success" ]]; then echo "
Main @ ${main_sha}" echo "" diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 4dac08e984..c9766e0cfd 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -80,3 +80,37 @@ jobs: files: lcov.info token: ${{ secrets.CODECOV_TOKEN }} fail_ci_if_error: true + + reversediff: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + + - uses: julia-actions/setup-julia@v3 + with: + version: "1" + + - uses: julia-actions/cache@v3 + + - name: Run AD with ReverseDiff on demo models + working-directory: test/ext/DynamicPPLReverseDiffExt + run: | + julia --project=. --color=yes -e 'using Pkg; Pkg.instantiate()' + julia --project=. --color=yes main.jl + + marginallogdensities: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + + - uses: julia-actions/setup-julia@v3 + with: + version: "1" + + - uses: julia-actions/cache@v3 + + - name: Run MarginalLogDensities integration tests + working-directory: test/ext/DynamicPPLMarginalLogDensitiesExt + run: | + julia --project=. --color=yes -e 'using Pkg; Pkg.instantiate()' + julia --project=. --color=yes main.jl diff --git a/.github/workflows/Enzyme.yml b/.github/workflows/Enzyme.yml index 5a1d3d069f..318b8ec9aa 100644 --- a/.github/workflows/Enzyme.yml +++ b/.github/workflows/Enzyme.yml @@ -29,7 +29,7 @@ jobs: - uses: julia-actions/cache@v3 - name: Run AD with Enzyme on demo models - working-directory: test/integration/enzyme + working-directory: test/ext/DynamicPPLEnzymeCoreExt run: | julia --project=. --color=yes -e 'using Pkg; Pkg.instantiate()' julia --project=. --color=yes main.jl diff --git a/AGENTS.md b/AGENTS.md index c051f5269f..639f7c0848 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,6 +14,7 @@ DynamicPPL builds on AbstractPPL.jl for shared PPL interfaces such as `VarName`, - CI also runs Aqua.jl quality checks and doctests. - Test files are self-contained: use package imports, not relative imports or `include()`, so they run individually with TestPicker.jl. + - Always refresh each environment (`Pkg.update()` / `up`) before tests or doc builds — a stale manifest can cause subtle resolution and loading issues. - Formatting is JuliaFormatter v1 (Blue style), enforced by CI: ```bash diff --git a/Project.toml b/Project.toml index cf086e19dc..07fdb81ea0 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "DynamicPPL" uuid = "366bfd00-2699-11ea-058f-f148b4cae6d8" -version = "0.41.8" +version = "0.42" [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" @@ -12,7 +12,6 @@ Bijectors = "76274a88-744f-5084-9051-94815aaf08c4" Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de" Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9" -DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" @@ -44,22 +43,21 @@ DynamicPPLComponentArraysExt = ["ComponentArrays"] DynamicPPLEnzymeCoreExt = ["EnzymeCore"] DynamicPPLForwardDiffExt = ["ForwardDiff"] DynamicPPLMCMCChainsExt = ["MCMCChains"] +DynamicPPLMooncakeExt = ["Mooncake"] DynamicPPLMarginalLogDensitiesExt = ["MarginalLogDensities"] -DynamicPPLMooncakeExt = ["Mooncake", "DifferentiationInterface"] DynamicPPLReverseDiffExt = ["ReverseDiff"] [compat] ADTypes = "1" AbstractMCMC = "5.14" -AbstractPPL = "0.14.1" +AbstractPPL = "0.15" Accessors = "0.1" BangBang = "0.4.1" -Bijectors = "0.15.17" +Bijectors = "0.16" Chairmarks = "1.3.1" Compat = "4" ComponentArrays = "0.15" ConstructionBase = "1.5.4" -DifferentiationInterface = "0.6.41, 0.7" Distributions = "0.25" DocStringExtensions = "0.9" EnzymeCore = "0.6 - 0.8" diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml index fa50522545..8133937b0f 100644 --- a/benchmarks/Project.toml +++ b/benchmarks/Project.toml @@ -21,12 +21,12 @@ DynamicPPL = {path = ".."} [compat] ADTypes = "1.14.0" -AbstractPPL = "0.14" -Bijectors = "0.15.17" +AbstractPPL = "0.15" +Bijectors = "0.16" Chairmarks = "1.3.1" DifferentiationInterface = "0.7" Distributions = "0.25.117" -DynamicPPL = "0.41" +DynamicPPL = "0.42" Enzyme = "0.13" ForwardDiff = "1" LogDensityProblems = "2.1.2" diff --git a/benchmarks/README.md b/benchmarks/README.md index 5c266a2d43..dc6651f706 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -21,10 +21,10 @@ divided by `t(logdensity)`. For example, a value of `10` means computing the gradient takes 10 times as long as evaluating the log-density. Lower is better. `err` means the backend errored on that model. -If `t(logdensity)` is below about 100 ns, ratios are often dominated by timer -floor and fixed overhead. For those rows, raw `t(grad)` is more meaningful than -`t(grad)/t(logdensity)`. These microbenchmarks can also vary noticeably across -runs. +Rows marked `*` have `t(logdensity)` below about 100 ns; their ratios can be +dominated by timer floor, fixed overhead, and run-to-run variation. For those +rows, raw `t(grad)` is more meaningful than `t(grad)/t(logdensity)`. These +microbenchmarks can also vary noticeably across runs and machines. The CI comment shows the PR head table first and, when available, includes a collapsed `main` table for comparison. Treat the numbers as approximate and use diff --git a/docs/Project.toml b/docs/Project.toml index 288ae162ab..17659122c0 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -25,10 +25,10 @@ StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" [compat] ADTypes = "1" AbstractMCMC = "5" -AbstractPPL = "0.14" +AbstractPPL = "0.15" Accessors = "0.1" BangBang = "0.4" -Bijectors = "0.15.17" +Bijectors = "0.16" Chairmarks = "1" ChangesOfVariables = "0.1" DimensionalData = "0.30" @@ -36,7 +36,7 @@ Distributions = "0.25" Documenter = "1" DocumenterInterLinks = "1" DocumenterMermaid = "0.1, 0.2" -DynamicPPL = "0.41" +DynamicPPL = "0.42" FillArrays = "0.13, 1" ForwardDiff = "0.10, 1" LogDensityProblems = "2" diff --git a/ext/DynamicPPLMarginalLogDensitiesExt.jl b/ext/DynamicPPLMarginalLogDensitiesExt.jl index 714a3be2d2..13e512f139 100644 --- a/ext/DynamicPPLMarginalLogDensitiesExt.jl +++ b/ext/DynamicPPLMarginalLogDensitiesExt.jl @@ -158,7 +158,7 @@ accs = DynamicPPL.OnlyAccsVarInfo(( DynamicPPL.RawValueAccumulator(false), # ... whatever else you need )) -_, accs = DynamicPPL.init!!(rng, model, oavi, init_strategy, DynamicPPL.UnlinkAll()) +_, accs = DynamicPPL.init!!(rng, model, accs, init_strategy, DynamicPPL.UnlinkAll()) ``` You can then extract all the updated data from `accs` using DynamicPPL's existing API (see @@ -192,7 +192,7 @@ retcode: Success u: 1-element Vector{Float64}: 4.88281250001733e-5 -julia> # Get the an initialisation strategy representing the mode of `y`. +julia> # Get an initialisation strategy representing the mode of `y`. init_strategy = InitFromVector(mld, opt_solution.u); julia> # Evaluate the model with this initialisation strategy. diff --git a/ext/DynamicPPLMooncakeExt.jl b/ext/DynamicPPLMooncakeExt.jl index b876df575a..7a82e87fc3 100644 --- a/ext/DynamicPPLMooncakeExt.jl +++ b/ext/DynamicPPLMooncakeExt.jl @@ -1,6 +1,7 @@ module DynamicPPLMooncakeExt using DynamicPPL: DynamicPPL, is_transformed +using AbstractPPL: AbstractPPL using Mooncake: Mooncake # These are purely optimisations (although quite significant ones sometimes, especially for @@ -15,17 +16,21 @@ Mooncake.@zero_derivative Mooncake.DefaultCtx Tuple{ using DynamicPPL: @model, LinkAll, getlogjoint_internal, LogDensityFunction using ADTypes: AutoMooncake -import DifferentiationInterface using Distributions: Normal, InverseGamma, Beta using PrecompileTools: @setup_workload, @compile_workload @setup_workload begin @compile_workload begin - for dist in (Normal(), InverseGamma(2, 3), Beta(2, 2)) - @model f() = x ~ dist - ldf = LogDensityFunction( - f(), getlogjoint_internal, LinkAll(); adtype=AutoMooncake() - ) - DynamicPPL.LogDensityProblems.logdensity_and_gradient(ldf, [0.5]) + # Julia does not guarantee transitive extensions are loaded while this + # extension precompiles, so skip the workload unless Mooncake's + # AbstractPPL methods are already available. + if !isnothing(Base.get_extension(AbstractPPL, :AbstractPPLMooncakeExt)) + for dist in (Normal(), InverseGamma(2, 3), Beta(2, 2)) + @model f() = x ~ dist + ldf = LogDensityFunction( + f(), getlogjoint_internal, LinkAll(); adtype=AutoMooncake() + ) + DynamicPPL.LogDensityProblems.logdensity_and_gradient(ldf, [0.5]) + end end end end diff --git a/src/logdensityfunction.jl b/src/logdensityfunction.jl index da9d5f22af..81f3730392 100644 --- a/src/logdensityfunction.jl +++ b/src/logdensityfunction.jl @@ -23,7 +23,6 @@ using ADTypes: ADTypes using BangBang: BangBang using AbstractPPL: AbstractPPL, VarName using LogDensityProblems: LogDensityProblems -import DifferentiationInterface as DI using Random: Random """ @@ -183,7 +182,7 @@ struct LogDensityFunction{ L<:AbstractTransformStrategy, F, VNT<:VarNamedTuple, - ADP<:Union{Nothing,DI.GradientPrep}, + ADP, # type of the vector passed to logdensity functions X<:AbstractVector, AC<:AccumulatorTuple, @@ -226,12 +225,14 @@ struct LogDensityFunction{ else # Make backend-specific tweaks to the adtype adtype = DynamicPPL.tweak_adtype(adtype, model, x) - args = (model, getlogdensity, ranges_and_transforms, transform_strategy, accs) - if _use_closure(adtype) - DI.prepare_gradient(LogDensityAt(args...), adtype, x) - else - DI.prepare_gradient(logdensity_at, adtype, x, map(DI.Constant, args)...) - end + context = ( + model, getlogdensity, ranges_and_transforms, transform_strategy, accs + ) + # `x` was just constructed from the same range metadata stored in `context`, + # so the AD wrapper can skip its hot-path dimension validation. + AbstractPPL.prepare( + adtype, logdensity_internal, x; check_dims=false, context=context + ) end return new{ typeof(model), @@ -459,7 +460,7 @@ ldf_accs(::typeof(getlogprior)) = AccumulatorTuple((LogPriorAccumulator(),)) ldf_accs(::typeof(getloglikelihood)) = AccumulatorTuple((LogLikelihoodAccumulator(),)) """ - logdensity_at( + logdensity_internal( params::AbstractVector{<:Real}, model::Model, getlogdensity::Any, @@ -469,9 +470,10 @@ ldf_accs(::typeof(getloglikelihood)) = AccumulatorTuple((LogLikelihoodAccumulato ) Calculate the log density at the given `params`, using the provided information extracted -from a `LogDensityFunction`. +from a `LogDensityFunction`. This is the internal implementation behind +`LogDensityProblems.logdensity(ldf, params)`. """ -function logdensity_at( +function logdensity_internal( params::AbstractVector{<:Real}, model::Model, getlogdensity::Any, @@ -486,46 +488,48 @@ function logdensity_at( return getlogdensity(vi) end +# Backwards-compatible alias for the previous name. +const logdensity_at = logdensity_internal + """ LogDensityAt( model::Model, - getlogdensity::Any, + getlogdensity, varname_ranges::VarNamedTuple, transform_strategy::AbstractTransformStrategy, accs::AccumulatorTuple, ) -A callable struct that behaves in the same way as `logdensity_at`, but stores the model and -other information internally. Having two separate functions/structs allows for better -performance with AD backends. +!!! warning "Deprecated" + `LogDensityAt` is retained as a compatibility shim and emits a deprecation + warning. It returns an `AbstractPPL.Evaluators.VectorEvaluator` whose call + forwards to [`DynamicPPL.logdensity_internal`](@ref). New code should call + `AbstractPPL.prepare(logdensity_internal, x; context=...)` directly. """ -struct LogDensityAt{ - M<:Model,F,V<:VarNamedTuple,L<:AbstractTransformStrategy,A<:AccumulatorTuple -} - model::M - getlogdensity::F - varname_ranges::V - transform_strategy::L - accs::A - - function LogDensityAt( - model::M, getlogdensity::F, varname_ranges::V, transform_strategy::L, accs::A - ) where {M,F,V,L,A} - return new{M,F,V,L,A}( - model, getlogdensity, varname_ranges, transform_strategy, accs - ) - end -end -function (f::LogDensityAt)(params::AbstractVector{<:Real}) - return logdensity_at( - params, f.model, f.getlogdensity, f.varname_ranges, f.transform_strategy, f.accs +function LogDensityAt( + model::Model, + getlogdensity, + varname_ranges::VarNamedTuple, + transform_strategy::AbstractTransformStrategy, + accs::AccumulatorTuple, +) + Base.depwarn( + "`DynamicPPL.LogDensityAt` is deprecated; call " * + "`AbstractPPL.prepare(DynamicPPL.logdensity_internal, x; context=...)` " * + "instead.", + :LogDensityAt, + ) + dim = mapreduce(rat -> length(rat.range), +, values(varname_ranges); init=0) + context = (model, getlogdensity, varname_ranges, transform_strategy, accs) + return AbstractPPL.prepare( + logdensity_internal, zeros(dim); check_dims=false, context=context ) end -function LogDensityProblems.logdensity( +@inline function LogDensityProblems.logdensity( ldf::LogDensityFunction, params::AbstractVector{<:Real} ) - return logdensity_at( + return logdensity_internal( params, ldf.model, ldf._getlogdensity, @@ -535,38 +539,14 @@ function LogDensityProblems.logdensity( ) end -function LogDensityProblems.logdensity_and_gradient( +@inline function LogDensityProblems.logdensity_and_gradient( ldf::LogDensityFunction, params::AbstractVector{<:Real} ) # `params` has to be converted to the same vector type that was used for AD preparation, # otherwise the preparation will not be valid. params = convert(get_input_vector_type(ldf), params) - return if _use_closure(ldf.adtype) - DI.value_and_gradient( - LogDensityAt( - ldf.model, - ldf._getlogdensity, - ldf._varname_ranges, - ldf.transform_strategy, - ldf._accs, - ), - ldf._adprep, - ldf.adtype, - params, - ) - else - DI.value_and_gradient( - logdensity_at, - ldf._adprep, - ldf.adtype, - params, - DI.Constant(ldf.model), - DI.Constant(ldf._getlogdensity), - DI.Constant(ldf._varname_ranges), - DI.Constant(ldf.transform_strategy), - DI.Constant(ldf._accs), - ) - end + logp, grad = AbstractPPL.value_and_gradient!!(ldf._adprep, params) + return (logp, copy(grad)) end function LogDensityProblems.capabilities(::Type{<:LogDensityFunction{M,Nothing}}) where {M} @@ -597,43 +577,6 @@ By default, this just returns the input unchanged. """ tweak_adtype(adtype::ADTypes.AbstractADType, ::Model, ::AbstractVector) = adtype -""" - _use_closure(adtype::ADTypes.AbstractADType) - -In LogDensityProblems, we want to calculate the derivative of `logdensity(f, x)` with -respect to x, where f is the model (in our case LogDensityFunction or its arguments ) and is -a constant. However, DifferentiationInterface generally expects a single-argument function -g(x) to differentiate. - -There are two ways of dealing with this: - -1. Construct a closure over the model, i.e. let g = Base.Fix1(logdensity, f) - -2. Use a constant DI.Context. This lets us pass a two-argument function to DI, as long as we - also give it the 'inactive argument' (i.e. the model) wrapped in `DI.Constant`. - -The relative performance of the two approaches, however, depends on the AD backend used. -Some benchmarks are provided here: https://github.com/TuringLang/DynamicPPL.jl/pull/1172 - -This function is used to determine whether a given AD backend should use a closure or a -constant. If `use_closure(adtype)` returns `true`, then the closure approach will be used. -By default, this function returns `false`, i.e. the constant approach will be used. -""" -# For these AD backends both closure and no closure work, but it is just faster to not use a -# closure (see link in the docstring). -_use_closure(::ADTypes.AutoForwardDiff) = false -_use_closure(::ADTypes.AutoMooncake) = false -_use_closure(::ADTypes.AutoMooncakeForward) = false -# For ReverseDiff, with the compiled tape, you _must_ use a closure because otherwise with -# DI.Constant arguments the tape will always be recompiled upon each call to -# value_and_gradient. For non-compiled ReverseDiff, it is faster to not use a closure. -_use_closure(::ADTypes.AutoReverseDiff{compile}) where {compile} = compile -# For AutoEnzyme it allows us to avoid setting function_annotation -_use_closure(::ADTypes.AutoEnzyme) = false -# Since for most backends it's faster to not use a closure, we set that as the default -# for unknown AD backends -_use_closure(::ADTypes.AbstractADType) = false - ###################################################### # Helper functions to extract ranges and link status # ###################################################### diff --git a/src/test_utils/ad.jl b/src/test_utils/ad.jl index 606b7ecee5..7b879f3ac2 100644 --- a/src/test_utils/ad.jl +++ b/src/test_utils/ad.jl @@ -2,7 +2,6 @@ module AD using ADTypes: AbstractADType, AutoForwardDiff using Chairmarks: @be -import DifferentiationInterface as DI using DocStringExtensions using DynamicPPL: DynamicPPL, @@ -344,7 +343,7 @@ function run_ad( # Calculate log-density and gradient with the backend of interest value, grad = logdensity_and_gradient(ldf, params) - # collect(): https://github.com/JuliaDiff/DifferentiationInterface.jl/issues/754 + # Some AD backends (e.g. Enzyme) return non-Vector gradients; normalise to Vector. grad = collect(grad) verbose && println(" actual : $((value, grad))") @@ -362,7 +361,6 @@ function run_ad( model, getlogdensity, transform_strategy; adtype=test.adtype ) value_true, grad_true = logdensity_and_gradient(ldf_reference, params) - # collect(): https://github.com/JuliaDiff/DifferentiationInterface.jl/issues/754 grad_true = collect(grad_true) end # Perform testing diff --git a/src/transformed_values.jl b/src/transformed_values.jl index 6063a54173..61f2f31de2 100644 --- a/src/transformed_values.jl +++ b/src/transformed_values.jl @@ -99,8 +99,9 @@ get_internal_value(tv::TransformedValue) = tv.value Create a new `TransformedValue` with the same transformation as `tv`, but with internal value `new_val`. """ -set_internal_value(tv::TransformedValue, new_val) = - TransformedValue(new_val, get_transform(tv)) +function set_internal_value(tv::TransformedValue, new_val) + return TransformedValue(new_val, get_transform(tv)) +end """ DynamicPPL.get_raw_value(tv::TransformedValue) @@ -347,6 +348,9 @@ function apply_transform_strategy( fwd_transform = inverse(target.transform) transformed_value, logjac = with_logabsdet_jacobian(fwd_transform, raw_value) transformed_tv = TransformedValue(transformed_value, target) + # TODO: https://github.com/TuringLang/DynamicPPL.jl/issues/1407 + # Likely should return `logjac` rather than `logjac - inv_logjac`; the sibling + # branches all return only the target's forward Jacobian. (raw_value, transformed_tv, logjac - inv_logjac) else error("unknown target transform: $target") diff --git a/test/Project.toml b/test/Project.toml index 944c32102c..a14ce0bb1e 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -14,32 +14,34 @@ DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" -MarginalLogDensities = "f0c3360a-fb8d-11e9-1194-5521fd7ee392" Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6" OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" + +[sources] +DynamicPPL = {path = ".."} + [compat] ADTypes = "1" AbstractMCMC = "5.10" -AbstractPPL = "0.14" +AbstractPPL = "0.15" Accessors = "0.1" Aqua = "0.8" ComponentArrays = "0.15" BangBang = "0.4" -Bijectors = "0.15.17" +Bijectors = "0.16" Chairmarks = "1" Combinatorics = "1" DifferentiationInterface = "0.6.41, 0.7" @@ -50,10 +52,8 @@ ForwardDiff = "0.10.12, 1" InvertedIndices = "1" LogDensityProblems = "2" MCMCChains = "7.2.1" -MarginalLogDensities = "0.4" Mooncake = "0.4, 0.5" OffsetArrays = "1" OrderedCollections = "1" -ReverseDiff = "1" StableRNGs = "1" julia = "1.10" diff --git a/test/integration/enzyme/Project.toml b/test/ext/DynamicPPLEnzymeCoreExt/Project.toml similarity index 56% rename from test/integration/enzyme/Project.toml rename to test/ext/DynamicPPLEnzymeCoreExt/Project.toml index c26655fae5..7a3d097ac0 100644 --- a/test/integration/enzyme/Project.toml +++ b/test/ext/DynamicPPLEnzymeCoreExt/Project.toml @@ -1,9 +1,12 @@ [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" +AbstractPPL = "7a57a42e-76ec-4ea3-a279-07e840d6d9cf" +Bijectors = "76274a88-744f-5084-9051-94815aaf08c4" +DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8" Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [sources] -DynamicPPL = {path = "../../../"} +DynamicPPL = {path = "../../.."} diff --git a/test/integration/enzyme/main.jl b/test/ext/DynamicPPLEnzymeCoreExt/main.jl similarity index 91% rename from test/integration/enzyme/main.jl rename to test/ext/DynamicPPLEnzymeCoreExt/main.jl index ec589ae398..1a762f0fef 100644 --- a/test/integration/enzyme/main.jl +++ b/test/ext/DynamicPPLEnzymeCoreExt/main.jl @@ -1,6 +1,7 @@ using DynamicPPL.TestUtils: ALL_MODELS using DynamicPPL.TestUtils.AD: run_ad using ADTypes: AutoEnzyme +using DifferentiationInterface: DifferentiationInterface using Test: @test, @testset import Enzyme: set_runtime_activity, Forward, Reverse, Const using ForwardDiff: ForwardDiff # run_ad uses FD for correctness test diff --git a/test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml b/test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml new file mode 100644 index 0000000000..c541036d16 --- /dev/null +++ b/test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml @@ -0,0 +1,19 @@ +[deps] +ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" +AbstractPPL = "7a57a42e-76ec-4ea3-a279-07e840d6d9cf" +Bijectors = "76274a88-744f-5084-9051-94815aaf08c4" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8" +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +MarginalLogDensities = "f0c3360a-fb8d-11e9-1194-5521fd7ee392" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[sources] +DynamicPPL = {path = "../../.."} + +[compat] +ADTypes = "1" +Bijectors = "0.16" +Distributions = "0.25" +ForwardDiff = "0.10.12, 1" +MarginalLogDensities = "0.4" diff --git a/test/ext/DynamicPPLMarginalLogDensitiesExt.jl b/test/ext/DynamicPPLMarginalLogDensitiesExt/main.jl similarity index 100% rename from test/ext/DynamicPPLMarginalLogDensitiesExt.jl rename to test/ext/DynamicPPLMarginalLogDensitiesExt/main.jl diff --git a/test/ext/DynamicPPLReverseDiffExt/Project.toml b/test/ext/DynamicPPLReverseDiffExt/Project.toml new file mode 100644 index 0000000000..53dbe79151 --- /dev/null +++ b/test/ext/DynamicPPLReverseDiffExt/Project.toml @@ -0,0 +1,19 @@ +[deps] +ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" +AbstractPPL = "7a57a42e-76ec-4ea3-a279-07e840d6d9cf" +Bijectors = "76274a88-744f-5084-9051-94815aaf08c4" +DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" +DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" +ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[sources] +DynamicPPL = {path = "../../.."} + +[compat] +DifferentiationInterface = "0.6.41, 0.7" +ReverseDiff = "1" diff --git a/test/ext/DynamicPPLReverseDiffExt/main.jl b/test/ext/DynamicPPLReverseDiffExt/main.jl new file mode 100644 index 0000000000..44fffbadc9 --- /dev/null +++ b/test/ext/DynamicPPLReverseDiffExt/main.jl @@ -0,0 +1,50 @@ +using ADTypes: AutoReverseDiff +using DifferentiationInterface +using DynamicPPL +using DynamicPPL.TestUtils: ALL_MODELS +using DynamicPPL.TestUtils.AD: run_ad +using Distributions: Normal +using ForwardDiff: ForwardDiff # run_ad uses FD for correctness test +using LogDensityProblems: LogDensityProblems +using ReverseDiff: ReverseDiff +using Test: @test, @testset + +ADTYPES = ( + ("ReverseDiff", AutoReverseDiff(; compile=false)), + ("ReverseDiffCompiled", AutoReverseDiff(; compile=true)), +) + +@testset "$ad_key" for (ad_key, ad_type) in ADTYPES + @testset "$(model.f)" for model in ALL_MODELS + @test run_ad(model, ad_type) isa Any + end +end + +@testset "ReverseDiff compiled prep reduces repeated-call allocations" begin + @model f() = x ~ Normal() + ldf_compiled = LogDensityFunction( + f(), getlogjoint_internal, LinkAll(); adtype=AutoReverseDiff(; compile=true) + ) + ldf_uncompiled = LogDensityFunction( + f(), getlogjoint_internal, LinkAll(); adtype=AutoReverseDiff(; compile=false) + ) + params = rand(ldf_compiled) + + LogDensityProblems.logdensity_and_gradient(ldf_compiled, params) + LogDensityProblems.logdensity_and_gradient(ldf_uncompiled, params) + + function repeated_call_allocs(ldf, params) + GC.gc() + before = Base.gc_num() + for _ in 1:100 + LogDensityProblems.logdensity_and_gradient(ldf, params) + end + after = Base.gc_num() + return Base.GC_Diff(after, before).allocd + end + + allocs_compiled = repeated_call_allocs(ldf_compiled, params) + allocs_uncompiled = repeated_call_allocs(ldf_uncompiled, params) + + @test allocs_compiled < allocs_uncompiled +end diff --git a/test/floattypes/Project.toml b/test/floattypes/Project.toml index 02a770fe70..5201c4ee0c 100644 --- a/test/floattypes/Project.toml +++ b/test/floattypes/Project.toml @@ -1,5 +1,8 @@ [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" +AbstractPPL = "7a57a42e-76ec-4ea3-a279-07e840d6d9cf" +Bijectors = "76274a88-744f-5084-9051-94815aaf08c4" +DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" @@ -7,4 +10,4 @@ LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [sources] -DynamicPPL = {path = "../../"} +DynamicPPL = {path = "../.."} diff --git a/test/floattypes/main.jl b/test/floattypes/main.jl index 235bf5b522..69974245a2 100644 --- a/test/floattypes/main.jl +++ b/test/floattypes/main.jl @@ -7,7 +7,8 @@ # # and this should be looped over for `f64`, `f32`, `f16`, and `min`. -using DynamicPPL, LogDensityProblems, ForwardDiff, Distributions, ADTypes, Test +using DynamicPPL, + DifferentiationInterface, LogDensityProblems, ForwardDiff, Distributions, ADTypes, Test function floattypestr_to_type(floattypestr) if floattypestr == "f64" diff --git a/test/logdensityfunction.jl b/test/logdensityfunction.jl index ab50ef0d2a..8a5d37ed36 100644 --- a/test/logdensityfunction.jl +++ b/test/logdensityfunction.jl @@ -13,8 +13,8 @@ using LogDensityProblems: LogDensityProblems using Random: Xoshiro using StableRNGs: StableRNG +using DifferentiationInterface: DifferentiationInterface using ForwardDiff: ForwardDiff -using ReverseDiff: ReverseDiff using Mooncake: Mooncake @testset "LogDensityFunction: constructors" begin @@ -457,11 +457,7 @@ end # Used as the ground truth that others are compared against. ref_adtype = AutoForwardDiff() - test_adtypes = [ - AutoReverseDiff(; compile=false), - AutoReverseDiff(; compile=true), - AutoMooncake(; config=nothing), - ] + test_adtypes = [AutoForwardDiff(), AutoMooncake(; config=nothing)] @testset "Correctness" begin @testset "$(m.f)" for m in DynamicPPL.TestUtils.ALL_MODELS @@ -511,7 +507,7 @@ end return LogDensityProblems.logdensity_and_gradient(ldf, m[:]) end - @model function scalar_matrix_model(::Type{T}=Float64) where {T<:Real} + @model function scalar_matrix_model((::Type{T})=Float64) where {T<:Real} m = Matrix{T}(undef, 2, 3) return m ~ filldist(MvNormal(zeros(2), I), 3) end @@ -520,14 +516,14 @@ end scalar_matrix_model, test_m, ref_adtype ) - @model function matrix_model(::Type{T}=Matrix{Float64}) where {T} + @model function matrix_model((::Type{T})=Matrix{Float64}) where {T} m = T(undef, 2, 3) return m ~ filldist(MvNormal(zeros(2), I), 3) end matrix_model_reference = eval_logp_and_grad(matrix_model, test_m, ref_adtype) - @model function scalar_array_model(::Type{T}=Float64) where {T<:Real} + @model function scalar_array_model((::Type{T})=Float64) where {T<:Real} m = Array{T}(undef, 2, 3) return m ~ filldist(MvNormal(zeros(2), I), 3) end @@ -536,7 +532,7 @@ end scalar_array_model, test_m, ref_adtype ) - @model function array_model(::Type{T}=Array{Float64}) where {T} + @model function array_model((::Type{T})=Array{Float64}) where {T} m = T(undef, 2, 3) return m ~ filldist(MvNormal(zeros(2), I), 3) end @@ -595,4 +591,16 @@ end end end +@testset "LogDensityAt deprecation shim" begin + @model tiny() = x ~ Normal() + ldf = LogDensityFunction(tiny()) + vnt = DynamicPPL.get_all_ranges_and_transforms(ldf) + ts = ldf.transform_strategy + accs = ldf._accs + result = @test_logs (:warn, r"deprecated") DynamicPPL.LogDensityAt( + ldf.model, DynamicPPL.getlogjoint_internal, vnt, ts, accs + ) + @test result isa AbstractPPL.Evaluators.VectorEvaluator +end + end diff --git a/test/runtests.jl b/test/runtests.jl index 1ba744c3f8..bdd14521c4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -49,7 +49,6 @@ Random.seed!(100) include("transformed_values.jl") include("logdensityfunction.jl") @testset "extensions" begin - include("ext/DynamicPPLMarginalLogDensitiesExt.jl") include("ext/DynamicPPLMCMCChainsExt.jl") end @testset "ad" begin