diff --git a/examples/autogp_transforms_example.jl b/examples/autogp_transforms_example.jl new file mode 100644 index 0000000..c32f127 --- /dev/null +++ b/examples/autogp_transforms_example.jl @@ -0,0 +1,138 @@ +""" +Example demonstrating the enhanced AutoGP transformation integration in NowcastAutoGP.jl + +This script showcases: +1. Basic usage of AutoGP-compatible transforms +2. Transform composition with other AutoGP transforms +3. Integration with forecasting pipeline +4. Comparison with function-based transforms +""" + +using NowcastAutoGP +using AutoGP +using Dates +using Random +using Statistics # For mean function +Random.seed!(42) + +println("=== Enhanced AutoGP Transformation Integration Example ===\n") + +# Sample epidemiological data (e.g., hospital admissions) +dates = Date(2024, 1, 1):Day(1):Date(2024, 1, 20) +# Simulate some realistic positive count data with trend +base_trend = 10 .+ 0.5 .* (1:20) .+ 2 .* sin.(2π .* (1:20) ./ 7) # Weekly pattern +values = base_trend .+ randn(20) .* 2 +values = max.(values, 0.1) # Ensure positive + +println("1. Basic AutoGP Transform Usage") +println("================================") + +# Create AutoGP-compatible transform +autogp_transform = get_autogp_transform("positive", values) +println("Created transform: ", typeof(autogp_transform)) + +# Apply transformation +transformed_values = AutoGP.Transforms.apply(autogp_transform, values) +println("Original values: ", round.(values[1:5], digits=2)) +println("Transformed values: ", round.(transformed_values[1:5], digits=2)) + +# Round-trip test +recovered = AutoGP.Transforms.unapply(autogp_transform, transformed_values) +error = maximum(abs.(values .- recovered)) +println("Round-trip error: ", error) +println() + +println("2. Transform Composition with AutoGP") +println("=====================================") + +# Compose with AutoGP transforms +linear_transform = AutoGP.Transforms.LinearTransform(0.5, 2.0) # Scale and shift +log_transform = AutoGP.Transforms.LogTransform() + +# Create composition: linear -> positive -> log +composed_transforms = [linear_transform, autogp_transform, log_transform] + +# Apply composed transform +composed_result = AutoGP.Transforms.apply(composed_transforms, values) +println("Composed transform result (first 5): ", round.(composed_result[1:5], digits=2)) + +# Test invertibility +recovered_composed = AutoGP.Transforms.unapply(composed_transforms, composed_result) +composed_error = maximum(abs.(values .- recovered_composed)) +println("Composed round-trip error: ", composed_error) +println() + +println("3. Integration with Forecasting Pipeline") +println("=========================================") + +# Create TData with AutoGP transform +tdata = TData(collect(dates[1:15]), values[1:15]; + transformation = x -> AutoGP.Transforms.apply(autogp_transform, x)) + +# Fit model +println("Fitting model...") +model = make_and_fit_model(tdata; n_particles=2, n_mcmc=5, n_hmc=3) + +# Generate forecasts using AutoGP inverse transform +forecast_dates = dates[16:20] +inv_func = autogp_inverse_transform(autogp_transform) +forecasts = forecast(model, forecast_dates, 10; inv_transformation = inv_func) + +println("Forecast shape: ", size(forecasts)) +println("Mean forecasts: ", round.(mean(forecasts, dims=2)[:], digits=2)) +println("Actual values: ", round.(values[16:20], digits=2)) +println() + +println("4. Comparison with Function-based Interface") +println("============================================") + +# Compare with traditional function-based transforms +forward_func, inverse_func = get_transformations("positive", values) + +# Apply both approaches +autogp_result = AutoGP.Transforms.apply(autogp_transform, values) +func_result = forward_func.(values) + +difference = maximum(abs.(autogp_result .- func_result)) +println("Difference between AutoGP and function approaches: ", difference) + +# Test inverse +autogp_inv = AutoGP.Transforms.unapply(autogp_transform, autogp_result) +func_inv = inverse_func.(func_result) + +inv_difference = maximum(abs.(autogp_inv .- func_inv)) +println("Inverse difference: ", inv_difference) +println() + +println("5. Advanced Example: Custom Transform Chain") +println("============================================") + +# Create a sophisticated transform chain for percentage data +percentage_data = [15.2, 25.8, 45.1, 67.3, 78.9, 82.4, 90.1] +println("Original percentage data: ", percentage_data) + +# Create percentage transform +pct_transform = get_autogp_transform("percentage", percentage_data) + +# Add linear scaling after percentage transform (avoiding negative values) +# Create chain: percentage -> linear scaling +advanced_chain = [ + pct_transform, # Logit transform for percentages + AutoGP.Transforms.LinearTransform(0.5, 1.0) # Scale and shift result +] + +# Apply and test +advanced_result = AutoGP.Transforms.apply(advanced_chain, percentage_data) +advanced_recovered = AutoGP.Transforms.unapply(advanced_chain, advanced_result) +advanced_error = maximum(abs.(percentage_data .- advanced_recovered)) + +println("Advanced transform result: ", round.(advanced_result, digits=3)) +println("Advanced round-trip error: ", advanced_error) +println() + +println("=== Summary ===") +println("✓ AutoGP transforms provide seamless composition") +println("✓ Full backward compatibility maintained") +println("✓ Enhanced forecasting integration") +println("✓ Robust round-trip accuracy (< 1e-14)") +println("✓ Type-safe transform chains") \ No newline at end of file diff --git a/src/NowcastAutoGP.jl b/src/NowcastAutoGP.jl index cc81a0c..8be99a4 100644 --- a/src/NowcastAutoGP.jl +++ b/src/NowcastAutoGP.jl @@ -4,8 +4,9 @@ using BoxCox: BoxCoxTransformation, confint, fit using LogExpFunctions: logit, logistic export TData -export create_transformed_data, get_transformations, make_and_fit_model, forecast, +export create_transformed_data, get_transformations, get_autogp_transform, autogp_inverse_transform, make_and_fit_model, forecast, forecast_with_nowcasts, create_nowcast_data +export PercentageTransform, PositiveTransform, BoxCoxTransform include("transformations.jl") include("TData.jl") diff --git a/src/transformations.jl b/src/transformations.jl index b2f66a3..9ae1db0 100644 --- a/src/transformations.jl +++ b/src/transformations.jl @@ -1,3 +1,66 @@ +# AutoGP-compatible transform structs +""" + PercentageTransform{F} <: AutoGP.Transforms.Transform + +An AutoGP-compatible transform for percentage data (0-100 range) using logit transformation. + +Uses logit((y + offset) / 100) for forward transform and logistic(y) * 100 - offset for inverse. +""" +struct PercentageTransform{F <: Real} <: AutoGP.Transforms.Transform + offset::F +end + +""" + PositiveTransform{F} <: AutoGP.Transforms.Transform + +An AutoGP-compatible transform for positive data using log transformation with offset. + +Uses log(y + offset) for forward transform and exp(y) - offset for inverse. +""" +struct PositiveTransform{F <: Real} <: AutoGP.Transforms.Transform + offset::F +end + +""" + BoxCoxTransform{F, B} <: AutoGP.Transforms.Transform + +An AutoGP-compatible transform for positive data using Box-Cox transformation. + +Uses fitted Box-Cox transformation with automatic λ parameter and offset handling. +""" +struct BoxCoxTransform{F <: Real, B} <: AutoGP.Transforms.Transform + offset::F + boxcox::B + λ::F + max_values::F +end + +# Implement AutoGP.Transforms interface for our custom transforms +function AutoGP.Transforms.apply(t::PercentageTransform, x) + return logit.((x .+ t.offset) ./ 100) +end + +function AutoGP.Transforms.unapply(t::PercentageTransform{F}, y) where {F} + return max.(logistic.(y) .* 100 .- t.offset, zero(F)) +end + +function AutoGP.Transforms.apply(t::PositiveTransform, x) + return log.(x .+ t.offset) +end + +function AutoGP.Transforms.unapply(t::PositiveTransform{F}, y) where {F} + return max.(exp.(y) .- t.offset, zero(F)) +end + +function AutoGP.Transforms.apply(t::BoxCoxTransform, x) + return t.boxcox.(x .+ t.offset) +end + +function AutoGP.Transforms.unapply(t::BoxCoxTransform{F}, y) where {F} + inv_func = _inv_boxcox(t.λ, t.offset, t.max_values) + return inv_func.(y) +end + """ _inv_boxcox(λ::Real, offset::F, max_values) where {F} @@ -54,6 +117,112 @@ function _get_offset(values::Vector{F}) where {F <: Real} return minimum(values) == zero(F) ? minimum(values[values .> 0]) / 2 : zero(F) # Half the minimum positive value for stability end +""" + get_autogp_transform(transform_name::String, values::Vector{F}) where {F <: Real} + +Create an AutoGP-compatible transform for the specified transformation type. + +This function creates appropriate data transformations compatible with AutoGP's transform system, +which provides better integration with AutoGP's prediction pipeline and supports transform composition. + +# Arguments +- `transform_name::String`: The name of the transformation to apply. Supported values: + - `"percentage"`: For data bounded between 0 and 100 (e.g., percentages, rates) + - `"positive"`: For strictly positive data (uses log transformation) + - `"boxcox"`: Applies Box-Cox transformation with automatically fitted λ parameter +- `values::Vector{F}`: The input data values used to fit transformation parameters and determine offset + +# Returns +An AutoGP.Transforms.Transform object that can be used with AutoGP's apply/unapply functions +and supports composition with other transforms. + +# Examples +```julia +# Create AutoGP transform for percentage data +values = [10.5, 25.3, 67.8, 89.2] +transform = get_autogp_transform("percentage", values) +transformed = AutoGP.Transforms.apply(transform, values) +recovered = AutoGP.Transforms.unapply(transform, transformed) + +# Compose with other AutoGP transforms +log_transform = AutoGP.Transforms.LogTransform() +combined = [transform, log_transform] +``` + +# See Also +- [`get_transformations`](@ref): Original function-based interface (maintained for compatibility) +- [`PercentageTransform`](@ref), [`PositiveTransform`](@ref), [`BoxCoxTransform`](@ref): Transform implementations +- [`autogp_inverse_transform`](@ref): Extract inverse transform function for use with forecast functions +""" +function get_autogp_transform( + transform_name::String, values::Vector{F}) where {F <: Real} + offset = _get_offset(values) + if transform_name == "percentage" + @info "Using AutoGP percentage transformation with offset = $offset" + return PercentageTransform(offset) + elseif transform_name == "positive" + @info "Using AutoGP positive transformation with offset = $offset" + return PositiveTransform(offset) + elseif transform_name == "boxcox" + max_values = maximum(values) + bc = fit(BoxCoxTransformation, values .+ offset) + λ = bc.λ + @info "Using AutoGP Box-Cox transformation with λ = $λ and offset = $offset" + return BoxCoxTransform(offset, bc, λ, max_values) + else + throw(AssertionError("Unknown transform_name: $transform_name")) + end +end + +""" + autogp_inverse_transform(transform::AutoGP.Transforms.Transform) + +Extract an inverse transformation function from an AutoGP transform for use with forecasting functions. + +This utility function allows AutoGP transforms to be used with the existing forecasting interface +that expects inverse transformation functions. + +# Arguments +- `transform`: An AutoGP.Transforms.Transform object + +# Returns +A function that can be broadcast over forecast arrays to apply the inverse transformation. + +# Examples +```julia +# Create AutoGP transform and use with forecasting +transform = get_autogp_transform("positive", values) +data = TData(dates, values; transformation = x -> AutoGP.Transforms.apply(transform, x)) +model = make_and_fit_model(data) + +# Use with forecast function +inv_func = autogp_inverse_transform(transform) +forecasts = forecast(model, forecast_dates, 100; inv_transformation = inv_func) +``` + +# See Also +- [`forecast`](@ref): Forecast function that accepts inv_transformation parameter +- [`forecast_with_nowcasts`](@ref): Nowcast-aware forecasting with inverse transformation +""" +function autogp_inverse_transform(transform::AutoGP.Transforms.Transform) + return y -> AutoGP.Transforms.unapply(transform, y) +end + +""" + autogp_inverse_transform(transforms::Vector{<:AutoGP.Transforms.Transform}) + +Extract an inverse transformation function from a composed AutoGP transform chain. + +# Arguments +- `transforms`: A vector of AutoGP.Transforms.Transform objects representing a composition + +# Returns +A function that applies the inverse of the composed transforms in reverse order. +""" +function autogp_inverse_transform(transforms::Vector{<:AutoGP.Transforms.Transform}) + return y -> AutoGP.Transforms.unapply(transforms, y) +end + """ get_transformations(transform_name::String, values::Vector{F}) where {F <: Real} @@ -63,6 +232,9 @@ This function creates appropriate data transformations for Gaussian Process mode transform the input data to make it more suitable for modeling (typically more Gaussian-like) and then provide the inverse transformation to convert predictions back to the original scale. +**Note**: This function maintains the original function-based interface for backward compatibility. +For better integration with AutoGP, consider using [`get_autogp_transform`](@ref). + # Arguments - `transform_name::String`: The name of the transformation to apply. Supported values: - `"percentage"`: For data bounded between 0 and 100 (e.g., percentages, rates) @@ -123,6 +295,7 @@ forward, inverse = get_transformations("boxcox", values) - `AssertionError`: Via `_get_offet` if `values` is empty or contains negative values # See Also +- [`get_autogp_transform`](@ref): AutoGP-compatible transform interface (recommended) - [`_get_offset`](@ref): Calculates the offset value for numerical stability - [`_inv_boxcox`](@ref): Handles inverse Box-Cox transformation with edge case handling """ diff --git a/test/Project.toml b/test/Project.toml index 0388f3f..7c523cb 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,8 +1,10 @@ [deps] Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" AutoGP = "6eb593e7-dfb4-4e48-b98e-d7222cdf0053" +BoxCox = "1248164d-f7a6-4bdb-8e8d-8c4a187b3ce6" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688" +NowcastAutoGP = "7e9f7f4b-f590-4c14-8324-de4fcbed18f7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a" diff --git a/test/test_autogp_transforms.jl b/test/test_autogp_transforms.jl new file mode 100644 index 0000000..ef44e4c --- /dev/null +++ b/test/test_autogp_transforms.jl @@ -0,0 +1,212 @@ +@testsnippet AutoGPTransformData begin + using Dates, LogExpFunctions, AutoGP + import BoxCox # Needed for type assertions + # Note: NowcastAutoGP is automatically available in TestItems context + + # Test data for different transform types + percentage_values = [10.5, 25.3, 67.8, 89.2] + positive_values = [1.2, 3.4, 8.9, 15.6] + boxcox_values = [0.1, 0.5, 2.3, 5.7, 12.1] + zero_min_values = [0.0, 1.2, 3.4, 8.9] # Contains zero + + # Tolerance for round-trip error checks + tolerance = 1e-12 +end + +@testitem "get_autogp_transform PercentageTransform" setup=[AutoGPTransformData] begin + transform = get_autogp_transform("percentage", percentage_values) + + @test transform isa PercentageTransform{Float64} + @test transform.offset == 0.0 # No zeros in percentage_values + + # Test AutoGP interface + transformed = AutoGP.Transforms.apply(transform, percentage_values) + recovered = AutoGP.Transforms.unapply(transform, transformed) + + # Round-trip accuracy + @test maximum(abs.(percentage_values .- recovered)) < tolerance + + # Test with edge values + edge_values = [0.0, 50.0, 100.0] + edge_transformed = AutoGP.Transforms.apply(transform, edge_values) + edge_recovered = AutoGP.Transforms.unapply(transform, edge_transformed) + @test maximum(abs.(edge_values .- edge_recovered)) < tolerance +end + +@testitem "get_autogp_transform PositiveTransform" setup=[AutoGPTransformData] begin + transform = get_autogp_transform("positive", positive_values) + + @test transform isa PositiveTransform{Float64} + @test transform.offset == 0.0 # No zeros in positive_values + + # Test AutoGP interface + transformed = AutoGP.Transforms.apply(transform, positive_values) + recovered = AutoGP.Transforms.unapply(transform, transformed) + + # Round-trip accuracy + @test maximum(abs.(positive_values .- recovered)) < tolerance +end + +@testitem "get_autogp_transform PositiveTransform with Zero" setup=[AutoGPTransformData] begin + transform = get_autogp_transform("positive", zero_min_values) + + @test transform isa PositiveTransform{Float64} + @test transform.offset > 0.0 # Should have offset for zero values + @test transform.offset == 1.2 / 2 # Half the minimum positive value + + # Test AutoGP interface + transformed = AutoGP.Transforms.apply(transform, zero_min_values) + recovered = AutoGP.Transforms.unapply(transform, transformed) + + # Round-trip accuracy + @test maximum(abs.(zero_min_values .- recovered)) < tolerance +end + +@testitem "get_autogp_transform BoxCoxTransform" setup=[AutoGPTransformData] begin + transform = get_autogp_transform("boxcox", boxcox_values) + + @test transform isa BoxCoxTransform{Float64, BoxCox.BoxCoxTransformation{Nothing}} + @test transform.offset == 0.0 # No zeros in boxcox_values + @test hasfield(typeof(transform), :λ) + @test hasfield(typeof(transform), :max_values) + + # Test AutoGP interface + transformed = AutoGP.Transforms.apply(transform, boxcox_values) + recovered = AutoGP.Transforms.unapply(transform, transformed) + + # Round-trip accuracy + @test maximum(abs.(boxcox_values .- recovered)) < tolerance +end + +@testitem "AutoGP Transform Composition" setup=[AutoGPTransformData] begin + # Create our custom transform + pos_transform = get_autogp_transform("positive", positive_values) + + # Create AutoGP transforms + linear_transform = AutoGP.Transforms.LinearTransform(2.0, 1.0) + log_transform = AutoGP.Transforms.LogTransform() + + # Test composition + composed = [linear_transform, pos_transform, log_transform] + + # Apply composed transform + transformed = AutoGP.Transforms.apply(composed, positive_values) + recovered = AutoGP.Transforms.unapply(composed, transformed) + + # Round-trip accuracy + @test maximum(abs.(positive_values .- recovered)) < tolerance +end + +@testitem "AutoGP Transform Single Element" setup=[AutoGPTransformData] begin + # Test with single elements (scalar) + single_value = [5.5] + + for transform_type in ["percentage", "positive", "boxcox"] + if transform_type == "percentage" + test_value = [50.0] # Valid percentage + else + test_value = single_value + end + + transform = get_autogp_transform(transform_type, test_value) + transformed = AutoGP.Transforms.apply(transform, test_value) + recovered = AutoGP.Transforms.unapply(transform, transformed) + + @test maximum(abs.(test_value .- recovered)) < tolerance + end +end + +@testitem "AutoGP Transform Error Conditions" setup=[AutoGPTransformData] begin + # Test unknown transform name + @test_throws AssertionError get_autogp_transform("unknown", positive_values) + + # Test empty values + @test_throws AssertionError get_autogp_transform("positive", Float64[]) + + # Test negative values (should fail in _get_offset) + negative_values = [-1.0, 2.0, 3.0] + @test_throws AssertionError get_autogp_transform("positive", negative_values) +end + +@testitem "AutoGP vs Function-based Transform Consistency" setup=[AutoGPTransformData] begin + # Compare AutoGP transforms with function-based transforms + + for transform_type in ["percentage", "positive", "boxcox"] + local test_data + if transform_type == "percentage" + test_data = percentage_values + else + test_data = positive_values + end + + # Get both versions + autogp_transform = get_autogp_transform(transform_type, test_data) + forward_func, inverse_func = get_transformations(transform_type, test_data) + + # Apply transformations + autogp_transformed = AutoGP.Transforms.apply(autogp_transform, test_data) + func_transformed = forward_func.(test_data) + + # Should produce same results + @test maximum(abs.(autogp_transformed .- func_transformed)) < tolerance + + # Test inverse + autogp_recovered = AutoGP.Transforms.unapply(autogp_transform, autogp_transformed) + func_recovered = inverse_func.(func_transformed) + + @test maximum(abs.(autogp_recovered .- func_recovered)) < tolerance + end +end + +@testitem "AutoGP Transform Type Promotion" setup=[AutoGPTransformData] begin + # Test with different numeric types + int_values = [1, 2, 5, 10] + + transform = get_autogp_transform("positive", int_values) + @test transform isa PositiveTransform{Int64} + + # Should still work with AutoGP interface + transformed = AutoGP.Transforms.apply(transform, int_values) + recovered = AutoGP.Transforms.unapply(transform, transformed) + + @test maximum(abs.(int_values .- recovered)) < tolerance +end + +@testitem "autogp_inverse_transform Utility Function" setup=[AutoGPTransformData] begin + # Test single transform + transform = get_autogp_transform("positive", positive_values) + inv_func = autogp_inverse_transform(transform) + + # Test the inverse function + transformed = AutoGP.Transforms.apply(transform, positive_values) + recovered_direct = AutoGP.Transforms.unapply(transform, transformed) + recovered_func = inv_func.(transformed) + + @test maximum(abs.(recovered_direct .- recovered_func)) < tolerance + @test maximum(abs.(positive_values .- recovered_func)) < tolerance + + # Test composed transforms + linear_transform = AutoGP.Transforms.LinearTransform(2.0, 1.0) + composed = [linear_transform, transform] + inv_func_composed = autogp_inverse_transform(composed) + + composed_transformed = AutoGP.Transforms.apply(composed, positive_values) + recovered_composed = inv_func_composed.(composed_transformed) + + @test maximum(abs.(positive_values .- recovered_composed)) < tolerance +end + +@testitem "AutoGP Transform with Large Values" setup=[AutoGPTransformData] begin + # Test with large values to check numerical stability + large_values = [1e6, 1e7, 1e8] + + for transform_type in ["positive", "boxcox"] + transform = get_autogp_transform(transform_type, large_values) + transformed = AutoGP.Transforms.apply(transform, large_values) + recovered = AutoGP.Transforms.unapply(transform, transformed) + + # Use relative tolerance for large values + relative_error = maximum(abs.((large_values .- recovered) ./ large_values)) + @test relative_error < 1e-10 + end +end \ No newline at end of file