including more regularization options: handling nonsquare case and including lagrangian hessian regularization for games

dfridovi · dfridovi · commit b4938ac5f609 · 2025-08-14T16:51:15.000-05:00
diff --git a/examples/utils.jl b/examples/utils.jl
@@ -59,6 +59,7 @@ function build_parametric_game(;
         K_symbolic,
         z_symbolic,
         θ_symbolic,
+        η_symbolic,
         lower_bounds,
         upper_bounds,
         dims,
@@ -72,7 +73,8 @@ function build_parametric_game(;
         z_symbolic,
         θ_symbolic,
         lower_bounds,
-        upper_bounds,
+        upper_bounds;
+        η_symbolic,
     )
     MixedComplementarityProblems.ParametricGame(
         problems,
diff --git a/src/game.jl b/src/game.jl
@@ -30,7 +30,7 @@ function ParametricGame(;
     shared_equality = nothing,
     shared_inequality = nothing,
 )
-    (; K_symbolic, z_symbolic, θ_symbolic, lower_bounds, upper_bounds, dims) =
+    (; K_symbolic, z_symbolic, θ_symbolic, η_symbolic, lower_bounds, upper_bounds, dims) =
         game_to_mcp(;
             test_point,
             test_parameter,
@@ -39,7 +39,15 @@ function ParametricGame(;
             shared_inequality,
         )
 
-    mcp = PrimalDualMCP(K_symbolic, z_symbolic, θ_symbolic, lower_bounds, upper_bounds)
+    mcp = PrimalDualMCP(
+        K_symbolic,
+        z_symbolic,
+        θ_symbolic,
+        lower_bounds,
+        upper_bounds;
+        η_symbolic,
+    )
+
     ParametricGame(problems, shared_equality, shared_inequality, dims, mcp)
 end
 
@@ -62,7 +70,7 @@ function game_to_mcp(;
         shared_inequality,
     )
 
-    # Define primal and dual variables for the game, and game parameters..
+    # Define primal and dual variables for the game, and game parameters.
     # Note that BlockArrays can handle blocks of zero size.
     backend = SymbolicTracingUtils.SymbolicsBackend()
     x =
@@ -80,6 +88,10 @@ function game_to_mcp(;
         SymbolicTracingUtils.make_variables(backend, :θ, sum(dims.θ)) |>
         to_blockvector(dims.θ)
 
+    # Parameter for adding a scaled identity to the Hessian of each player's
+    # Lagrangian wrt that player's variable.
+    η = only(SymbolicTracingUtils.make_variables(backend, :η, 1))
+
     # Build symbolic expressions for objectives and constraints.
     fs = map(problems, blocks(θ)) do p, θi
         p.objective(x, θi)
@@ -94,12 +106,12 @@ function game_to_mcp(;
     g̃ = isnothing(shared_equality) ? nothing : shared_equality(x, θ)
     h̃ = isnothing(shared_inequality) ? nothing : shared_inequality(x, θ)
 
-    # Build gradient of each player's Lagrangian.
+    # Build gradient of each player's Lagrangian and include regularization.
     ∇Ls = map(fs, gs, hs, blocks(x), blocks(λ), blocks(μ)) do f, g, h, xi, λi, μi
         L =
             f - (isnothing(g) ? 0 : sum(λi .* g)) - (isnothing(h) ? 0 : sum(μi .* h)) - (isnothing(g̃) ? 0 : sum(λ̃ .* g̃)) -
             (isnothing(h̃) ? 0 : sum(μ̃ .* h̃))
-        SymbolicTracingUtils.gradient(L, xi)
+        SymbolicTracingUtils.gradient(L, xi) + η * xi
     end
 
     # Build MCP representation.
@@ -150,6 +162,7 @@ function game_to_mcp(;
         K_symbolic = collect(K),
         z_symbolic = collect(z),
         θ_symbolic = collect(θ),
+        η_symbolic = η,
         lower_bounds,
         upper_bounds,
         dims,
@@ -183,13 +196,9 @@ function dimensions(
 end
 
 "Solve a parametric game."
-function solve(
-    game::ParametricGame,
-    θ;
-    solver_type = InteriorPoint(),
-    kwargs...
-)
-    (; x, y, s, kkt_error, status) = solve(solver_type, game.mcp, θ; kwargs...)
+function solve(game::ParametricGame, θ; solver_type = InteriorPoint(), kwargs...)
+    (; x, y, s, kkt_error, status) =
+        solve(solver_type, game.mcp, θ; regularize_linear_solve = :internal, kwargs...)
 
     # Unpack primals per-player for ease of access later.
     end_dims = cumsum(game.dims.x)
diff --git a/src/mcp.jl b/src/mcp.jl
@@ -7,15 +7,16 @@ The primal-dual system arises when we introduce slack variable `s` and set
                              G(x, y; θ)     = 0
                              H(x, y; θ) - s = 0
                              s ⦿ y - ϵ      = 0
-for some ϵ > 0. Define the function `F(x, y, s; θ, ϵ)` to return the left
-hand side of this system of equations.
+for some ϵ > 0. Define the function `F(x, y, s; θ, ϵ, [η])` to return the left
+hand side of this system of equations. Here, `η` is an optional nonnegative
+regularization parameter defined by "internally-regularized" problems.
 """
 struct PrimalDualMCP{T1,T2,T3}
-    "A callable `F!(result, x, y, s; θ, ϵ)` which computes the KKT error in-place."
+    "A callable `F!(result, x, y, s; θ, ϵ, [η])` to the KKT error in-place."
     F!::T1
-    "A callable `∇F_z!(result, x, y, s; θ, ϵ)` to compute ∇F wrt z in-place."
+    "A callable `∇F_z!(result, x, y, s; θ, ϵ, [η])` to compute ∇F wrt z in-place."
     ∇F_z!::T2
-    "A callable `∇F_θ!(result, x, y, s; θ, ϵ)` to compute ∇F wrt θ in-place."
+    "A callable `∇F_θ!(result, x, y, s; θ, ϵ, [η])` to compute ∇F wrt θ in-place."
     ∇F_θ!::T3
     "Dimension of unconstrained variable."
     unconstrained_dimension::Int
@@ -57,7 +58,8 @@ function PrimalDualMCP(
     H_symbolic::Vector{T},
     x_symbolic::Vector{T},
     y_symbolic::Vector{T},
-    θ_symbolic::Vector{T};
+    θ_symbolic::Vector{T},
+    η_symbolic::Union{Nothing,T} = nothing;
     compute_sensitivities = false,
     backend_options = (;),
 ) where {T<:Union{SymbolicTracingUtils.FD.Node,SymbolicTracingUtils.Symbolics.Num}}
@@ -79,7 +81,7 @@ function PrimalDualMCP(
         s_symbolic .* y_symbolic .- ϵ_symbolic
     ]
 
-    F! = let
+    F! = if isnothing(η_symbolic)
         _F! = SymbolicTracingUtils.build_function(
             F_symbolic,
             x_symbolic,
@@ -92,37 +94,28 @@ function PrimalDualMCP(
         )
 
         (result, x, y, s; θ, ϵ) -> _F!(result, x, y, s, θ, ϵ)
-    end
-
-    ∇F_z! = let
-        ∇F_symbolic = SymbolicTracingUtils.sparse_jacobian(F_symbolic, z_symbolic)
-        _∇F! = SymbolicTracingUtils.build_function(
-            ∇F_symbolic,
+    else
+        _F! = SymbolicTracingUtils.build_function(
+            F_symbolic,
             x_symbolic,
             y_symbolic,
             s_symbolic,
             θ_symbolic,
-            ϵ_symbolic;
+            ϵ_symbolic,
+            η_symbolic;
             in_place = true,
             backend_options,
         )
 
-        rows, cols, _ = SparseArrays.findnz(∇F_symbolic)
-        constant_entries =
-            SymbolicTracingUtils.get_constant_entries(∇F_symbolic, z_symbolic)
-        SymbolicTracingUtils.SparseFunction(
-            (result, x, y, s; θ, ϵ) -> _∇F!(result, x, y, s, θ, ϵ),
-            rows,
-            cols,
-            size(∇F_symbolic),
-            constant_entries,
-        )
+        (result, x, y, s; θ, ϵ, η = 0.0) -> _F!(result, x, y, s, θ, ϵ, η)
     end
 
-    ∇F_θ! =
-        !compute_sensitivities ? nothing :
-        let
-            ∇F_symbolic = SymbolicTracingUtils.sparse_jacobian(F_symbolic, θ_symbolic)
+    function process_∇F(F, var)
+        ∇F_symbolic = SymbolicTracingUtils.sparse_jacobian(F, var)
+        rows, cols, _ = SparseArrays.findnz(∇F_symbolic)
+        constant_entries = SymbolicTracingUtils.get_constant_entries(∇F_symbolic, var)
+
+        if isnothing(η_symbolic)
             _∇F! = SymbolicTracingUtils.build_function(
                 ∇F_symbolic,
                 x_symbolic,
@@ -134,17 +127,38 @@ function PrimalDualMCP(
                 backend_options,
             )
 
-            rows, cols, _ = SparseArrays.findnz(∇F_symbolic)
-            constant_entries =
-                SymbolicTracingUtils.get_constant_entries(∇F_symbolic, θ_symbolic)
-            SymbolicTracingUtils.SparseFunction(
+            return SymbolicTracingUtils.SparseFunction(
                 (result, x, y, s; θ, ϵ) -> _∇F!(result, x, y, s, θ, ϵ),
                 rows,
                 cols,
                 size(∇F_symbolic),
                 constant_entries,
             )
+        else
+            _∇F! = SymbolicTracingUtils.build_function(
+                ∇F_symbolic,
+                x_symbolic,
+                y_symbolic,
+                s_symbolic,
+                θ_symbolic,
+                ϵ_symbolic,
+                η_symbolic;
+                in_place = true,
+                backend_options,
+            )
+
+            return SymbolicTracingUtils.SparseFunction(
+                (result, x, y, s; θ, ϵ, η = 0.0) -> _∇F!(result, x, y, s, θ, ϵ, η),
+                rows,
+                cols,
+                size(∇F_symbolic),
+                constant_entries,
+            )
         end
+    end
+
+    ∇F_z! = process_∇F(F_symbolic, z_symbolic)
+    ∇F_θ! = !compute_sensitivities ? nothing : process_∇F(F_symbolic, θ_symbolic)
 
     PrimalDualMCP(F!, ∇F_z!, ∇F_θ!, length(x_symbolic), length(y_symbolic))
 end
@@ -157,6 +171,7 @@ function PrimalDualMCP(
     lower_bounds::Vector,
     upper_bounds::Vector;
     parameter_dimension,
+    internally_regularized = false,
     compute_sensitivities = false,
     backend = SymbolicTracingUtils.SymbolicsBackend(),
     backend_options = (;),
@@ -165,6 +180,21 @@ function PrimalDualMCP(
     θ_symbolic = SymbolicTracingUtils.make_variables(backend, :θ, parameter_dimension)
     K_symbolic = K(z_symbolic; θ = θ_symbolic)
 
+    if internally_regularized
+        η_symbolic = only(SymbolicTracingUtils.make_variables(backend, :η, 1))
+
+        return PrimalDualMCP(
+            K_symbolic,
+            z_symbolic,
+            θ_symbolic,
+            lower_bounds,
+            upper_bounds;
+            η_symbolic,
+            compute_sensitivities,
+            backend_options,
+        )
+    end
+
     PrimalDualMCP(
         K_symbolic,
         z_symbolic,
@@ -185,6 +215,7 @@ function PrimalDualMCP(
     θ_symbolic::Vector{T},
     lower_bounds::Vector,
     upper_bounds::Vector;
+    η_symbolic::Union{Nothing,T} = nothing,
     compute_sensitivities = false,
     backend_options = (;),
 ) where {T<:Union{SymbolicTracingUtils.FD.Node,SymbolicTracingUtils.Symbolics.Num}}
@@ -203,7 +234,8 @@ function PrimalDualMCP(
         H_symbolic,
         x_symbolic,
         y_symbolic,
-        θ_symbolic;
+        θ_symbolic,
+        η_symbolic;
         compute_sensitivities,
         backend_options,
     )
diff --git a/src/solver.jl b/src/solver.jl
@@ -27,11 +27,12 @@ Keyword arguments:
     - `tol::Real = 1e-4`: the tolerance for the KKT error.
     - `max_inner_iters::Int = 20`: the maximum number of inner iterations.
     - `max_outer_iters::Int = 50`: the maximum number of outer iterations.
-    - `tightening_rate::Real = 0.1`: the rate at which to tighten the tolerance.
-    - `loosening_rate::Real = 0.5`: the rate at which to loosen the tolerance.
+    - `tightening_rate::Real = 0.1`: rate for tightening tolerance and regularization.
+    - `loosening_rate::Real = 0.5`: rate for loosening tolerance and regularization.
     - `min_stepsize::Real = 1e-2`: the minimum step size for the linesearch.
     - `verbose::Bool = false`: whether to print debug information.
     - `linear_solve_algorithm::LinearSolve.SciMLLinearSolveAlgorithm`: the linear solve algorithm to use. Any solver from `LinearSolve.jl` can be used.
+    - `regularize_linear_solve::Symbol = :none`: scheme for regularizing the linear system matrix ∇F. Options are {:none, :identity, :internal}.
 """
 function solve(
     ::InteriorPoint,
@@ -49,6 +50,7 @@ function solve(
     min_stepsize = 1e-4,
     verbose = false,
     linear_solve_algorithm = UMFPACKFactorization(),
+    regularize_linear_solve = :identity,
 )
     # Set up common memory.
     ∇F = mcp.∇F_z!.result_buffer
@@ -61,11 +63,12 @@ function solve(
 
     linsolve = init(LinearProblem(∇F, δz), linear_solve_algorithm)
 
-    # Main solver loop.
+    # Initialize primal, dual, and slack variables.
     x = @something(x₀, zeros(mcp.unconstrained_dimension))
     y = @something(y₀, ones(mcp.constrained_dimension))
     s = @something(s₀, ones(mcp.constrained_dimension))
 
+    # Initialize IP relaxation parameter.
     if ϵ₀ === :auto
         is_warmstarted = !isnothing(x₀) && !isnothing(y₀) && !isnothing(s₀)
         if is_warmstarted
@@ -77,6 +80,10 @@ function solve(
         ϵ = ϵ₀
     end
 
+    # Initialize regularization parameter.
+    η = tol
+
+    # Main solver loop.
     status = :solved
     total_iters = 0
     inner_iters = 1
@@ -88,14 +95,30 @@ function solve(
 
         while kkt_error > ϵ && inner_iters < max_inner_iters
             total_iters += 1
-            # Compute the Newton step.
-            # TODO: Can add some adaptive regularization.
+
+            # Compute the (regularized) Newton step.
             # TODO: use a linear operator with a lazy gradient computation here.
-            mcp.F!(F, x, y, s; θ, ϵ)
-            mcp.∇F_z!(∇F, x, y, s; θ, ϵ)
-            linsolve.A = ∇F + tol * I
+            if regularize_linear_solve === :internal
+                mcp.F!(F, x, y, s; θ, ϵ, η = 0.0)
+                mcp.∇F_z!(∇F, x, y, s; θ, ϵ, η)
+            else
+                mcp.F!(F, x, y, s; θ, ϵ)
+                mcp.∇F_z!(∇F, x, y, s; θ, ϵ)
+            end
+
+            if regularize_linear_solve === :identity
+                if size(∇F, 1) == size(∇F, 2)
+                    linsolve.A = ∇F + η * I
+                else
+                    @warn "Cannot use identity regularization on a nonsquare problem."
+                end
+            else
+                linsolve.A = ∇F
+            end
+
             linsolve.b = -F
             solution = solve!(linsolve)
+
             if !SciMLBase.successful_retcode(solution) &&
                (solution.retcode !== SciMLBase.ReturnCode.Default)
                 verbose &&
@@ -129,10 +152,12 @@ function solve(
             break
         end
 
-        ϵ *= if status === :solved
-            1 - exp(-tightening_rate * inner_iters)
+        if status === :solved
+            ϵ *= 1 - exp(-tightening_rate * inner_iters)
+            η *= 1 - exp(-tightening_rate * inner_iters)
         else
-            1 + exp(-loosening_rate * inner_iters)
+            ϵ *= 1 + exp(-loosening_rate * inner_iters)
+            η *= 1 + exp(-loosening_rate * inner_iters)
         end
         ϵ = min(ϵ, one(ϵ))
         outer_iters += 1