From ec71ad74c3315783c9317ac6022f3a38d7a7b1ab Mon Sep 17 00:00:00 2001 From: fpacaud Date: Fri, 14 Jun 2024 19:29:16 +0200 Subject: [PATCH 1/2] [KKT] Add K2.5 formulation for augmented KKT system --- src/IPM/factorization.jl | 42 +++++ src/IPM/kernels.jl | 55 ++++++- src/KKT/KKTsystem.jl | 1 + src/KKT/Sparse/scaled_augmented.jl | 242 +++++++++++++++++++++++++++++ test/kkt_test.jl | 1 + test/madnlp_test.jl | 8 + 6 files changed, 347 insertions(+), 2 deletions(-) create mode 100644 src/KKT/Sparse/scaled_augmented.jl diff --git a/src/IPM/factorization.jl b/src/IPM/factorization.jl index e5c306d7..0552772e 100644 --- a/src/IPM/factorization.jl +++ b/src/IPM/factorization.jl @@ -41,6 +41,34 @@ function solve!(kkt::AbstractReducedKKTSystem, w::AbstractKKTVector) return w end +function solve!(kkt::ScaledSparseKKTSystem, w::AbstractKKTVector) + r3 = kkt.buffer1 + r4 = kkt.buffer2 + fill!(r3, 0.0) + fill!(r4, 0.0) + + wzl = dual_lb(w) # size nlb + wzu = dual_ub(w) # size nub + + r3[kkt.ind_lb] .= wzl + r3[kkt.ind_ub] .*= sqrt.(kkt.u_diag) + r3[kkt.ind_lb] ./= sqrt.(kkt.l_diag) + r4[kkt.ind_ub] .= wzu + r4[kkt.ind_lb] .*= sqrt.(kkt.l_diag) + r4[kkt.ind_ub] ./= sqrt.(kkt.u_diag) + # Build RHS + w.xp .*= kkt.scaling_factor + w.xp .+= (r3 .+ r4) + # Backsolve + solve!(kkt.linear_solver, primal_dual(w)) + # Unpack solution + w.xp .*= kkt.scaling_factor + + wzl .= (wzl .- kkt.l_lower .* w.xp_lr) ./ kkt.l_diag + wzu .= (.-wzu .+ kkt.u_lower .* w.xp_ur) ./ kkt.u_diag + return w +end + function solve!( kkt::SparseKKTSystem{T, VT, MT, QN}, w::AbstractKKTVector @@ -167,6 +195,20 @@ function mul!(w::AbstractKKTVector{T}, kkt::Union{SparseKKTSystem{T,VT,MT,QN},Sp return w end +function mul!(w::AbstractKKTVector{T}, kkt::ScaledSparseKKTSystem{T,VT,MT,QN}, x::AbstractKKTVector, alpha = one(T), beta = zero(T)) where {T, VT, MT, QN<:ExactHessian} + mul!(primal(w), Symmetric(kkt.hess_com, :L), primal(x), alpha, beta) + mul!(primal(w), kkt.jac_com', dual(x), alpha, one(T)) + mul!(dual(w), kkt.jac_com, primal(x), alpha, beta) + # Custom reduction + primal(w) .+= alpha .* kkt.reg .* primal(x) + dual(w) .+= alpha .* kkt.du_diag .* dual(x) + w.xp_lr .-= alpha .* dual_lb(x) + w.xp_ur .+= alpha .* dual_ub(x) + dual_lb(w) .= beta .* dual_lb(w) .+ alpha .* (x.xp_lr .* kkt.l_lower .+ dual_lb(x) .* kkt.l_diag) + dual_ub(w) .= beta .* dual_ub(w) .+ alpha .* (x.xp_ur .* kkt.u_lower .- dual_ub(x) .* kkt.u_diag) + return w +end + function mul!(w::AbstractKKTVector{T}, kkt::Union{SparseKKTSystem{T,VT,MT,QN},SparseUnreducedKKTSystem{T,VT,MT,QN}}, x::AbstractKKTVector, alpha = one(T), beta = zero(T)) where {T, VT, MT, QN<:CompactLBFGS} qn = kkt.quasi_newton n, p = size(qn) diff --git a/src/IPM/kernels.jl b/src/IPM/kernels.jl index a89c82b2..af983d55 100644 --- a/src/IPM/kernels.jl +++ b/src/IPM/kernels.jl @@ -10,8 +10,8 @@ function set_aug_diagonal!(kkt::AbstractKKTSystem{T}, solver::MadNLPSolver{T}) w fill!(kkt.reg, zero(T)) fill!(kkt.du_diag, zero(T)) - kkt.l_diag .= solver.xl_r .- solver.x_lr - kkt.u_diag .= solver.x_ur .- solver.xu_r + kkt.l_diag .= solver.xl_r .- solver.x_lr # (Xˡ - X) + kkt.u_diag .= solver.x_ur .- solver.xu_r # (X - Xᵘ) copyto!(kkt.l_lower, solver.zl_r) copyto!(kkt.u_lower, solver.zu_r) @@ -33,6 +33,41 @@ function _set_aug_diagonal!(kkt::AbstractUnreducedKKTSystem) return end +function set_aug_diagonal!(kkt::ScaledSparseKKTSystem{T}, solver::MadNLPSolver{T}) where T + fill!(kkt.reg, zero(T)) + fill!(kkt.du_diag, zero(T)) + # Ensure l_diag and u_diag have only non negative entries + kkt.l_diag .= solver.x_lr .- solver.xl_r # (X - Xˡ) + kkt.u_diag .= solver.xu_r .- solver.x_ur # (Xᵘ - X) + copyto!(kkt.l_lower, solver.zl_r) + copyto!(kkt.u_lower, solver.zu_r) + _set_aug_diagonal!(kkt) +end + +function _set_aug_diagonal!(kkt::ScaledSparseKKTSystem{T}) where T + xlzu = kkt.buffer1 + xuzl = kkt.buffer2 + fill!(xlzu, zero(T)) + fill!(xuzl, zero(T)) + + xlzu[kkt.ind_ub] .= kkt.u_lower # zᵘ + xlzu[kkt.ind_lb] .*= kkt.l_diag # (X - Xˡ) zᵘ + + xuzl[kkt.ind_lb] .= kkt.l_lower # zˡ + xuzl[kkt.ind_ub] .*= kkt.u_diag # (Xᵘ - X) zˡ + + kkt.pr_diag .= xlzu .+ xuzl + + fill!(kkt.scaling_factor, one(T)) + kkt.scaling_factor[kkt.ind_lb] .*= sqrt.(kkt.l_diag) + kkt.scaling_factor[kkt.ind_ub] .*= sqrt.(kkt.u_diag) + + # Scale regularization by scaling factor. + kkt.pr_diag .+= kkt.reg .* kkt.scaling_factor.^2 + return +end + + # Robust restoration function set_aug_RR!(kkt::AbstractKKTSystem, solver::MadNLPSolver, RR::RobustRestorer) x = full(solver.x) @@ -48,7 +83,23 @@ function set_aug_RR!(kkt::AbstractKKTSystem, solver::MadNLPSolver, RR::RobustRes kkt.u_diag .= solver.x_ur .- solver.xu_r _set_aug_diagonal!(kkt) + return +end +function set_aug_RR!(kkt::ScaledSparseKKTSystem, solver::MadNLPSolver, RR::RobustRestorer) + x = full(solver.x) + xl = full(solver.xl) + xu = full(solver.xu) + zl = full(solver.zl) + zu = full(solver.zu) + kkt.reg .= RR.zeta .* RR.D_R .^ 2 + kkt.du_diag .= .- RR.pp ./ RR.zp .- RR.nn ./ RR.zn + copyto!(kkt.l_lower, solver.zl_r) + copyto!(kkt.u_lower, solver.zu_r) + kkt.l_diag .= solver.x_lr .- solver.xl_r + kkt.u_diag .= solver.xu_r .- solver.x_ur + + _set_aug_diagonal!(kkt) return end diff --git a/src/KKT/KKTsystem.jl b/src/KKT/KKTsystem.jl index e30ffeae..9370af57 100644 --- a/src/KKT/KKTsystem.jl +++ b/src/KKT/KKTsystem.jl @@ -243,5 +243,6 @@ include("Dense/utils.jl") include("Sparse/unreduced.jl") include("Sparse/augmented.jl") include("Sparse/condensed.jl") +include("Sparse/scaled_augmented.jl") include("Sparse/utils.jl") diff --git a/src/KKT/Sparse/scaled_augmented.jl b/src/KKT/Sparse/scaled_augmented.jl new file mode 100644 index 00000000..1748485c --- /dev/null +++ b/src/KKT/Sparse/scaled_augmented.jl @@ -0,0 +1,242 @@ +""" + ScaledSparseKKTSystem{T, VT, MT, QN} <: AbstractReducedKKTSystem{T, VT, MT, QN} + +Scaled version of the [`AbstractReducedKKTSystem`](@ref) (using the K2.5 formulation introduced in [1]). + +The K2.5 formulation of the augmented KKT system has a better conditioning +than the original (K2) formulation. It is recommend switching to a `ScaledSparseKKTSystem` +if you encounter numerical difficulties in MadNLP. + +At a primal-dual iterate ``(x, s, y, z)``, the matrix writes +``` +[√Ξₓ Wₓₓ √Ξₓ + Δₓ 0 √Ξₓ Aₑ' √Ξₓ Aᵢ'] [√Ξₓ⁻¹ Δx] +[0 Δₛ 0 -√Ξₛ ] [√Ξₛ⁻¹ Δs] +[Aₑ√Ξₓ 0 0 0 ] [Δy ] +[Aᵢ√Ξₓ -√Ξₛ 0 0 ] [Δz ] +``` +with +* ``Wₓₓ``: Hessian of the Lagrangian. +* ``Aₑ``: Jacobian of the equality constraints +* ``Aᵢ``: Jacobian of the inequality constraints +* ``Δₓ = Xᵤ Zₗˣ + Xₗ Zᵤˣ`` +* ``Δₛ = Sᵤ Zₗˢ + Sₗ Zᵤˢ`` +* ``Ξₓ = Xₗ Xᵤ`` +* ``Ξₛ = Sₗ Sᵤ`` + +# References +[1] Saunders, M. A. (2019). CME 338 Class Notes 9: PDCO: Primal-Dual Interior Methods. url: +http://stanford.edu/class/cme338/notes.html + +""" +struct ScaledSparseKKTSystem{T, VT, MT, QN, LS, VI, VI32} <: AbstractReducedKKTSystem{T, VT, MT, QN} + hess::VT + jac_callback::VT + jac::VT + quasi_newton::QN + reg::VT + pr_diag::VT + du_diag::VT + l_diag::VT + u_diag::VT + l_lower::VT + u_lower::VT + scaling_factor::VT + buffer1::VT + buffer2::VT + # Augmented system + aug_raw::SparseMatrixCOO{T,Int32,VT, VI32} + aug_com::MT + aug_csc_map::Union{Nothing, VI} + scaled_aug_raw::SparseMatrixCOO{T,Int32,VT, VI32} + # Hessian + hess_raw::SparseMatrixCOO{T,Int32,VT, VI32} + hess_com::MT + hess_csc_map::Union{Nothing, VI} + # Jacobian + jac_raw::SparseMatrixCOO{T,Int32,VT, VI32} + jac_com::MT + jac_csc_map::Union{Nothing, VI} + # LinearSolver + linear_solver::LS + # Info + ind_ineq::VI + ind_lb::VI + ind_ub::VI +end + +# Build KKT system directly from SparseCallback +function create_kkt_system( + ::Type{ScaledSparseKKTSystem}, + cb::SparseCallback{T,VT}, + ind_cons, + linear_solver::Type; + opt_linear_solver=default_options(linear_solver), + hessian_approximation=ExactHessian, +) where {T,VT} + + n_slack = length(ind_cons.ind_ineq) + # Deduce KKT size. + + n = cb.nvar + m = cb.ncon + # Evaluate sparsity pattern + jac_sparsity_I = create_array(cb, Int32, cb.nnzj) + jac_sparsity_J = create_array(cb, Int32, cb.nnzj) + _jac_sparsity_wrapper!(cb,jac_sparsity_I, jac_sparsity_J) + + quasi_newton = create_quasi_newton(hessian_approximation, cb, n) + hess_sparsity_I, hess_sparsity_J = build_hessian_structure(cb, hessian_approximation) + + nlb = length(ind_cons.ind_lb) + nub = length(ind_cons.ind_ub) + + force_lower_triangular!(hess_sparsity_I,hess_sparsity_J) + + ind_ineq = ind_cons.ind_ineq + + n_slack = length(ind_ineq) + n_jac = length(jac_sparsity_I) + n_hess = length(hess_sparsity_I) + n_tot = n + n_slack + + + aug_vec_length = n_tot+m + aug_mat_length = n_tot+m+n_hess+n_jac+n_slack + + I = create_array(cb, Int32, aug_mat_length) + J = create_array(cb, Int32, aug_mat_length) + V = VT(undef, aug_mat_length) + fill!(V, 0.0) # Need to initiate V to avoid NaN + + offset = n_tot+n_jac+n_slack+n_hess+m + + I[1:n_tot] .= 1:n_tot + I[n_tot+1:n_tot+n_hess] = hess_sparsity_I + I[n_tot+n_hess+1:n_tot+n_hess+n_jac] .= (jac_sparsity_I.+n_tot) + I[n_tot+n_hess+n_jac+1:n_tot+n_hess+n_jac+n_slack] .= ind_ineq .+ n_tot + I[n_tot+n_hess+n_jac+n_slack+1:offset] .= (n_tot+1:n_tot+m) + + J[1:n_tot] .= 1:n_tot + J[n_tot+1:n_tot+n_hess] = hess_sparsity_J + J[n_tot+n_hess+1:n_tot+n_hess+n_jac] .= jac_sparsity_J + J[n_tot+n_hess+n_jac+1:n_tot+n_hess+n_jac+n_slack] .= (n+1:n+n_slack) + J[n_tot+n_hess+n_jac+n_slack+1:offset] .= (n_tot+1:n_tot+m) + + pr_diag = _madnlp_unsafe_wrap(V, n_tot) + du_diag = _madnlp_unsafe_wrap(V, m, n_jac+n_slack+n_hess+n_tot+1) + + reg = VT(undef, n_tot) + l_diag = VT(undef, nlb) + u_diag = VT(undef, nub) + l_lower = VT(undef, nlb) + u_lower = VT(undef, nub) + + scaling_factor = VT(undef, n_tot) + buffer1 = VT(undef, n_tot) + buffer2 = VT(undef, n_tot) + + hess = _madnlp_unsafe_wrap(V, n_hess, n_tot+1) + jac = _madnlp_unsafe_wrap(V, n_jac+n_slack, n_hess+n_tot+1) + jac_callback = _madnlp_unsafe_wrap(V, n_jac, n_hess+n_tot+1) + + aug_raw = SparseMatrixCOO(aug_vec_length,aug_vec_length,I,J,V) + jac_raw = SparseMatrixCOO( + m, n_tot, + Int32[jac_sparsity_I; ind_ineq], + Int32[jac_sparsity_J; n+1:n+n_slack], + jac, + ) + hess_raw = SparseMatrixCOO( + n_tot, n_tot, + hess_sparsity_I, + hess_sparsity_J, + hess, + ) + scaled_aug_raw = SparseMatrixCOO(aug_vec_length,aug_vec_length,I,J,copy(V)) + + aug_com, aug_csc_map = coo_to_csc(aug_raw) + jac_com, jac_csc_map = coo_to_csc(jac_raw) + hess_com, hess_csc_map = coo_to_csc(hess_raw) + + _linear_solver = linear_solver( + aug_com; opt = opt_linear_solver + ) + + return ScaledSparseKKTSystem( + hess, jac_callback, jac, quasi_newton, reg, pr_diag, du_diag, + l_diag, u_diag, l_lower, u_lower, + scaling_factor, buffer1, buffer2, + aug_raw, aug_com, aug_csc_map, scaled_aug_raw, + hess_raw, hess_com, hess_csc_map, + jac_raw, jac_com, jac_csc_map, + _linear_solver, + ind_ineq, ind_cons.ind_lb, ind_cons.ind_ub, + ) +end + +num_variables(kkt::ScaledSparseKKTSystem) = length(kkt.pr_diag) +get_jacobian(kkt::ScaledSparseKKTSystem) = kkt.jac_callback + +function initialize!(kkt::ScaledSparseKKTSystem{T}) where T + fill!(kkt.reg, one(T)) + fill!(kkt.pr_diag, one(T)) + fill!(kkt.du_diag, zero(T)) + fill!(kkt.hess, zero(T)) + fill!(kkt.l_lower, zero(T)) + fill!(kkt.u_lower, zero(T)) + fill!(kkt.l_diag, one(T)) + fill!(kkt.u_diag, one(T)) + fill!(kkt.scaling_factor, one(T)) + fill!(nonzeros(kkt.hess_com), zero(T)) # so that mul! in the initial primal-dual solve has no effect +end + +function jtprod!(y::AbstractVector, kkt::ScaledSparseKKTSystem, x::AbstractVector) + mul!(y, kkt.jac_com', x) +end + +function compress_jacobian!(kkt::ScaledSparseKKTSystem) + ns = length(kkt.ind_ineq) + kkt.jac[end-ns+1:end] .= -1.0 + transfer!(kkt.jac_com, kkt.jac_raw, kkt.jac_csc_map) +end + +function compress_hessian!(kkt::ScaledSparseKKTSystem) + transfer!(kkt.hess_com, kkt.hess_raw, kkt.hess_csc_map) +end + +# N.B. Matrices are assumed to have an augmented KKT structure and be lower-triangular. +function _build_scale_augmented_system_coo!(dest, src, scaling, n, m) + for (k, i, j) in zip(1:nnz(src), src.I, src.J) + # Primal regularization pr_diag + if k <= n + dest.V[k] = src.V[k] + # Hessian block + elseif i <= n && j <= n + dest.V[k] = src.V[k] * scaling[i] * scaling[j] + # Jacobian block + elseif n + 1 <= i <= n + m && j <= n + dest.V[k] = src.V[k] * scaling[j] + # Dual regularization du_diag + elseif n + 1 <= i <= n + m && n + 1 <= j <= n + m + dest.V[k] = src.V[k] + end + end +end + +function build_kkt!(kkt::ScaledSparseKKTSystem) + m, n = size(kkt.jac_raw) + _build_scale_augmented_system_coo!( + kkt.scaled_aug_raw, + kkt.aug_raw, + kkt.scaling_factor, + n, m, + ) + transfer!(kkt.aug_com, kkt.scaled_aug_raw, kkt.aug_csc_map) +end + +function regularize_diagonal!(kkt::ScaledSparseKKTSystem, primal, dual) + kkt.reg .+= primal + kkt.pr_diag .+= primal .* kkt.scaling_factor.^2 + kkt.du_diag .= .-dual +end + diff --git a/test/kkt_test.jl b/test/kkt_test.jl index 8502b280..4686d3cf 100644 --- a/test/kkt_test.jl +++ b/test/kkt_test.jl @@ -29,6 +29,7 @@ end (MadNLP.SparseKKTSystem, MadNLP.SparseCallback), (MadNLP.SparseUnreducedKKTSystem, MadNLP.SparseCallback), (MadNLP.SparseCondensedKKTSystem, MadNLP.SparseCallback), + (MadNLP.ScaledSparseKKTSystem, MadNLP.SparseCallback), (MadNLP.DenseKKTSystem, MadNLP.DenseCallback), (MadNLP.DenseCondensedKKTSystem, MadNLP.DenseCallback), ] diff --git a/test/madnlp_test.jl b/test/madnlp_test.jl index a9863dbc..40a88432 100644 --- a/test/madnlp_test.jl +++ b/test/madnlp_test.jl @@ -20,6 +20,14 @@ testset = [ print_level=MadNLP.ERROR), [] ], + [ + "ScaledSparseKKTSystem + LapackCPU", + ()->MadNLP.Optimizer( + linear_solver=MadNLP.LapackCPUSolver, + kkt_system=MadNLP.ScaledSparseKKTSystem, + print_level=MadNLP.ERROR), + [] + ], [ "DenseKKTSystem + LapackCPU-BUNCHKAUFMAN", ()->MadNLP.Optimizer( From 12ab67b70f6da7bd1904e3a0f4559004ef4e5f1d Mon Sep 17 00:00:00 2001 From: fpacaud Date: Mon, 24 Jun 2024 10:29:27 +0200 Subject: [PATCH 2/2] [ci skip] update reference --- src/KKT/Sparse/scaled_augmented.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/KKT/Sparse/scaled_augmented.jl b/src/KKT/Sparse/scaled_augmented.jl index 1748485c..b7316a30 100644 --- a/src/KKT/Sparse/scaled_augmented.jl +++ b/src/KKT/Sparse/scaled_augmented.jl @@ -1,7 +1,7 @@ """ ScaledSparseKKTSystem{T, VT, MT, QN} <: AbstractReducedKKTSystem{T, VT, MT, QN} -Scaled version of the [`AbstractReducedKKTSystem`](@ref) (using the K2.5 formulation introduced in [1]). +Scaled version of the [`AbstractReducedKKTSystem`](@ref) (using the K2.5 formulation introduced in [GOS]). The K2.5 formulation of the augmented KKT system has a better conditioning than the original (K2) formulation. It is recommend switching to a `ScaledSparseKKTSystem` @@ -24,8 +24,9 @@ with * ``Ξₛ = Sₗ Sᵤ`` # References -[1] Saunders, M. A. (2019). CME 338 Class Notes 9: PDCO: Primal-Dual Interior Methods. url: -http://stanford.edu/class/cme338/notes.html +[GOS] Ghannad, Alexandre, Dominique Orban, and Michael A. Saunders. +"Linear systems arising in interior methods for convex optimization: a symmetric formulation with bounded condition number." +Optimization Methods and Software 37, no. 4 (2022): 1344-1369. """ struct ScaledSparseKKTSystem{T, VT, MT, QN, LS, VI, VI32} <: AbstractReducedKKTSystem{T, VT, MT, QN}