diff --git a/ext/OptimizationEnzymeExt.jl b/ext/OptimizationEnzymeExt.jl index 40ce0ea..e2da730 100644 --- a/ext/OptimizationEnzymeExt.jl +++ b/ext/OptimizationEnzymeExt.jl @@ -43,7 +43,7 @@ function hv_f2_alloc(x, f, p) Enzyme.autodiff_deferred(Enzyme.Reverse, firstapply, Active, - f, + Const(f), Enzyme.Duplicated(x, dx), Const(p) ) @@ -105,7 +105,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, x, ) end elseif g == true - grad = (G, θ) -> f.grad(G, θ, p) + grad = (G, θ, p = p) -> f.grad(G, θ, p) else grad = nothing end @@ -123,7 +123,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, x, return y end elseif fg == true - fg! = (res, θ) -> f.fg(res, θ, p) + fg! = (res, θ, p = p) -> f.fg(res, θ, p) else fg! = nothing end @@ -139,7 +139,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, x, vdbθ = Tuple((copy(r) for r in eachrow(f.hess_prototype))) end - function hess(res, θ) + function hess(res, θ, p = p) Enzyme.make_zero!(bθ) Enzyme.make_zero!.(vdbθ) @@ -156,13 +156,13 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, x, end end elseif h == true - hess = (H, θ) -> f.hess(H, θ, p) + hess = (H, θ, p = p) -> f.hess(H, θ, p) else hess = nothing end if fgh == true && f.fgh === nothing - function fgh!(G, H, θ) + function fgh!(G, H, θ, p = p) vdθ = Tuple((Array(r) for r in eachrow(I(length(θ)) * one(eltype(θ))))) vdbθ = Tuple(zeros(eltype(θ), length(θ)) for i in eachindex(θ)) @@ -179,20 +179,20 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, x, end end elseif fgh == true - fgh! = (G, H, θ) -> f.fgh(G, H, θ, p) + fgh! = (G, H, θ, p = p) -> f.fgh(G, H, θ, p) else fgh! = nothing end if hv == true && f.hv === nothing - function hv!(H, θ, v) + function hv!(H, θ, v, p = p) H .= Enzyme.autodiff( Enzyme.Forward, hv_f2_alloc, DuplicatedNoNeed, Duplicated(θ, v), - Const(_f), Const(f.f), Const(p) + Const(f.f), Const(p) )[1] end elseif hv == true - hv! = (H, θ, v) -> f.hv(H, θ, v, p) + hv! = (H, θ, v, p = p) -> f.hv(H, θ, v, p) else hv! = nothing end @@ -247,7 +247,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, x, cons_j! = nothing end - if cons !== nothing && cons_vjp == true && f.cons_vjp == true + if cons !== nothing && cons_vjp == true && f.cons_vjp === nothing cons_res = zeros(eltype(x), num_cons) function cons_vjp!(res, θ, v) Enzyme.make_zero!(res) @@ -267,7 +267,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, x, cons_vjp! 
= nothing end - if cons !== nothing && cons_jvp == true && f.cons_jvp == true + if cons !== nothing && cons_jvp == true && f.cons_jvp === nothing cons_res = zeros(eltype(x), num_cons) function cons_jvp!(res, θ, v) @@ -327,7 +327,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, x, lag_vdbθ = Tuple((copy(r) for r in eachrow(f.hess_prototype))) end - function lag_h!(h, θ, σ, μ) + function lag_h!(h, θ, σ, μ, p = p) Enzyme.make_zero!(lag_bθ) Enzyme.make_zero!.(lag_vdbθ) @@ -350,8 +350,30 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, x, k += i end end + + function lag_h!(H::AbstractMatrix, θ, σ, μ, p = p) + Enzyme.make_zero!(H) + Enzyme.make_zero!(lag_bθ) + Enzyme.make_zero!.(lag_vdbθ) + + Enzyme.autodiff(Enzyme.Forward, + lag_grad, + Enzyme.BatchDuplicated(θ, lag_vdθ), + Enzyme.BatchDuplicatedNoNeed(lag_bθ, lag_vdbθ), + Const(lagrangian), + Const(f.f), + Const(f.cons), + Const(p), + Const(σ), + Const(μ) + ) + + for i in eachindex(θ) + H[i, :] .= lag_vdbθ[i] + end + end elseif lag_h == true && cons !== nothing - lag_h! = (θ, σ, μ) -> f.lag_h(θ, σ, μ, p) + lag_h! = (θ, σ, μ, p = p) -> f.lag_h(θ, σ, μ, p) else lag_h! = nothing end @@ -389,7 +411,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, x lag_h = false) if g == true && f.grad === nothing res = zeros(eltype(x), size(x)) - function grad(θ) + function grad(θ, p = p) Enzyme.make_zero!(res) Enzyme.autodiff(Enzyme.Reverse, Const(firstapply), @@ -401,14 +423,14 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, x return res end elseif fg == true - grad = (θ) -> f.grad(θ, p) + grad = (θ, p = p) -> f.grad(θ, p) else grad = nothing end if fg == true && f.fg === nothing res_fg = zeros(eltype(x), size(x)) - function fg!(θ) + function fg!(θ, p = p) Enzyme.make_zero!(res_fg) y = Enzyme.autodiff(Enzyme.ReverseWithPrimal, Const(firstapply), @@ -420,7 +442,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, x return y, res end elseif fg == true - fg! = (θ) -> f.fg(θ, p) + fg! = (θ, p = p) -> f.fg(θ, p) else fg! = nothing end @@ -430,7 +452,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, x bθ = zeros(eltype(x), length(x)) vdbθ = Tuple(zeros(eltype(x), length(x)) for i in eachindex(x)) - function hess(θ) + function hess(θ, p = p) Enzyme.make_zero!(bθ) Enzyme.make_zero!.(vdbθ) @@ -446,7 +468,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, x vcat, [reshape(vdbθ[i], (1, length(vdbθ[i]))) for i in eachindex(θ)]) end elseif h == true - hess = (θ) -> f.hess(θ, p) + hess = (θ, p = p) -> f.hess(θ, p) else hess = nothing end @@ -457,7 +479,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, x G_fgh = zeros(eltype(x), length(x)) H_fgh = zeros(eltype(x), length(x), length(x)) - function fgh!(θ) + function fgh!(θ, p = p) Enzyme.make_zero!(G_fgh) Enzyme.make_zero!(H_fgh) Enzyme.make_zero!.(vdbθ_fgh) @@ -476,20 +498,20 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, x return G_fgh, H_fgh end elseif fgh == true - fgh! = (θ) -> f.fgh(θ, p) + fgh! = (θ, p = p) -> f.fgh(θ, p) else fgh! = nothing end if hv == true && f.hv === nothing - function hv!(θ, v) + function hv!(θ, v, p = p) return Enzyme.autodiff( Enzyme.Forward, hv_f2_alloc, DuplicatedNoNeed, Duplicated(θ, v), Const(_f), Const(f.f), Const(p) )[1] end elseif hv == true - hv! = (θ, v) -> f.hv(θ, v, p) + hv! 
= (θ, v, p = p) -> f.hv(θ, v, p) else hv! = f.hv end @@ -604,7 +626,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, x lag_vdbθ = Tuple((copy(r) for r in eachrow(f.hess_prototype))) end - function lag_h!(θ, σ, μ) + function lag_h!(θ, σ, μ, p = p) Enzyme.make_zero!(lag_bθ) Enzyme.make_zero!.(lag_vdbθ) @@ -630,7 +652,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, x return res end elseif lag_h == true && cons !== nothing - lag_h! = (θ, σ, μ) -> f.lag_h(θ, σ, μ, p) + lag_h! = (θ, σ, μ, p = p) -> f.lag_h(θ, σ, μ, p) else lag_h! = nothing end diff --git a/ext/OptimizationZygoteExt.jl b/ext/OptimizationZygoteExt.jl index 75f5650..4916321 100644 --- a/ext/OptimizationZygoteExt.jl +++ b/ext/OptimizationZygoteExt.jl @@ -7,8 +7,12 @@ import OptimizationBase.SciMLBase: OptimizationFunction import OptimizationBase.LinearAlgebra: I, dot import DifferentiationInterface import DifferentiationInterface: prepare_gradient, prepare_hessian, prepare_hvp, - prepare_jacobian, value_and_gradient!, + prepare_pullback, prepare_pushforward, pullback!, + pushforward!, + pullback, pushforward, + prepare_jacobian, value_and_gradient!, value_and_gradient, value_derivative_and_second_derivative!, + value_derivative_and_second_derivative, gradient!, hessian!, hvp!, jacobian!, gradient, hessian, hvp, jacobian using ADTypes, SciMLBase @@ -78,8 +82,17 @@ function OptimizationBase.instantiate_function( function hess(res, θ) hessian!(_f, res, soadtype, θ, extras_hess) end + if p !== SciMLBase.NullParameters() && p !== nothing + function hess(res, θ, p) + global _p = p + hessian!(_f, res, soadtype, θ) + end + end elseif h == true hess = (H, θ) -> f.hess(H, θ, p) + if p !== SciMLBase.NullParameters() && p !== nothing + hess = (H, θ, p) -> f.hess(H, θ, p) + end else hess = nothing end @@ -89,8 +102,18 @@ function OptimizationBase.instantiate_function( (y, _, _) = value_derivative_and_second_derivative!(_f, G, H, θ, extras_hess) return y end + if p !== SciMLBase.NullParameters() && p !== nothing + function fgh!(G, H, θ, p) + global _p = p + (y, _, _) = value_derivative_and_second_derivative!(_f, G, H, θ) + return y + end + end elseif fgh == true fgh! = (G, H, θ) -> f.fgh(G, H, θ, p) + if p !== SciMLBase.NullParameters() && p !== nothing + fgh! = (G, H, θ, p) -> f.fgh(G, H, θ, p) + end else fgh! = nothing end @@ -100,8 +123,17 @@ function OptimizationBase.instantiate_function( function hv!(H, θ, v) hvp!(_f, H, soadtype, θ, v, extras_hvp) end + if p !== SciMLBase.NullParameters() && p !== nothing + function hv!(H, θ, v, p) + global _p = p + hvp!(_f, H, soadtype, θ, v) + end + end elseif hv == true hv! = (H, θ, v) -> f.hv(H, θ, v, p) + if p !== SciMLBase.NullParameters() && p !== nothing + hv! = (H, θ, v, p) -> f.hv(H, θ, v, p) + end else hv! 
= nothing
     end
@@ -119,8 +151,11 @@ function OptimizationBase.instantiate_function(
             return copy(_res)
         end
-        function lagrangian(x, σ = one(eltype(x)), λ = ones(eltype(x), num_cons))
-            return σ * _f(x) + dot(λ, cons_oop(x))
+        function lagrangian(augvars)
+            θ = augvars[1:length(x)]
+            σ = augvars[length(x) + 1]
+            λ = augvars[(length(x) + 2):end]
+            return σ * _f(θ) + dot(λ, cons_oop(θ))
         end
     end
@@ -141,7 +176,7 @@ function OptimizationBase.instantiate_function(
     end
     if f.cons_vjp === nothing && cons_vjp == true && cons !== nothing
-        extras_pullback = prepare_pullback(cons_oop, adtype, x)
+        extras_pullback = prepare_pullback(cons_oop, adtype, x, ones(eltype(x), num_cons))
         function cons_vjp!(J, θ, v)
             pullback!(cons_oop, J, adtype, θ, v, extras_pullback)
         end
@@ -152,7 +187,8 @@ function OptimizationBase.instantiate_function(
     end
     if cons !== nothing && f.cons_jvp === nothing && cons_jvp == true
-        extras_pushforward = prepare_pushforward(cons_oop, adtype, x)
+        extras_pushforward = prepare_pushforward(
+            cons_oop, adtype, x, ones(eltype(x), length(x)))
         function cons_jvp!(J, θ, v)
             pushforward!(cons_oop, J, adtype, θ, v, extras_pushforward)
         end
@@ -182,7 +218,8 @@ function OptimizationBase.instantiate_function(
     lag_hess_prototype = f.lag_hess_prototype
     if f.lag_h === nothing && cons !== nothing && lag_h == true
-        lag_extras = prepare_hessian(lagrangian, soadtype, x)
+        lag_extras = prepare_hessian(
+            lagrangian, soadtype, vcat(x, [one(eltype(x))], ones(eltype(x), num_cons)))
         lag_hess_prototype = zeros(Bool, length(x), length(x))
         function lag_h!(H::AbstractMatrix, θ, σ, λ)
@@ -190,7 +227,8 @@ function OptimizationBase.instantiate_function(
                 cons_h(H, θ)
                 H *= λ
             else
-                hessian!(x -> lagrangian(x, σ, λ), H, soadtype, θ, lag_extras)
+                H .= @view(hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[
+                    1:length(θ), 1:length(θ)])
             end
         end
@@ -206,6 +244,31 @@ function OptimizationBase.instantiate_function(
                 end
             end
         end
+
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            function lag_h!(H::AbstractMatrix, θ, σ, λ, p)
+                if σ == zero(eltype(θ))
+                    cons_h(H, θ)
+                    H *= λ
+                else
+                    global _p = p
+                    H .= @view(hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[
+                        1:length(θ), 1:length(θ)])
+                end
+            end
+
+            function lag_h!(h::AbstractVector, θ, σ, λ, p)
+                global _p = p
+                H = hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)
+                k = 0
+                for i in 1:length(θ)
+                    for j in 1:i
+                        k += 1
+                        h[k] = H[i, j]
+                    end
+                end
+            end
+        end
     elseif cons !== nothing && lag_h == true
         lag_h! = (res, θ, σ, μ) -> f.lag_h(res, θ, σ, μ, p)
     else
         lag_h! = nothing
     end
@@ -305,8 +368,18 @@ function OptimizationBase.instantiate_function(
         end
         hess_sparsity = extras_hess.coloring_result.S
         hess_colors = extras_hess.coloring_result.color
+
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            function hess(res, θ, p)
+                global _p = p
+                hessian!(_f, res, soadtype, θ)
+            end
+        end
     elseif h == true
         hess = (H, θ) -> f.hess(H, θ, p)
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            hess = (H, θ, p) -> f.hess(H, θ, p)
+        end
     else
         hess = nothing
     end
@@ -316,8 +389,19 @@ function OptimizationBase.instantiate_function(
             (y, _, _) = value_derivative_and_second_derivative!(_f, G, H, θ, extras_hess)
             return y
         end
+
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            function fgh!(G, H, θ, p)
+                global _p = p
+                (y, _, _) = value_derivative_and_second_derivative!(_f, G, H, θ)
+                return y
+            end
+        end
     elseif fgh == true
         fgh! = (G, H, θ) -> f.fgh(G, H, θ, p)
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            fgh! = (G, H, θ, p) -> f.fgh(G, H, θ, p)
+        end
     else
         fgh! = nothing
     end
@@ -327,8 +411,17 @@ function OptimizationBase.instantiate_function(
         function hv!(H, θ, v)
             hvp!(_f, H, soadtype.dense_ad, θ, v, extras_hvp)
         end
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            function hv!(H, θ, v, p)
+                global _p = p
+                hvp!(_f, H, soadtype.dense_ad, θ, v)
+            end
+        end
     elseif hv == true
         hv! = (H, θ, v) -> f.hv(H, θ, v, p)
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            hv! = (H, θ, v, p) -> f.hv(H, θ, v, p)
+        end
     else
         hv! = nothing
     end
@@ -346,8 +439,11 @@ function OptimizationBase.instantiate_function(
             return copy(_res)
         end
-        function lagrangian(x, σ = one(eltype(x)), λ = ones(eltype(x), num_cons))
-            return σ * _f(x) + dot(λ, cons_oop(x))
+        function lagrangian(augvars)
+            θ = augvars[1:length(x)]
+            σ = augvars[length(x) + 1]
+            λ = augvars[(length(x) + 2):end]
+            return σ * _f(θ) + dot(λ, cons_oop(θ))
         end
     end
@@ -415,8 +511,9 @@ function OptimizationBase.instantiate_function(
     lag_hess_prototype = f.lag_hess_prototype
     if cons !== nothing && cons_h == true && f.lag_h === nothing
-        lag_extras = prepare_hessian(lagrangian, soadtype, x)
-        lag_hess_prototype = lag_extras.coloring_result.S
+        lag_extras = prepare_hessian(
+            lagrangian, soadtype, vcat(x, [one(eltype(x))], ones(eltype(x), num_cons)))
+        lag_hess_prototype = lag_extras.coloring_result.S[1:length(x), 1:length(x)]
         lag_hess_colors = lag_extras.coloring_result.color
         function lag_h!(H::AbstractMatrix, θ, σ, λ)
@@ -424,13 +521,14 @@ function OptimizationBase.instantiate_function(
                 cons_h(H, θ)
                 H *= λ
             else
-                hessian!(x -> lagrangian(x, σ, λ), H, soadtype, θ, lag_extras)
+                H .= hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[
+                    1:length(θ), 1:length(θ)]
             end
         end
         function lag_h!(h, θ, σ, λ)
-            H = eltype(θ).(lag_hess_prototype)
-            hessian!((x) -> lagrangian(x, σ, λ), H, soadtype, θ, lag_extras)
+            H = hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[
+                1:length(θ), 1:length(θ)]
             k = 0
             rows, cols, _ = findnz(H)
             for (i, j) in zip(rows, cols)
@@ -440,6 +538,31 @@ function OptimizationBase.instantiate_function(
                 end
             end
         end
+
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            function lag_h!(H::AbstractMatrix, θ, σ, λ, p)
+                if σ == zero(eltype(θ))
+                    cons_h(H, θ)
+                    H *= λ
+                else
+                    global _p = p
+                    H .= hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[
+                        1:length(θ), 1:length(θ)]
+                end
+            end
+
+            function lag_h!(h::AbstractVector, θ, σ, λ, p)
+                global _p = p
+                H = hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)
+                k = 0
+                for i in 1:length(θ)
+                    for j in 1:i
+                        k += 1
+                        h[k] = H[i, j]
+                    end
+                end
+            end
+        end
     elseif cons !== nothing && cons_h == true
         lag_h!
= (res, θ, σ, μ) -> f.lag_h(res, θ, σ, μ, p) else diff --git a/src/OptimizationDIExt.jl b/src/OptimizationDIExt.jl index dd6e829..2f346b5 100644 --- a/src/OptimizationDIExt.jl +++ b/src/OptimizationDIExt.jl @@ -4,6 +4,9 @@ import OptimizationBase.SciMLBase: OptimizationFunction import OptimizationBase.LinearAlgebra: I import DifferentiationInterface import DifferentiationInterface: prepare_gradient, prepare_hessian, prepare_hvp, + prepare_pullback, prepare_pushforward, pullback!, + pushforward!, + pullback, pushforward, prepare_jacobian, value_and_gradient!, value_and_gradient, value_derivative_and_second_derivative!, value_derivative_and_second_derivative, @@ -86,8 +89,17 @@ function instantiate_function( function hess(res, θ) hessian!(_f, res, soadtype, θ, extras_hess) end + if p !== SciMLBase.NullParameters() && p !== nothing + function hess(res, θ, p) + global _p = p + hessian!(_f, res, soadtype, θ) + end + end elseif h == true hess = (H, θ) -> f.hess(H, θ, p) + if p !== SciMLBase.NullParameters() && p !== nothing + hess = (H, θ, p) -> f.hess(H, θ, p) + end else hess = nothing end @@ -98,8 +110,19 @@ function instantiate_function( _f, G, H, soadtype, θ, extras_hess) return y end + if p !== SciMLBase.NullParameters() && p !== nothing + function fgh!(G, H, θ, p) + global _p = p + (y, _, _) = value_derivative_and_second_derivative!( + _f, G, H, soadtype, θ) + return y + end + end elseif fgh == true fgh! = (G, H, θ) -> f.fgh(G, H, θ, p) + if p !== SciMLBase.NullParameters() && p !== nothing + fgh! = (G, H, θ, p) -> f.fgh(G, H, θ, p) + end else fgh! = nothing end @@ -109,8 +132,17 @@ function instantiate_function( function hv!(H, θ, v) hvp!(_f, H, soadtype, θ, v, extras_hvp) end + if p !== SciMLBase.NullParameters() && p !== nothing + function hv!(H, θ, v, p) + global _p = p + hvp!(_f, H, soadtype, θ, v) + end + end elseif hv == true hv! = (H, θ, v) -> f.hv(H, θ, v, p) + if p !== SciMLBase.NullParameters() && p !== nothing + hv! = (H, θ, v, p) -> f.hv(H, θ, v, p) + end else hv! 
= nothing end @@ -128,8 +160,11 @@ function instantiate_function( return _res end - function lagrangian(x, σ = one(eltype(x)), λ = ones(eltype(x), num_cons)) - return σ * _f(x) + dot(λ, cons_oop(x)) + function lagrangian(augvars) + θ = augvars[1:length(x)] + σ = augvars[length(x) + 1] + λ = augvars[(length(x) + 2):end] + return σ * _f(θ) + dot(λ, cons_oop(θ)) end end @@ -150,7 +185,7 @@ function instantiate_function( end if f.cons_vjp === nothing && cons_vjp == true && cons !== nothing - extras_pullback = prepare_pullback(cons_oop, adtype, x) + extras_pullback = prepare_pullback(cons_oop, adtype, x, ones(eltype(x), num_cons)) function cons_vjp!(J, θ, v) pullback!(cons_oop, J, adtype, θ, v, extras_pullback) end @@ -161,7 +196,8 @@ function instantiate_function( end if f.cons_jvp === nothing && cons_jvp == true && cons !== nothing - extras_pushforward = prepare_pushforward(cons_oop, adtype, x) + extras_pushforward = prepare_pushforward( + cons_oop, adtype, x, ones(eltype(x), length(x))) function cons_jvp!(J, θ, v) pushforward!(cons_oop, J, adtype, θ, v, extras_pushforward) end @@ -191,7 +227,8 @@ function instantiate_function( lag_hess_prototype = f.lag_hess_prototype if cons !== nothing && lag_h == true && f.lag_h === nothing - lag_extras = prepare_hessian(lagrangian, soadtype, x) + lag_extras = prepare_hessian( + lagrangian, soadtype, vcat(x, [one(eltype(x))], ones(eltype(x), num_cons))) lag_hess_prototype = zeros(Bool, length(x), length(x)) function lag_h!(H::AbstractMatrix, θ, σ, λ) @@ -199,22 +236,46 @@ function instantiate_function( cons_h(H, θ) H *= λ else - hessian!(x -> lagrangian(x, σ, λ), H, soadtype, θ, lag_extras) + H .= @view(hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[ + 1:length(θ), 1:length(θ)]) end end - function lag_h!(h, θ, σ, λ) - H = eltype(θ).(lag_hess_prototype) - hessian!(x -> lagrangian(x, σ, λ), H, soadtype, θ, lag_extras) + function lag_h!(h::AbstractVector, θ, σ, λ) + H = hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras) k = 0 - rows, cols, _ = findnz(H) - for (i, j) in zip(rows, cols) - if i <= j + for i in 1:length(θ) + for j in 1:i k += 1 h[k] = H[i, j] end end end + + if p !== SciMLBase.NullParameters() && p !== nothing + function lag_h!(H::AbstractMatrix, θ, σ, λ, p) + if σ == zero(eltype(θ)) + cons_h(H, θ) + H *= λ + else + global _p = p + H .= @view(hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[ + 1:length(θ), 1:length(θ)]) + end + end + + function lag_h!(h::AbstractVector, θ, σ, λ, p) + global _p = p + H = hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras) + k = 0 + for i in 1:length(θ) + for j in 1:i + k += 1 + h[k] = H[i, j] + end + end + end + end elseif lag_h == true && cons !== nothing lag_h! = (res, θ, σ, μ) -> f.lag_h(res, θ, σ, μ, p) else @@ -316,8 +377,17 @@ function instantiate_function( function hess(θ) hessian(_f, soadtype, θ, extras_hess) end + if p !== SciMLBase.NullParameters() && p !== nothing + function hess(θ, p) + global _p = p + hessian(_f, soadtype, θ) + end + end elseif h == true hess = (θ) -> f.hess(θ, p) + if p !== SciMLBase.NullParameters() && p !== nothing + hess = (θ, p) -> f.hess(θ, p) + end else hess = nothing end @@ -327,8 +397,18 @@ function instantiate_function( (y, G, H) = value_derivative_and_second_derivative(_f, adtype, θ, extras_hess) return y, G, H end + if p !== SciMLBase.NullParameters() && p !== nothing + function fgh!(θ, p) + global _p = p + (y, G, H) = value_derivative_and_second_derivative(_f, adtype, θ) + return y, G, H + end + end elseif fgh == true fgh! 
= (θ) -> f.fgh(θ, p) + if p !== SciMLBase.NullParameters() && p !== nothing + fgh! = (θ, p) -> f.fgh(θ, p) + end else fgh! = nothing end @@ -338,8 +418,17 @@ function instantiate_function( function hv!(θ, v) hvp(_f, soadtype, θ, v, extras_hvp) end + if p !== SciMLBase.NullParameters() && p !== nothing + function hv!(θ, v, p) + global _p = p + hvp(_f, soadtype, θ, v, extras_hvp) + end + end elseif hv == true hv! = (θ, v) -> f.hv(θ, v, p) + if p !== SciMLBase.NullParameters() && p !== nothing + hv! = (θ, v, p) -> f.hv(θ, v, p) + end else hv! = nothing end @@ -351,8 +440,11 @@ function instantiate_function( return f.cons(θ, p) end - function lagrangian(x, σ = one(eltype(x)), λ = ones(eltype(x), num_cons)) - return σ * _f(x) + dot(λ, cons(x)) + function lagrangian(augvars) + θ = augvars[1:length(x)] + σ = augvars[length(x) + 1] + λ = augvars[(length(x) + 2):end] + return σ * _f(θ) + dot(λ, cons(θ)) end end @@ -374,7 +466,7 @@ function instantiate_function( end if f.cons_vjp === nothing && cons_vjp == true && cons !== nothing - extras_pullback = prepare_pullback(cons, adtype, x) + extras_pullback = prepare_pullback(cons, adtype, x, ones(eltype(x), num_cons)) function cons_vjp!(θ, v) return pullback(cons, adtype, θ, v, extras_pullback) end @@ -385,7 +477,8 @@ function instantiate_function( end if f.cons_jvp === nothing && cons_jvp == true && cons !== nothing - extras_pushforward = prepare_pushforward(cons, adtype, x) + extras_pushforward = prepare_pushforward( + cons, adtype, x, ones(eltype(x), length(x))) function cons_jvp!(θ, v) return pushforward(cons, adtype, θ, v, extras_pushforward) end @@ -416,18 +509,28 @@ function instantiate_function( lag_hess_prototype = f.lag_hess_prototype if cons !== nothing && lag_h == true && f.lag_h === nothing - lag_extras = prepare_hessian(lagrangian, soadtype, x) + lag_extras = prepare_hessian( + lagrangian, soadtype, vcat(x, [one(eltype(x))], ones(eltype(x), num_cons))) lag_hess_prototype = zeros(Bool, length(x), length(x)) function lag_h!(θ, σ, λ) if σ == zero(eltype(θ)) - H = cons_h(θ) - for i in 1:num_cons - H[i] *= λ[i] - end - return H + return λ .* cons_h(θ) else - return hessian(x -> lagrangian(x, σ, λ), soadtype, θ, lag_extras) + return hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[ + 1:length(θ), 1:length(θ)] + end + end + + if p !== SciMLBase.NullParameters() && p !== nothing + function lag_h!(θ, σ, λ, p) + if σ == zero(eltype(θ)) + return λ .* cons_h(θ) + else + global _p = p + return hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[ + 1:length(θ), 1:length(θ)] + end end end elseif lag_h == true && cons !== nothing diff --git a/src/OptimizationDISparseExt.jl b/src/OptimizationDISparseExt.jl index 6018d21..ccd8836 100644 --- a/src/OptimizationDISparseExt.jl +++ b/src/OptimizationDISparseExt.jl @@ -168,8 +168,18 @@ function instantiate_function( end hess_sparsity = extras_hess.coloring_result.S hess_colors = extras_hess.coloring_result.color + + if p !== SciMLBase.NullParameters() && p !== nothing + function hess(res, θ, p) + global _p = p + hessian!(_f, res, soadtype, θ, extras_hess) + end + end elseif h == true hess = (H, θ) -> f.hess(H, θ, p) + if p !== SciMLBase.NullParameters() && p !== nothing + hess = (H, θ, p) -> f.hess(H, θ, p) + end else hess = nothing end @@ -179,8 +189,18 @@ function instantiate_function( (y, _, _) = value_derivative_and_second_derivative!(_f, G, H, θ, extras_hess) return y end + if p !== SciMLBase.NullParameters() && p !== nothing + function fgh!(G, H, θ, p) + global _p = p + (y, _, _) = 
value_derivative_and_second_derivative!(_f, G, H, θ, extras_hess) + return y + end + end elseif fgh == true fgh! = (G, H, θ) -> f.fgh(G, H, θ, p) + if p !== SciMLBase.NullParameters() && p !== nothing + fgh! = (G, H, θ, p) -> f.fgh(G, H, θ, p) + end else fgh! = nothing end @@ -190,8 +210,17 @@ function instantiate_function( function hv!(H, θ, v) hvp!(_f, H, soadtype.dense_ad, θ, v, extras_hvp) end + if p !== SciMLBase.NullParameters() && p !== nothing + function hv!(H, θ, v, p) + global _p = p + hvp!(_f, H, soadtype.dense_ad, θ, v, extras_hvp) + end + end elseif hv == true hv! = (H, θ, v) -> f.hv(H, θ, v, p) + if p !== SciMLBase.NullParameters() && p !== nothing + hv! = (H, θ, v, p) -> f.hv(H, θ, v, p) + end else hv! = nothing end @@ -209,8 +238,11 @@ function instantiate_function( return _res end - function lagrangian(x, σ = one(eltype(x)), λ = ones(eltype(x), num_cons)) - return σ * _f(x) + dot(λ, cons_oop(x)) + function lagrangian(augvars) + θ = augvars[1:length(x)] + σ = augvars[length(x) + 1] + λ = augvars[(length(x) + 2):end] + return σ * _f(θ) + dot(λ, cons_oop(θ)) end end @@ -233,7 +265,7 @@ function instantiate_function( end if f.cons_vjp === nothing && cons_vjp == true - extras_pullback = prepare_pullback(cons_oop, adtype, x) + extras_pullback = prepare_pullback(cons_oop, adtype, x, ones(eltype(x), num_cons)) function cons_vjp!(J, θ, v) pullback!(cons_oop, J, adtype.dense_ad, θ, v, extras_pullback) end @@ -244,7 +276,8 @@ function instantiate_function( end if f.cons_jvp === nothing && cons_jvp == true - extras_pushforward = prepare_pushforward(cons_oop, adtype, x) + extras_pushforward = prepare_pushforward( + cons_oop, adtype, x, ones(eltype(x), length(x))) function cons_jvp!(J, θ, v) pushforward!(cons_oop, J, adtype.dense_ad, θ, v, extras_pushforward) end @@ -279,8 +312,9 @@ function instantiate_function( lag_hess_prototype = f.lag_hess_prototype lag_hess_colors = f.lag_hess_colorvec if cons !== nothing && lag_h == true && f.lag_h === nothing - lag_extras = prepare_hessian(lagrangian, soadtype, x) - lag_hess_prototype = lag_extras.coloring_result.S + lag_extras = prepare_hessian( + lagrangian, soadtype, vcat(x, [one(eltype(x))], ones(eltype(x), num_cons))) + lag_hess_prototype = lag_extras.coloring_result.S[1:length(x), 1:length(x)] lag_hess_colors = lag_extras.coloring_result.color function lag_h!(H::AbstractMatrix, θ, σ, λ) @@ -288,13 +322,14 @@ function instantiate_function( cons_h(H, θ) H *= λ else - hessian!(x -> lagrangian(x, σ, λ), H, soadtype, θ, lag_extras) + H .= hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[ + 1:length(θ), 1:length(θ)] end end function lag_h!(h, θ, σ, λ) - H = eltype(θ).(lag_hess_prototype) - hessian!(x -> lagrangian(x, σ, λ), H, soadtype, θ, lag_extras) + H = hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[ + 1:length(θ), 1:length(θ)] k = 0 rows, cols, _ = findnz(H) for (i, j) in zip(rows, cols) @@ -304,8 +339,38 @@ function instantiate_function( end end end + + if p !== SciMLBase.NullParameters() && p !== nothing + function lag_h!(H::AbstractMatrix, θ, σ, λ, p) + if σ == zero(eltype(θ)) + cons_h(H, θ) + H *= λ + else + global _p = p + H .= hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[ + 1:length(θ), 1:length(θ)] + end + end + + function lag_h!(h, θ, σ, λ, p) + global _p = p + H = hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[ + 1:length(θ), 1:length(θ)] + k = 0 + rows, cols, _ = findnz(H) + for (i, j) in zip(rows, cols) + if i <= j + k += 1 + h[k] = H[i, j] + end + end + end + end elseif lag_h == 
true
         lag_h! = (H, θ, σ, λ) -> f.lag_h(H, θ, σ, λ, p)
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            lag_h! = (H, θ, σ, λ, p) -> f.lag_h(H, θ, σ, λ, p)
+        end
     else
         lag_h! = nothing
     end
@@ -397,8 +462,19 @@ function instantiate_function(
             (y, G, H) = value_derivative_and_second_derivative(_f, soadtype, θ, extras_hess)
             return y, G, H
         end
+
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            function fgh!(θ, p)
+                global _p = p
+                (y, G, H) = value_derivative_and_second_derivative(_f, soadtype, θ, extras_hess)
+                return y, G, H
+            end
+        end
     elseif fgh == true
         fgh! = (θ) -> f.fgh(θ, p)
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            fgh! = (θ, p) -> f.fgh(θ, p)
+        end
     else
         fgh! = nothing
     end
@@ -412,8 +488,18 @@ function instantiate_function(
         end
         hess_sparsity = extras_hess.coloring_result.S
         hess_colors = extras_hess.coloring_result.color
+
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            function hess(θ, p)
+                global _p = p
+                hessian(_f, soadtype, θ, extras_hess)
+            end
+        end
     elseif h == true
         hess = (θ) -> f.hess(θ, p)
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            hess = (θ, p) -> f.hess(θ, p)
+        end
     else
         hess = nothing
     end
@@ -423,8 +509,18 @@ function instantiate_function(
         function hv!(θ, v)
             hvp(_f, soadtype.dense_ad, θ, v, extras_hvp)
         end
+
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            function hv!(θ, v, p)
+                global _p = p
+                hvp(_f, soadtype.dense_ad, θ, v, extras_hvp)
+            end
+        end
     elseif hv == true
         hv! = (θ, v) -> f.hv(θ, v, p)
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            hv! = (θ, v, p) -> f.hv(θ, v, p)
+        end
     else
         hv! = nothing
     end
@@ -436,8 +532,11 @@ function instantiate_function(
             f.cons(θ, p)
         end
-        function lagrangian(x, σ = one(eltype(x)), λ = ones(eltype(x), num_cons))
-            return σ * _f(x) + dot(λ, cons(x))
+        function lagrangian(augvars)
+            θ = augvars[1:length(x)]
+            σ = augvars[length(x) + 1]
+            λ = augvars[(length(x) + 2):end]
+            return σ * _f(θ) + dot(λ, cons(θ))
         end
     end
@@ -461,7 +560,7 @@ function instantiate_function(
     end
     if f.cons_vjp === nothing && cons_vjp == true
-        extras_pullback = prepare_pullback(cons, adtype, x)
+        extras_pullback = prepare_pullback(cons, adtype, x, ones(eltype(x), num_cons))
         function cons_vjp!(θ, v)
             pullback(cons, adtype, θ, v, extras_pullback)
         end
@@ -472,7 +571,8 @@ function instantiate_function(
     end
     if f.cons_jvp === nothing && cons_jvp == true
-        extras_pushforward = prepare_pushforward(cons, adtype, x)
+        extras_pushforward = prepare_pushforward(
+            cons, adtype, x, ones(eltype(x), length(x)))
        function cons_jvp!(θ, v)
            pushforward(cons, adtype, θ, v, extras_pushforward)
        end
@@ -506,19 +606,37 @@ function instantiate_function(
     lag_hess_prototype = f.lag_hess_prototype
     lag_hess_colors = f.lag_hess_colorvec
     if cons !== nothing && lag_h == true && f.lag_h === nothing
-        lag_extras = prepare_hessian(lagrangian, soadtype, x)
+        lag_extras = prepare_hessian(
+            lagrangian, soadtype, vcat(x, [one(eltype(x))], ones(eltype(x), num_cons)))
         function lag_h!(θ, σ, λ)
             if σ == zero(eltype(θ))
-                return λ * cons_h!(θ)
+                return λ .* cons_h!(θ)
             else
-                hess = hessian(x -> lagrangian(x, σ, λ), soadtype, θ, lag_extras)
+                hess = hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[
+                    1:length(θ), 1:length(θ)]
                 return hess
             end
         end
-        lag_hess_prototype = lag_extras.coloring_result.S
+        lag_hess_prototype = lag_extras.coloring_result.S[1:length(x), 1:length(x)]
         lag_hess_colors = lag_extras.coloring_result.color
+
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            function lag_h!(θ, σ, λ, p)
+                if σ == zero(eltype(θ))
+
return λ .* cons_h!(θ) + else + global _p = p + hess = hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[ + 1:length(θ), 1:length(θ)] + return hess + end + end + end elseif lag_h == true && cons !== nothing lag_h! = (θ, σ, μ) -> f.lag_h(θ, σ, μ, p) + if p !== SciMLBase.NullParameters() && p !== nothing + lag_h! = (θ, σ, μ, p) -> f.lag_h(θ, σ, μ, p) + end else lag_h! = nothing end diff --git a/test/adtests.jl b/test/adtests.jl index 30aca18..6072cb0 100644 --- a/test/adtests.jl +++ b/test/adtests.jl @@ -26,7 +26,7 @@ H2 = Array{Float64}(undef, 2, 2) g!(G1, x0) h!(H1, x0) -cons = (res, x, p) -> (res .= [x[1]^2 + x[2]^2]; return nothing) +cons = (res, x, p) -> (res[1] = x[1]^2 + x[2]^2; return nothing) optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoModelingToolkit(), cons = cons) optprob = OptimizationBase.instantiate_function(optf, x0, OptimizationBase.AutoModelingToolkit(), @@ -46,7 +46,8 @@ optprob.cons_h(H3, x0) @test H3 == [[2.0 0.0; 0.0 2.0]] function con2_c(res, x, p) - res .= [x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]] + res[1] = x[1]^2 + x[2]^2 + res[2] = x[2] * sin(x[1]) - x[1] return nothing end optf = OptimizationFunction(rosenbrock, @@ -69,424 +70,719 @@ H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] optprob.cons_h(H3, x0) @test H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] -G2 = Array{Float64}(undef, 2) -H2 = Array{Float64}(undef, 2, 2) - -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoEnzyme(), cons = cons) -optprob = OptimizationBase.instantiate_function( - optf, x0, OptimizationBase.AutoEnzyme(), - nothing, 1, g = true, h = true, cons_j = true, cons_h = true) -optprob.grad(G2, x0) -@test G1 == G2 -optprob.hess(H2, x0) -@test H1 == H2 -res = Array{Float64}(undef, 1) -optprob.cons(res, x0) -@test res == [0.0] -J = Array{Float64}(undef, 2) -optprob.cons_j(J, [5.0, 3.0]) -@test J == [10.0, 6.0] -H3 = [Array{Float64}(undef, 2, 2)] -optprob.cons_h(H3, x0) -@test H3 == [[2.0 0.0; 0.0 2.0]] -G2 = Array{Float64}(undef, 2) -H2 = Array{Float64}(undef, 2, 2) -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoEnzyme(), cons = con2_c) -optprob = OptimizationBase.instantiate_function( - optf, x0, OptimizationBase.AutoEnzyme(), - nothing, 2, g = true, h = true, cons_j = true, cons_h = true) -optprob.grad(G2, x0) -@test G1 == G2 -optprob.hess(H2, x0) -@test H1 == H2 -res = Array{Float64}(undef, 2) -optprob.cons(res, x0) -@test res == [0.0, 0.0] -J = Array{Float64}(undef, 2, 2) -optprob.cons_j(J, [5.0, 3.0]) -@test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) -H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] -optprob.cons_h(H3, x0) -@test H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] - -G2 = Array{Float64}(undef, 2) -H2 = Array{Float64}(undef, 2, 2) - -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoReverseDiff(), cons = con2_c) -optprob = OptimizationBase.instantiate_function(optf, x0, - OptimizationBase.AutoReverseDiff(), - nothing, 2, g = true, h = true, cons_j = true, cons_h = true) -optprob.grad(G2, x0) -@test G1 == G2 -optprob.hess(H2, x0) -@test H1 == H2 -res = Array{Float64}(undef, 2) -optprob.cons(res, x0) -@test res == [0.0, 0.0] -J = Array{Float64}(undef, 2, 2) -optprob.cons_j(J, [5.0, 3.0]) -@test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) -H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] -optprob.cons_h(H3, x0) -H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] - -G2 = Array{Float64}(undef, 2) -H2 = Array{Float64}(undef, 2, 2) - -optf = 
OptimizationFunction(rosenbrock, OptimizationBase.AutoReverseDiff(), cons = con2_c) -optprob = OptimizationBase.instantiate_function(optf, x0, - OptimizationBase.AutoReverseDiff(compile = true), - nothing, 2, g = true, h = true, cons_j = true, cons_h = true) -optprob.grad(G2, x0) -@test G1 == G2 -optprob.hess(H2, x0) -@test H1 == H2 -res = Array{Float64}(undef, 2) -optprob.cons(res, x0) -@test res == [0.0, 0.0] -J = Array{Float64}(undef, 2, 2) -optprob.cons_j(J, [5.0, 3.0]) -@test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) -H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] -optprob.cons_h(H3, x0) -H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] - -G2 = Array{Float64}(undef, 2) -H2 = Array{Float64}(undef, 2, 2) - -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoZygote(), cons = con2_c) -optprob = OptimizationBase.instantiate_function(optf, x0, OptimizationBase.AutoZygote(), - nothing, 2, g = true, h = true, cons_j = true, cons_h = true) -optprob.grad(G2, x0) -@test G1 == G2 -optprob.hess(H2, x0) -@test H1 == H2 -res = Array{Float64}(undef, 2) -optprob.cons(res, x0) -@test res == [0.0, 0.0] -J = Array{Float64}(undef, 2, 2) -optprob.cons_j(J, [5.0, 3.0]) -@test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) -H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] -optprob.cons_h(H3, x0) -H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] - -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoModelingToolkit(true, true), - cons = con2_c) -optprob = OptimizationBase.instantiate_function(optf, x0, - OptimizationBase.AutoModelingToolkit(true, true), - nothing, 2, g = true, h = true, cons_j = true, cons_h = true) -using SparseArrays -sH = sparse([1, 1, 2, 2], [1, 2, 1, 2], zeros(4)) -@test findnz(sH)[1:2] == findnz(optprob.hess_prototype)[1:2] -optprob.hess(sH, x0) -@test sH == H2 -res = Array{Float64}(undef, 2) -optprob.cons(res, x0) -@test res == [0.0, 0.0] -sJ = sparse([1, 1, 2, 2], [1, 2, 1, 2], zeros(4)) -@test findnz(sJ)[1:2] == findnz(optprob.cons_jac_prototype)[1:2] -optprob.cons_j(sJ, [5.0, 3.0]) -@test all(isapprox(sJ, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) -sH3 = [sparse([1, 2], [1, 2], zeros(2)), sparse([1, 1, 2], [1, 2, 1], zeros(3))] -@test getindex.(findnz.(sH3), Ref([1, 2])) == - getindex.(findnz.(optprob.cons_hess_prototype), Ref([1, 2])) -optprob.cons_h(sH3, x0) -@test Array.(sH3) == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] - -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoForwardDiff()) -optprob = OptimizationBase.instantiate_function(optf, x0, - OptimizationBase.AutoForwardDiff(), - nothing, g = true, h = true) -optprob.grad(G2, x0) -@test G1 == G2 -optprob.hess(H2, x0) -@test H1 ≈ H2 - -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoZygote()) -optprob = OptimizationBase.instantiate_function(optf, - x0, - OptimizationBase.AutoZygote(), - nothing, g = true, h = true) -optprob.grad(G2, x0) -@test G1 == G2 -optprob.hess(H2, x0) -@test H1 == H2 - -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoReverseDiff()) -optprob = OptimizationBase.instantiate_function(optf, x0, - OptimizationBase.AutoReverseDiff(), - nothing, g = true, h = true) -optprob.grad(G2, x0) -@test G1 == G2 -optprob.hess(H2, x0) -@test H1 == H2 - -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoTracker()) -optprob = OptimizationBase.instantiate_function(optf, - x0, - OptimizationBase.AutoTracker(), - nothing, g = true, h = true) -optprob.grad(G2, x0) -@test G1 == G2 -@test_broken 
optprob.hess(H2, x0) - -prob = OptimizationProblem(optf, x0) - -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoFiniteDiff()) -optprob = OptimizationBase.instantiate_function( - optf, x0, OptimizationBase.AutoFiniteDiff(), - nothing, g = true, h = true) -optprob.grad(G2, x0) -@test G1≈G2 rtol=1e-6 -optprob.hess(H2, x0) -@test H1≈H2 rtol=1e-6 - -# Test new constraints -cons = (res, x, p) -> (res .= [x[1]^2 + x[2]^2]) -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoFiniteDiff(), cons = cons) -optprob = OptimizationBase.instantiate_function( - optf, x0, OptimizationBase.AutoFiniteDiff(), - nothing, 1, g = true, h = true, cons_j = true, cons_h = true) -optprob.grad(G2, x0) -@test G1≈G2 rtol=1e-6 -optprob.hess(H2, x0) -@test H1≈H2 rtol=1e-6 -res = Array{Float64}(undef, 1) -optprob.cons(res, x0) -@test res == [0.0] -optprob.cons(res, [1.0, 4.0]) -@test res == [17.0] -J = zeros(1, 2) -optprob.cons_j(J, [5.0, 3.0]) -@test J ≈ [10.0 6.0] -H3 = [Array{Float64}(undef, 2, 2)] -optprob.cons_h(H3, x0) -@test H3 ≈ [[2.0 0.0; 0.0 2.0]] - -H4 = Array{Float64}(undef, 2, 2) -μ = randn(1) -σ = rand() -# optprob.lag_h(H4, x0, σ, μ) -# @test H4≈σ * H1 + μ[1] * H3[1] rtol=1e-6 - -cons_jac_proto = Float64.(sparse([1 1])) # Things break if you only use [1 1]; see FiniteDiff.jl -cons_jac_colors = 1:2 -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoFiniteDiff(), cons = cons, - cons_jac_prototype = cons_jac_proto, - cons_jac_colorvec = cons_jac_colors) -optprob = OptimizationBase.instantiate_function( - optf, x0, OptimizationBase.AutoFiniteDiff(), - nothing, 1, g = true, h = true, cons_j = true, cons_h = true) -@test optprob.cons_jac_prototype == sparse([1.0 1.0]) # make sure it's still using it -@test optprob.cons_jac_colorvec == 1:2 -J = zeros(1, 2) -optprob.cons_j(J, [5.0, 3.0]) -@test J ≈ [10.0 6.0] - -function con2_c(res, x, p) - res .= [x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]] +@testset "one constraint tests" begin + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoEnzyme(), cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoEnzyme(), + nothing, 1, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 1) + optprob.cons(res, x0) + @test res == [0.0] + J = Array{Float64}(undef, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test J == [10.0, 6.0] + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0]) + @test vJ == [10.0, 6.0] + Jv = Array{Float64}(undef, 1) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == [8.0] + H3 = [Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(1) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + μ[1] * H3[1] rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoForwardDiff(), cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoForwardDiff(), + nothing, 1, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + 
optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 1) + optprob.cons(res, x0) + @test res == [0.0] + J = Array{Float64}(undef, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test J == [10.0, 6.0] + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0]) + @test vJ == [10.0, 6.0] + Jv = Array{Float64}(undef, 1) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == [8.0] + H3 = [Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(1) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + μ[1] * H3[1] rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoReverseDiff(), cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoReverseDiff(), + nothing, 1, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 1) + optprob.cons(res, x0) + @test res == [0.0] + J = Array{Float64}(undef, 2) + @test_broken optprob.cons_j(J, [5.0, 3.0]) + # @test J == [10.0, 6.0] + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0]) + @test vJ == [10.0, 6.0] + Jv = Array{Float64}(undef, 1) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == [8.0] + H3 = [Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(1) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + μ[1] * H3[1] rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction( + rosenbrock, OptimizationBase.AutoReverseDiff(true), cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoReverseDiff(true), + nothing, 1, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 1) + optprob.cons(res, x0) + @test res == [0.0] + J = Array{Float64}(undef, 2) + @test_broken optprob.cons_j(J, [5.0, 3.0]) + # @test J == [10.0, 6.0] + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0]) + @test vJ == [10.0, 6.0] + Jv = Array{Float64}(undef, 1) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == [8.0] + H3 = [Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(1) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + μ[1] * H3[1] rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoZygote(), cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoZygote(), + nothing, 1, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = 
Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 1) + optprob.cons(res, x0) + @test res == [0.0] + J = Array{Float64}(undef, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test J == [10.0, 6.0] + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0]) + @test vJ == [10.0, 6.0] + Jv = Array{Float64}(undef, 1) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == [8.0] + H3 = [Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(1) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + μ[1] * H3[1] rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoFiniteDiff(), cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoFiniteDiff(), + nothing, 1, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1≈G2 rtol=1e-5 + optprob.hess(H2, x0) + @test H1≈H2 rtol=1e-5 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv≈[2.0, 200.0] rtol=1e-5 + res = Array{Float64}(undef, 1) + optprob.cons(res, x0) + @test res ≈ [0.0] + J = Array{Float64}(undef, 1, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test J≈[10.0 6.0] rtol=1e-5 + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0]) + @test vJ≈[10.0, 6.0] rtol=1e-5 + Jv = Array{Float64}(undef, 1) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv≈[8.0] rtol=1e-5 + H3 = [Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3≈[[2.0 0.0; 0.0 2.0]] rtol=1e-5 + H4 = Array{Float64}(undef, 2, 2) + μ = randn(1) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + μ[1] * H3[1] rtol=1e-6 end -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoFiniteDiff(), cons = con2_c) -optprob = OptimizationBase.instantiate_function( - optf, x0, OptimizationBase.AutoFiniteDiff(), - nothing, 2, g = true, h = true, cons_j = true, cons_h = true) -optprob.grad(G2, x0) -@test G1≈G2 rtol=1e-6 -optprob.hess(H2, x0) -@test H1≈H2 rtol=1e-6 -res = Array{Float64}(undef, 2) -optprob.cons(res, x0) -@test res == [0.0, 0.0] -optprob.cons(res, [1.0, 2.0]) -@test res ≈ [5.0, 0.682941969615793] -J = Array{Float64}(undef, 2, 2) -optprob.cons_j(J, [5.0, 3.0]) -@test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) -H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] -optprob.cons_h(H3, x0) -@test H3 ≈ [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] - -cons_jac_proto = Float64.(sparse([1 1; 1 1])) -cons_jac_colors = 1:2 -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoFiniteDiff(), cons = con2_c, - cons_jac_prototype = cons_jac_proto, - cons_jac_colorvec = cons_jac_colors) -optprob = OptimizationBase.instantiate_function( - optf, x0, OptimizationBase.AutoFiniteDiff(), - nothing, 2, g = true, h = true, cons_j = true, cons_h = true) -@test optprob.cons_jac_prototype == sparse([1.0 1.0; 1.0 1.0]) # make sure it's still using it -@test optprob.cons_jac_colorvec == 1:2 -J = Array{Float64}(undef, 2, 2) -optprob.cons_j(J, [5.0, 3.0]) -@test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) -H2 = Array{Float64}(undef, 2, 2) -optprob.hess(H2, [5.0, 3.0]) -@test all(isapprox(H2, [28802.0 -2000.0; -2000.0 200.0]; rtol = 1e-3)) - -cons_j = (J, θ, p) -> optprob.cons_j(J, θ) -hess = (H, θ, p) -> optprob.hess(H, θ) 
-sH = sparse([1, 1, 2, 2], [1, 2, 1, 2], zeros(4)) -sJ = sparse([1, 1, 2, 2], [1, 2, 1, 2], zeros(4)) -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoForwardDiff(), hess = hess, - hess_prototype = copy(sH), cons = con2_c, cons_j = cons_j, - cons_jac_prototype = copy(sJ)) -optprob1 = OptimizationBase.instantiate_function(optf, x0, - OptimizationBase.AutoForwardDiff(), - nothing, 2, g = true, h = true, cons_j = true, cons_h = true) -@test optprob1.hess_prototype == sparse([0.0 0.0; 0.0 0.0]) # make sure it's still using it -optprob1.hess(sH, [5.0, 3.0]) -@test all(isapprox(sH, [28802.0 -2000.0; -2000.0 200.0]; rtol = 1e-3)) -@test optprob1.cons_jac_prototype == sparse([0.0 0.0; 0.0 0.0]) # make sure it's still using it -optprob1.cons_j(sJ, [5.0, 3.0]) -@test all(isapprox(sJ, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) - -grad = (G, θ, p) -> optprob.grad(G, θ) -hess = (H, θ, p) -> optprob.hess(H, θ) -cons_j = (J, θ, p) -> optprob.cons_j(J, θ) -cons_h = (res, θ, p) -> optprob.cons_h(res, θ) -sH = sparse([1, 1, 2, 2], [1, 2, 1, 2], zeros(4)) -sJ = sparse([1, 1, 2, 2], [1, 2, 1, 2], zeros(4)) -sH3 = [sparse([1, 2], [1, 2], zeros(2)), sparse([1, 1, 2], [1, 2, 1], zeros(3))] -optf = OptimizationFunction(rosenbrock, SciMLBase.NoAD(), grad = grad, hess = hess, - cons = con2_c, cons_j = cons_j, cons_h = cons_h, - hess_prototype = sH, cons_jac_prototype = sJ, - cons_hess_prototype = sH3) -optprob2 = OptimizationBase.instantiate_function( - optf, x0, SciMLBase.NoAD(), nothing, 2, g = true, - h = true, cons_j = true, cons_h = true) -optprob2.hess(sH, [5.0, 3.0]) -@test all(isapprox(sH, [28802.0 -2000.0; -2000.0 200.0]; rtol = 1e-3)) -optprob2.cons_j(sJ, [5.0, 3.0]) -@test all(isapprox(sJ, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) -optprob2.cons_h(sH3, [5.0, 3.0]) -@test Array.(sH3)≈[ - [2.0 0.0; 0.0 2.0], - [2.8767727327346804 0.2836621681849162; 0.2836621681849162 -6.622738308376736e-9] -] rtol=1e-4 - -optf = OptimizationFunction(rosenbrock, - OptimizationBase.AutoSparseFiniteDiff(), - cons = con2_c) -optprob = OptimizationBase.instantiate_function(optf, x0, - OptimizationBase.AutoSparseFiniteDiff(), - nothing, 2, g = true, h = true, cons_j = true, cons_h = true) -G2 = Array{Float64}(undef, 2) -optprob.grad(G2, x0) -@test G1≈G2 rtol=1e-4 -H2 = Array{Float64}(undef, 2, 2) -optprob.hess(H2, x0) -@test H1≈H2 rtol=1e-4 -res = Array{Float64}(undef, 2) -optprob.cons(res, x0) -@test res≈[0.0, 0.0] atol=1e-4 -optprob.cons(res, [1.0, 2.0]) -@test res ≈ [5.0, 0.682941969615793] -J = Array{Float64}(undef, 2, 2) -optprob.cons_j(J, [5.0, 3.0]) -@test J≈[10.0 6.0; -0.149013 -0.958924] rtol=1e-3 -H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] -optprob.cons_h(H3, x0) -@test H3 ≈ [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoSparseFiniteDiff()) -optprob = OptimizationBase.instantiate_function(optf, x0, - OptimizationBase.AutoSparseFiniteDiff(), - nothing, g = true, h = true, cons_j = true, cons_h = true) -optprob.grad(G2, x0) -@test G1≈G2 rtol=1e-6 -optprob.hess(H2, x0) -@test H1≈H2 rtol=1e-4 +@testset "two constraints tests" begin + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoEnzyme(), cons = con2_c) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoEnzyme(), + nothing, 2, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) 
+ @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 2) + optprob.cons(res, x0) + @test res == [0.0, 0.0] + J = Array{Float64}(undef, 2, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0, 1.0]) + @test vJ == sum(J, dims = 1)[:] + Jv = Array{Float64}(undef, 2) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv ≈ 0.5 * sum(J, dims = 2)[:] + H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(2) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + sum(μ .* H3) rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction( + rosenbrock, OptimizationBase.AutoReverseDiff(), cons = con2_c) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoReverseDiff(), + nothing, 2, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 2) + optprob.cons(res, x0) + @test res == [0.0, 0.0] + J = Array{Float64}(undef, 2, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0, 1.0]) + @test vJ == sum(J, dims = 1)[:] + Jv = Array{Float64}(undef, 2) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == 0.5 * sum(J, dims = 2)[:] + H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(2) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + sum(μ .* H3) rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction( + rosenbrock, OptimizationBase.AutoReverseDiff(true), cons = con2_c) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoReverseDiff(true), + nothing, 2, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 2) + optprob.cons(res, x0) + @test res == [0.0, 0.0] + J = Array{Float64}(undef, 2, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0, 1.0]) + @test vJ == sum(J, dims = 1)[:] + Jv = Array{Float64}(undef, 2) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == 0.5 * sum(J, dims = 2)[:] + H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(2) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + sum(μ .* H3) rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 
2) + + optf = OptimizationFunction( + rosenbrock, OptimizationBase.AutoForwardDiff(), cons = con2_c) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoReverseDiff(compile = true), + nothing, 2, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 2) + optprob.cons(res, x0) + @test res == [0.0, 0.0] + J = Array{Float64}(undef, 2, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0, 1.0]) + @test vJ == sum(J, dims = 1)[:] + Jv = Array{Float64}(undef, 2) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == 0.5 * sum(J, dims = 2)[:] + H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(2) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + sum(μ .* H3) rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoZygote(), cons = con2_c) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoZygote(), + nothing, 2, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 2) + optprob.cons(res, x0) + @test res == [0.0, 0.0] + J = Array{Float64}(undef, 2, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0, 1.0]) + @test vJ == sum(J, dims = 1)[:] + Jv = Array{Float64}(undef, 2) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == 0.5 * sum(J, dims = 2)[:] + H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(2) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + sum(μ .* H3) rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction( + rosenbrock, OptimizationBase.AutoFiniteDiff(), cons = con2_c) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoFiniteDiff(), + nothing, 2, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1≈G2 rtol=1e-5 + optprob.hess(H2, x0) + @test H1≈H2 rtol=1e-5 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv≈[2.0, 200.0] rtol=1e-5 + res = Array{Float64}(undef, 2) + optprob.cons(res, x0) + @test res ≈ [0.0, 0.0] + J = Array{Float64}(undef, 2, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0, 1.0]) + @test vJ≈sum(J, dims = 1)[:] rtol=1e-5 + Jv = Array{Float64}(undef, 2) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv≈0.5 * 
+
+    G2 = Array{Float64}(undef, 2)
+    H2 = Array{Float64}(undef, 2, 2)
+
+    optf = OptimizationFunction(
+        rosenbrock, OptimizationBase.AutoFiniteDiff(), cons = con2_c)
+    optprob = OptimizationBase.instantiate_function(
+        optf, x0, OptimizationBase.AutoFiniteDiff(),
+        nothing, 2, g = true, h = true, hv = true,
+        cons_j = true, cons_h = true, cons_vjp = true,
+        cons_jvp = true, lag_h = true)
+    optprob.grad(G2, x0)
+    @test G1≈G2 rtol=1e-5
+    optprob.hess(H2, x0)
+    @test H1≈H2 rtol=1e-5
+    Hv = Array{Float64}(undef, 2)
+    optprob.hv(Hv, x0, [1.0, 1.0])
+    @test Hv≈[2.0, 200.0] rtol=1e-5
+    res = Array{Float64}(undef, 2)
+    optprob.cons(res, x0)
+    @test res ≈ [0.0, 0.0]
+    J = Array{Float64}(undef, 2, 2)
+    optprob.cons_j(J, [5.0, 3.0])
+    @test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3))
+    vJ = Array{Float64}(undef, 2)
+    optprob.cons_vjp(vJ, [5.0, 3.0], [1.0, 1.0])
+    @test vJ≈sum(J, dims = 1)[:] rtol=1e-5
+    Jv = Array{Float64}(undef, 2)
+    optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5])
+    @test Jv≈0.5 * sum(J, dims = 2)[:] rtol=1e-5
+    H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)]
+    optprob.cons_h(H3, x0)
+    @test H3≈[[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] rtol=1e-5
+    H4 = Array{Float64}(undef, 2, 2)
+    μ = randn(2)
+    σ = rand()
+    optprob.lag_h(H4, x0, σ, μ)
+    @test H4≈σ * H1 + sum(μ .* H3) rtol=1e-6
+end
-optf = OptimizationFunction(rosenbrock,
-    OptimizationBase.AutoSparseForwardDiff(),
-    cons = con2_c)
-optprob = OptimizationBase.instantiate_function(optf, x0,
-    OptimizationBase.AutoSparseForwardDiff(),
-    nothing, 2, g = true, h = true, cons_j = true, cons_h = true)
-G2 = Array{Float64}(undef, 2)
-optprob.grad(G2, x0)
-@test G1≈G2 rtol=1e-4
-H2 = Array{Float64}(undef, 2, 2)
-optprob.hess(H2, x0)
-@test H1≈H2 rtol=1e-4
-res = Array{Float64}(undef, 2)
-optprob.cons(res, x0)
-@test res≈[0.0, 0.0] atol=1e-4
-optprob.cons(res, [1.0, 2.0])
-@test res ≈ [5.0, 0.682941969615793]
-J = Array{Float64}(undef, 2, 2)
-optprob.cons_j(J, [5.0, 3.0])
-@test J≈[10.0 6.0; -0.149013 -0.958924] rtol=1e-3
-H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)]
-optprob.cons_h(H3, x0)
-@test H3 ≈ [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]]
+@testset "Sparse Tests" begin
+    # Define a sparse objective function
+    function sparse_objective(x, p)
+        return x[1]^2 + 100 * (x[3] - x[2]^2)^2
+    end
-optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoSparseForwardDiff())
-optprob = OptimizationBase.instantiate_function(optf, x0,
-    OptimizationBase.AutoSparseForwardDiff(),
-    nothing, g = true, h = true)
-optprob.grad(G2, x0)
-@test G1≈G2 rtol=1e-6
-optprob.hess(H2, x0)
-@test H1≈H2 rtol=1e-6
+    # Define sparse constraints
+    function sparse_constraints(res, x, p)
+        res[1] = x[1] + x[2] + (x[2] * x[3])^2 - 1
+        res[2] = x[1]^2 + x[3]^2 - 1
+    end
-optf = OptimizationFunction(rosenbrock,
-    OptimizationBase.AutoSparseReverseDiff(),
-    cons = con2_c)
-optprob = OptimizationBase.instantiate_function(optf, x0,
-    OptimizationBase.AutoSparseReverseDiff(true),
-    nothing, 2, g = true, h = true, cons_j = true, cons_h = true)
-G2 = Array{Float64}(undef, 2)
-optprob.grad(G2, x0)
-@test G1≈G2 rtol=1e-4
-H2 = Array{Float64}(undef, 2, 2)
-optprob.hess(H2, x0)
-@test H1≈H2 rtol=1e-4
-res = Array{Float64}(undef, 2)
-optprob.cons(res, x0)
-@test res≈[0.0, 0.0] atol=1e-4
-optprob.cons(res, [1.0, 2.0])
-@test res ≈ [5.0, 0.682941969615793]
-J = Array{Float64}(undef, 2, 2)
-optprob.cons_j(J, [5.0, 3.0])
-@test J≈[10.0 6.0; -0.149013 -0.958924] rtol=1e-3
-H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)]
-optprob.cons_h(H3, x0)
-@test H3 ≈ [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]]
+    # Initial point
+    x0 = [0.5, 0.5, 0.5]
-optf = OptimizationFunction(rosenbrock,
-    OptimizationBase.AutoSparseReverseDiff(),
-    cons = con2_c)
-optprob = OptimizationBase.instantiate_function(optf, x0,
-    OptimizationBase.AutoSparseReverseDiff(),
-    nothing, 2, g = true, h = true, cons_j = true, cons_h = true)
-G2 = Array{Float64}(undef, 2)
-optprob.grad(G2, x0)
-@test G1≈G2 rtol=1e-4
-H2 = Array{Float64}(undef, 2, 2)
-optprob.hess(H2, x0)
-@test H1≈H2 rtol=1e-4
-res = Array{Float64}(undef, 2)
-optprob.cons(res, x0)
-@test res≈[0.0, 0.0] atol=1e-4
-optprob.cons(res, [1.0, 2.0])
-@test res ≈ [5.0, 0.682941969615793]
-J = Array{Float64}(undef, 2, 2)
-optprob.cons_j(J, [5.0, 3.0])
-@test J≈[10.0 6.0; -0.149013 -0.958924] rtol=1e-3
-H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)]
-optprob.cons_h(H3, x0)
-@test H3 ≈ [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]]
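+    # Reference values at x0 = [0.5, 0.5, 0.5], derived by hand from the definitions above:
+    #   ∇f(x)  = [2x₁, -400x₂(x₃ - x₂²), 200(x₃ - x₂²)]   = [1.0, -50.0, 50.0]
+    #   ∇²f(x) has nonzeros (1,1) = 2, (2,2) = 100, (2,3) = (3,2) = -200, (3,3) = 200
+    #   c(x)   = [x₁ + x₂ + (x₂x₃)² - 1, x₁² + x₃² - 1]    = [0.0625, -0.5]
+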
+    # Create OptimizationFunction
+    optf = OptimizationFunction(sparse_objective, OptimizationBase.AutoSparseForwardDiff(),
+        cons = sparse_constraints)
-optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoSparseReverseDiff())
-optprob = OptimizationBase.instantiate_function(optf, x0,
-    OptimizationBase.AutoSparseReverseDiff(),
-    nothing, g = true, h = true)
-optprob.grad(G2, x0)
-@test G1≈G2 rtol=1e-6
-optprob.hess(H2, x0)
-@test H1≈H2 rtol=1e-6
+    # Instantiate the optimization problem
+    optprob = OptimizationBase.instantiate_function(optf, x0,
+        OptimizationBase.AutoSparseForwardDiff(),
+        nothing, 2, g = true, h = true, cons_j = true, cons_h = true, lag_h = true)
+    # Test gradient
+    G = zeros(3)
+    optprob.grad(G, x0)
+    @test G ≈ [1.0, -50.0, 50.0]
+
+    # Test Hessian
+    H_expected = sparse(
+        [1, 2, 2, 3, 3], [1, 2, 3, 2, 3], [2.0, 100.0, -200.0, -200.0, 200.0], 3, 3)
+    H = similar(optprob.hess_prototype, Float64)
+    optprob.hess(H, x0)
+    @test H ≈ H_expected
+    @test nnz(H) == 5 # Check sparsity
+
+    # Test constraints
+    res = zeros(2)
+    optprob.cons(res, x0)
+    @test res ≈ [0.0625, -0.5]
+
+    # Test constraint Jacobian
+    J_expected = sparse([1, 1, 1, 2, 2], [1, 2, 3, 1, 3], [1.0, 1.25, 0.25, 1.0, 1.0], 2, 3)
+    J = similar(optprob.cons_jac_prototype, Float64)
+    optprob.cons_j(J, x0)
+    @test J ≈ J_expected
+    @test nnz(J) == 5 # Check sparsity
+
+    # Test constraint Hessians
+    H_cons_expected = [sparse([2, 2, 3, 3], [2, 3, 2, 3], [0.5, 1.0, 1.0, 0.5], 3, 3),
+        sparse([1, 3], [1, 3], [2.0, 2.0], 3, 3)]
+    H_cons = [similar(h, Float64) for h in optprob.cons_hess_prototype]
+    optprob.cons_h(H_cons, x0)
+    @test all(H_cons .≈ H_cons_expected)
+    @test all(nnz.(H_cons) .== [4, 2]) # Check sparsity
+
+    lag_H_expected = sparse(
+        [1, 2, 3, 2, 3], [1, 2, 2, 3, 3], [6.0, 100.5, -199.0, -199.0, 204.5], 3, 3)
+    σ = 1.0
+    λ = [1.0, 2.0]
+    lag_H = similar(optprob.lag_hess_prototype, Float64)
+    optprob.lag_h(lag_H, x0, σ, λ)
+    @test lag_H ≈ lag_H_expected
+    @test nnz(lag_H) == 5
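+
+    # The lag_H_expected entries above combine σ * ∇²f with λ₁ ∇²c₁ + λ₂ ∇²c₂
+    # (σ = 1, λ = [1, 2]): e.g. (1,1) = 2 + 0 + 2*2 = 6 and (3,3) = 200 + 0.5 + 2*2 = 204.5.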
+
+    optf = OptimizationFunction(sparse_objective, OptimizationBase.AutoSparseReverseDiff(),
+        cons = sparse_constraints)
+
+    # Instantiate the optimization problem
+    optprob = OptimizationBase.instantiate_function(optf, x0,
+        OptimizationBase.AutoSparseReverseDiff(),
+        nothing, 2, g = true, h = true, cons_j = true, cons_h = true, lag_h = true)
+    # Test gradient
+    G = zeros(3)
+    optprob.grad(G, x0)
+    @test G ≈ [1.0, -50.0, 50.0]
+
+    # Test Hessian
+    H_expected = sparse(
+        [1, 2, 2, 3, 3], [1, 2, 3, 2, 3], [2.0, 100.0, -200.0, -200.0, 200.0], 3, 3)
+    H = similar(optprob.hess_prototype, Float64)
+    optprob.hess(H, x0)
+    @test H ≈ H_expected
+    @test nnz(H) == 5 # Check sparsity
+
+    # Test constraints
+    res = zeros(2)
+    optprob.cons(res, x0)
+    @test res ≈ [0.0625, -0.5]
+
+    # Test constraint Jacobian
+    J_expected = sparse([1, 1, 1, 2, 2], [1, 2, 3, 1, 3], [1.0, 1.25, 0.25, 1.0, 1.0], 2, 3)
+    J = similar(optprob.cons_jac_prototype, Float64)
+    optprob.cons_j(J, x0)
+    @test J ≈ J_expected
+    @test nnz(J) == 5 # Check sparsity
+
+    # Test constraint Hessians
+    H_cons_expected = [sparse([2, 2, 3, 3], [2, 3, 2, 3], [0.5, 1.0, 1.0, 0.5], 3, 3),
+        sparse([1, 3], [1, 3], [2.0, 2.0], 3, 3)]
+    H_cons = [similar(h, Float64) for h in optprob.cons_hess_prototype]
+    optprob.cons_h(H_cons, x0)
+    @test all(H_cons .≈ H_cons_expected)
+    @test all(nnz.(H_cons) .== [4, 2]) # Check sparsity
+
+    lag_H_expected = sparse(
+        [1, 2, 3, 2, 3], [1, 2, 2, 3, 3], [6.0, 100.5, -199.0, -199.0, 204.5], 3, 3)
+    σ = 1.0
+    λ = [1.0, 2.0]
+    lag_H = similar(optprob.lag_hess_prototype, Float64)
+    optprob.lag_h(lag_H, x0, σ, λ)
+    @test lag_H ≈ lag_H_expected
+    @test nnz(lag_H) == 5
+
+    optf = OptimizationFunction(
+        sparse_objective, OptimizationBase.AutoSparseReverseDiff(true),
+        cons = sparse_constraints)
+
+    # Instantiate the optimization problem
+    optprob = OptimizationBase.instantiate_function(optf, x0,
+        OptimizationBase.AutoSparseReverseDiff(true),
+        nothing, 2, g = true, h = true, cons_j = true, cons_h = true, lag_h = true)
+    # Test gradient
+    G = zeros(3)
+    optprob.grad(G, x0)
+    @test G ≈ [1.0, -50.0, 50.0]
+
+    # Test Hessian
+    H_expected = sparse(
+        [1, 2, 2, 3, 3], [1, 2, 3, 2, 3], [2.0, 100.0, -200.0, -200.0, 200.0], 3, 3)
+    H = similar(optprob.hess_prototype, Float64)
+    optprob.hess(H, x0)
+    @test H ≈ H_expected
+    @test nnz(H) == 5 # Check sparsity
+
+    # Test constraints
+    res = zeros(2)
+    optprob.cons(res, x0)
+    @test res ≈ [0.0625, -0.5]
+
+    # Test constraint Jacobian
+    J_expected = sparse([1, 1, 1, 2, 2], [1, 2, 3, 1, 3], [1.0, 1.25, 0.25, 1.0, 1.0], 2, 3)
+    J = similar(optprob.cons_jac_prototype, Float64)
+    optprob.cons_j(J, x0)
+    @test J ≈ J_expected
+    @test nnz(J) == 5 # Check sparsity
+
+    # Test constraint Hessians
+    H_cons_expected = [sparse([2, 2, 3, 3], [2, 3, 2, 3], [0.5, 1.0, 1.0, 0.5], 3, 3),
+        sparse([1, 3], [1, 3], [2.0, 2.0], 3, 3)]
+    H_cons = [similar(h, Float64) for h in optprob.cons_hess_prototype]
+    optprob.cons_h(H_cons, x0)
+    @test all(H_cons .≈ H_cons_expected)
+    @test all(nnz.(H_cons) .== [4, 2]) # Check sparsity
+
+    lag_H_expected = sparse(
+        [1, 2, 3, 2, 3], [1, 2, 2, 3, 3], [6.0, 100.5, -199.0, -199.0, 204.5], 3, 3)
+    σ = 1.0
+    λ = [1.0, 2.0]
+    lag_H = similar(optprob.lag_hess_prototype, Float64)
+    optprob.lag_h(lag_H, x0, σ, λ)
+    @test lag_H ≈ lag_H_expected
+    @test nnz(lag_H) == 5
+
+    optf = OptimizationFunction(sparse_objective, OptimizationBase.AutoSparseFiniteDiff(),
+        cons = sparse_constraints)
+
+    # Instantiate the optimization problem
+    optprob = OptimizationBase.instantiate_function(optf, x0,
+        OptimizationBase.AutoSparseFiniteDiff(),
+        nothing, 2, g = true, h = true, cons_j = true, cons_h = true, lag_h = true)
+    # Test gradient
+    G = zeros(3)
+    optprob.grad(G, x0)
+    @test G ≈ [1.0, -50.0, 50.0]
+
+    # Test Hessian
+    H_expected = sparse(
+        [1, 2, 2, 3, 3], [1, 2, 3, 2, 3], [2.0, 100.0, -200.0, -200.0, 200.0], 3, 3)
+    H = similar(optprob.hess_prototype, Float64)
+    optprob.hess(H, x0)
+    @test H ≈ H_expected
+    @test nnz(H) == 5 # Check sparsity
+
+    # Test constraints
+    res = zeros(2)
+    optprob.cons(res, x0)
+    @test res ≈ [0.0625, -0.5]
+
+    # Test constraint Jacobian
+    J_expected = sparse([1, 1, 1, 2, 2], [1, 2, 3, 1, 3], [1.0, 1.25, 0.25, 1.0, 1.0], 2, 3)
+    J = similar(optprob.cons_jac_prototype, Float64)
+    optprob.cons_j(J, x0)
+    @test J ≈ J_expected
+    @test nnz(J) == 5 # Check sparsity
+
+    # Test constraint Hessians
+    H_cons_expected = [sparse([2, 2, 3, 3], [2, 3, 2, 3], [0.5, 1.0, 1.0, 0.5], 3, 3),
+        sparse([1, 3], [1, 3], [2.0, 2.0], 3, 3)]
+    H_cons = [similar(h, Float64) for h in optprob.cons_hess_prototype]
+    optprob.cons_h(H_cons, x0)
+    @test all(H_cons .≈ H_cons_expected)
+    @test all(nnz.(H_cons) .== [4, 2]) # Check sparsity
+
+    lag_H_expected = sparse(
+        [1, 2, 3, 2, 3], [1, 2, 2, 3, 3], [6.0, 100.5, -199.0, -199.0, 204.5], 3, 3)
+    σ = 1.0
+    λ = [1.0, 2.0]
+    lag_H = similar(optprob.lag_hess_prototype, Float64)
+    optprob.lag_h(lag_H, x0, σ, λ)
+    @test lag_H ≈ lag_H_expected
+    @test nnz(lag_H) == 5
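+
+    # As in the blocks above, the sparsity patterns come from the prototypes returned
+    # by instantiate_function (hess_prototype, cons_jac_prototype, cons_hess_prototype,
+    # lag_hess_prototype); similar(..., Float64) allocates matching sparse buffers to fill.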
+
+    optf = OptimizationFunction(sparse_objective, OptimizationBase.AutoSparseZygote(),
+        cons = sparse_constraints)
+
+    # Instantiate the optimization problem
+    optprob = OptimizationBase.instantiate_function(optf, x0,
+        OptimizationBase.AutoSparseZygote(),
+        nothing, 2, g = true, h = true, cons_j = true, cons_h = true, lag_h = true)
+    # Test gradient
+    G = zeros(3)
+    optprob.grad(G, x0)
+    @test G ≈ [1.0, -50.0, 50.0]
+
+    # Test Hessian
+    H_expected = sparse(
+        [1, 2, 2, 3, 3], [1, 2, 3, 2, 3], [2.0, 100.0, -200.0, -200.0, 200.0], 3, 3)
+    H = similar(optprob.hess_prototype, Float64)
+    optprob.hess(H, x0)
+    @test H ≈ H_expected
+    @test nnz(H) == 5 # Check sparsity
+
+    # Test constraints
+    res = zeros(2)
+    optprob.cons(res, x0)
+    @test res ≈ [0.0625, -0.5]
+
+    # Test constraint Jacobian
+    J_expected = sparse([1, 1, 1, 2, 2], [1, 2, 3, 1, 3], [1.0, 1.25, 0.25, 1.0, 1.0], 2, 3)
+    J = similar(optprob.cons_jac_prototype, Float64)
+    optprob.cons_j(J, x0)
+    @test J ≈ J_expected
+    @test nnz(J) == 5 # Check sparsity
+
+    # Test constraint Hessians
+    H_cons_expected = [sparse([2, 2, 3, 3], [2, 3, 2, 3], [0.5, 1.0, 1.0, 0.5], 3, 3),
+        sparse([1, 3], [1, 3], [2.0, 2.0], 3, 3)]
+    H_cons = [similar(h, Float64) for h in optprob.cons_hess_prototype]
+    optprob.cons_h(H_cons, x0)
+    @test all(H_cons .≈ H_cons_expected)
+    @test all(nnz.(H_cons) .== [4, 2]) # Check sparsity
+
+    lag_H_expected = sparse(
+        [1, 2, 3, 2, 3], [1, 2, 2, 3, 3], [6.0, 100.5, -199.0, -199.0, 204.5], 3, 3)
+    σ = 1.0
+    λ = [1.0, 2.0]
+    lag_H = similar(optprob.lag_hess_prototype, Float64)
+    optprob.lag_h(lag_H, x0, σ, λ)
+    @test lag_H ≈ lag_H_expected
+    @test nnz(lag_H) == 5
+end
 @testset "OOP" begin
     cons = (x, p) -> [x[1]^2 + x[2]^2]
@@ -816,6 +1112,7 @@ using MLUtils
     G0 = zeros(3)
     optf.grad(G0, ones(3))
     stochgrads = []
+    i = 0
     for (x, y) in data
         G = zeros(3)
         optf.grad(G, ones(3), (x, y))
diff --git a/test/matrixvalued.jl b/test/matrixvalued.jl
new file mode 100644
index 0000000..d3f786d
--- /dev/null
+++ b/test/matrixvalued.jl
@@ -0,0 +1,87 @@
+using OptimizationBase, LinearAlgebra, ForwardDiff, Zygote, FiniteDiff,
+      DifferentiationInterface, SparseConnectivityTracer
+using Test, ReverseDiff
+
+@testset "Matrix Valued" begin
+    for adtype in [AutoForwardDiff(), AutoZygote(), AutoFiniteDiff(),
+        AutoSparse(AutoForwardDiff(), sparsity_detector = TracerLocalSparsityDetector()),
+        AutoSparse(AutoZygote(), sparsity_detector = TracerLocalSparsityDetector()),
+        AutoSparse(AutoFiniteDiff(), sparsity_detector = TracerLocalSparsityDetector())]
+        # 1. Matrix Factorization
+        function matrix_factorization_objective(X, A)
+            U, V = @view(X[1:size(A, 1), 1:Int(size(A, 2) / 2)]),
+            @view(X[1:size(A, 1), (Int(size(A, 2) / 2) + 1):size(A, 2)])
+            return norm(A - U * V')
+        end
+        function non_negative_constraint(X, A)
+            U, V = X
+            return [all(U .>= 0) && all(V .>= 0)]
+        end
+        A_mf = rand(4, 4) # Original matrix
+        U_mf = rand(4, 2) # Factor matrix U
+        V_mf = rand(4, 2) # Factor matrix V
+
+        optf = OptimizationFunction{false}(
+            matrix_factorization_objective, adtype, cons = non_negative_constraint)
+        optf = OptimizationBase.instantiate_function(
+            optf, hcat(U_mf, V_mf), adtype, A_mf, g = true, h = true,
+            cons_j = true, cons_h = true)
+        optf.grad(hcat(U_mf, V_mf))
+        optf.hess(hcat(U_mf, V_mf))
+        if adtype != AutoSparse(
+            AutoForwardDiff(), sparsity_detector = TracerLocalSparsityDetector()) &&
+           adtype !=
+           AutoSparse(AutoZygote(), sparsity_detector = TracerLocalSparsityDetector()) &&
+           adtype !=
+           AutoSparse(AutoFiniteDiff(), sparsity_detector = TracerLocalSparsityDetector())
+            optf.cons_j(hcat(U_mf, V_mf))
+            optf.cons_h(hcat(U_mf, V_mf))
+        end
+
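+        # Note: the factorization test above packs U and V side by side into one 4×4
+        # matrix via hcat(U_mf, V_mf); the objective splits it back with @view slices
+        # and measures norm(A - U * V').
+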
+        # 2. Principal Component Analysis (PCA)
+        function pca_objective(X, A)
+            return -tr(X' * A * X) # Minimize the negative of the trace for maximization
+        end
+        function orthogonality_constraint(X, A)
+            return [norm(X' * X - I) < 1e-6]
+        end
+        A_pca = rand(4, 4) # Covariance matrix (can be symmetric positive definite)
+        X_pca = rand(4, 2) # Matrix to hold principal components
+
+        optf = OptimizationFunction{false}(
+            pca_objective, adtype, cons = orthogonality_constraint)
+        optf = OptimizationBase.instantiate_function(
+            optf, X_pca, adtype, A_pca, g = true, h = true,
+            cons_j = true, cons_h = true)
+        optf.grad(X_pca)
+        optf.hess(X_pca)
+        if adtype != AutoSparse(
+            AutoForwardDiff(), sparsity_detector = TracerLocalSparsityDetector()) &&
+           adtype !=
+           AutoSparse(AutoZygote(), sparsity_detector = TracerLocalSparsityDetector()) &&
+           adtype !=
+           AutoSparse(AutoFiniteDiff(), sparsity_detector = TracerLocalSparsityDetector())
+            optf.cons_j(X_pca)
+            optf.cons_h(X_pca)
+        end
+
+        # 3. Matrix Completion
+        function matrix_completion_objective(X, P)
+            A, Omega = P
+            return norm(Omega .* (A - X))
+        end
+        # r = 2 # Rank of the matrix to be completed
+        # function rank_constraint(X, P)
+        #     return [rank(X) <= r]
+        # end
+        A_mc = rand(4, 4) # Original matrix with missing entries
+        Omega_mc = rand(4, 4) .> 0.5 # Mask for observed entries (boolean matrix)
+        X_mc = rand(4, 4) # Matrix to be completed
+        # The rank constraint above is left disabled, so the completion problem is
+        # instantiated without constraints.
+        optf = OptimizationFunction{false}(matrix_completion_objective, adtype)
+        optf = OptimizationBase.instantiate_function(
+            optf, X_mc, adtype, (A_mc, Omega_mc), g = true, h = true)
+        optf.grad(X_mc)
+        optf.hess(X_mc)
+    end
+end