diff --git a/ext/OptimizationEnzymeExt.jl b/ext/OptimizationEnzymeExt.jl index 40ce0ea..e2da730 100644 --- a/ext/OptimizationEnzymeExt.jl +++ b/ext/OptimizationEnzymeExt.jl @@ -43,7 +43,7 @@ function hv_f2_alloc(x, f, p) Enzyme.autodiff_deferred(Enzyme.Reverse, firstapply, Active, - f, + Const(f), Enzyme.Duplicated(x, dx), Const(p) ) @@ -105,7 +105,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, x, ) end elseif g == true - grad = (G, θ) -> f.grad(G, θ, p) + grad = (G, θ, p = p) -> f.grad(G, θ, p) else grad = nothing end @@ -123,7 +123,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, x, return y end elseif fg == true - fg! = (res, θ) -> f.fg(res, θ, p) + fg! = (res, θ, p = p) -> f.fg(res, θ, p) else fg! = nothing end @@ -139,7 +139,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, x, vdbθ = Tuple((copy(r) for r in eachrow(f.hess_prototype))) end - function hess(res, θ) + function hess(res, θ, p = p) Enzyme.make_zero!(bθ) Enzyme.make_zero!.(vdbθ) @@ -156,13 +156,13 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, x, end end elseif h == true - hess = (H, θ) -> f.hess(H, θ, p) + hess = (H, θ, p = p) -> f.hess(H, θ, p) else hess = nothing end if fgh == true && f.fgh === nothing - function fgh!(G, H, θ) + function fgh!(G, H, θ, p = p) vdθ = Tuple((Array(r) for r in eachrow(I(length(θ)) * one(eltype(θ))))) vdbθ = Tuple(zeros(eltype(θ), length(θ)) for i in eachindex(θ)) @@ -179,20 +179,20 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, x, end end elseif fgh == true - fgh! = (G, H, θ) -> f.fgh(G, H, θ, p) + fgh! = (G, H, θ, p = p) -> f.fgh(G, H, θ, p) else fgh! = nothing end if hv == true && f.hv === nothing - function hv!(H, θ, v) + function hv!(H, θ, v, p = p) H .= Enzyme.autodiff( Enzyme.Forward, hv_f2_alloc, DuplicatedNoNeed, Duplicated(θ, v), - Const(_f), Const(f.f), Const(p) + Const(f.f), Const(p) )[1] end elseif hv == true - hv! = (H, θ, v) -> f.hv(H, θ, v, p) + hv! = (H, θ, v, p = p) -> f.hv(H, θ, v, p) else hv! = nothing end @@ -247,7 +247,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, x, cons_j! = nothing end - if cons !== nothing && cons_vjp == true && f.cons_vjp == true + if cons !== nothing && cons_vjp == true && f.cons_vjp === nothing cons_res = zeros(eltype(x), num_cons) function cons_vjp!(res, θ, v) Enzyme.make_zero!(res) @@ -267,7 +267,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, x, cons_vjp! 
= nothing end - if cons !== nothing && cons_jvp == true && f.cons_jvp == true + if cons !== nothing && cons_jvp == true && f.cons_jvp === nothing cons_res = zeros(eltype(x), num_cons) function cons_jvp!(res, θ, v) @@ -327,7 +327,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, x, lag_vdbθ = Tuple((copy(r) for r in eachrow(f.hess_prototype))) end - function lag_h!(h, θ, σ, μ) + function lag_h!(h, θ, σ, μ, p = p) Enzyme.make_zero!(lag_bθ) Enzyme.make_zero!.(lag_vdbθ) @@ -350,8 +350,30 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, x, k += i end end + + function lag_h!(H::AbstractMatrix, θ, σ, μ, p = p) + Enzyme.make_zero!(H) + Enzyme.make_zero!(lag_bθ) + Enzyme.make_zero!.(lag_vdbθ) + + Enzyme.autodiff(Enzyme.Forward, + lag_grad, + Enzyme.BatchDuplicated(θ, lag_vdθ), + Enzyme.BatchDuplicatedNoNeed(lag_bθ, lag_vdbθ), + Const(lagrangian), + Const(f.f), + Const(f.cons), + Const(p), + Const(σ), + Const(μ) + ) + + for i in eachindex(θ) + H[i, :] .= lag_vdbθ[i] + end + end elseif lag_h == true && cons !== nothing - lag_h! = (θ, σ, μ) -> f.lag_h(θ, σ, μ, p) + lag_h! = (θ, σ, μ, p = p) -> f.lag_h(θ, σ, μ, p) else lag_h! = nothing end @@ -389,7 +411,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, x lag_h = false) if g == true && f.grad === nothing res = zeros(eltype(x), size(x)) - function grad(θ) + function grad(θ, p = p) Enzyme.make_zero!(res) Enzyme.autodiff(Enzyme.Reverse, Const(firstapply), @@ -401,14 +423,14 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, x return res end elseif fg == true - grad = (θ) -> f.grad(θ, p) + grad = (θ, p = p) -> f.grad(θ, p) else grad = nothing end if fg == true && f.fg === nothing res_fg = zeros(eltype(x), size(x)) - function fg!(θ) + function fg!(θ, p = p) Enzyme.make_zero!(res_fg) y = Enzyme.autodiff(Enzyme.ReverseWithPrimal, Const(firstapply), @@ -420,7 +442,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, x return y, res end elseif fg == true - fg! = (θ) -> f.fg(θ, p) + fg! = (θ, p = p) -> f.fg(θ, p) else fg! = nothing end @@ -430,7 +452,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, x bθ = zeros(eltype(x), length(x)) vdbθ = Tuple(zeros(eltype(x), length(x)) for i in eachindex(x)) - function hess(θ) + function hess(θ, p = p) Enzyme.make_zero!(bθ) Enzyme.make_zero!.(vdbθ) @@ -446,7 +468,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, x vcat, [reshape(vdbθ[i], (1, length(vdbθ[i]))) for i in eachindex(θ)]) end elseif h == true - hess = (θ) -> f.hess(θ, p) + hess = (θ, p = p) -> f.hess(θ, p) else hess = nothing end @@ -457,7 +479,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, x G_fgh = zeros(eltype(x), length(x)) H_fgh = zeros(eltype(x), length(x), length(x)) - function fgh!(θ) + function fgh!(θ, p = p) Enzyme.make_zero!(G_fgh) Enzyme.make_zero!(H_fgh) Enzyme.make_zero!.(vdbθ_fgh) @@ -476,20 +498,20 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, x return G_fgh, H_fgh end elseif fgh == true - fgh! = (θ) -> f.fgh(θ, p) + fgh! = (θ, p = p) -> f.fgh(θ, p) else fgh! = nothing end if hv == true && f.hv === nothing - function hv!(θ, v) + function hv!(θ, v, p = p) return Enzyme.autodiff( Enzyme.Forward, hv_f2_alloc, DuplicatedNoNeed, Duplicated(θ, v), Const(_f), Const(f.f), Const(p) )[1] end elseif hv == true - hv! = (θ, v) -> f.hv(θ, v, p) + hv! 
= (θ, v, p = p) -> f.hv(θ, v, p) else hv! = f.hv end @@ -604,7 +626,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, x lag_vdbθ = Tuple((copy(r) for r in eachrow(f.hess_prototype))) end - function lag_h!(θ, σ, μ) + function lag_h!(θ, σ, μ, p = p) Enzyme.make_zero!(lag_bθ) Enzyme.make_zero!.(lag_vdbθ) @@ -630,7 +652,7 @@ function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, x return res end elseif lag_h == true && cons !== nothing - lag_h! = (θ, σ, μ) -> f.lag_h(θ, σ, μ, p) + lag_h! = (θ, σ, μ, p = p) -> f.lag_h(θ, σ, μ, p) else lag_h! = nothing end diff --git a/ext/OptimizationZygoteExt.jl b/ext/OptimizationZygoteExt.jl index 75f5650..4916321 100644 --- a/ext/OptimizationZygoteExt.jl +++ b/ext/OptimizationZygoteExt.jl @@ -7,8 +7,12 @@ import OptimizationBase.SciMLBase: OptimizationFunction import OptimizationBase.LinearAlgebra: I, dot import DifferentiationInterface import DifferentiationInterface: prepare_gradient, prepare_hessian, prepare_hvp, - prepare_jacobian, value_and_gradient!, + prepare_pullback, prepare_pushforward, pullback!, + pushforward!, + pullback, pushforward, + prepare_jacobian, value_and_gradient!, value_and_gradient, value_derivative_and_second_derivative!, + value_derivative_and_second_derivative, gradient!, hessian!, hvp!, jacobian!, gradient, hessian, hvp, jacobian using ADTypes, SciMLBase @@ -78,8 +82,17 @@ function OptimizationBase.instantiate_function( function hess(res, θ) hessian!(_f, res, soadtype, θ, extras_hess) end + if p !== SciMLBase.NullParameters() && p !== nothing + function hess(res, θ, p) + global _p = p + hessian!(_f, res, soadtype, θ) + end + end elseif h == true hess = (H, θ) -> f.hess(H, θ, p) + if p !== SciMLBase.NullParameters() && p !== nothing + hess = (H, θ, p) -> f.hess(H, θ, p) + end else hess = nothing end @@ -89,8 +102,18 @@ function OptimizationBase.instantiate_function( (y, _, _) = value_derivative_and_second_derivative!(_f, G, H, θ, extras_hess) return y end + if p !== SciMLBase.NullParameters() && p !== nothing + function fgh!(G, H, θ, p) + global _p = p + (y, _, _) = value_derivative_and_second_derivative!(_f, G, H, θ) + return y + end + end elseif fgh == true fgh! = (G, H, θ) -> f.fgh(G, H, θ, p) + if p !== SciMLBase.NullParameters() && p !== nothing + fgh! = (G, H, θ, p) -> f.fgh(G, H, θ, p) + end else fgh! = nothing end @@ -100,8 +123,17 @@ function OptimizationBase.instantiate_function( function hv!(H, θ, v) hvp!(_f, H, soadtype, θ, v, extras_hvp) end + if p !== SciMLBase.NullParameters() && p !== nothing + function hv!(H, θ, v, p) + global _p = p + hvp!(_f, H, soadtype, θ, v) + end + end elseif hv == true hv! = (H, θ, v) -> f.hv(H, θ, v, p) + if p !== SciMLBase.NullParameters() && p !== nothing + hv! = (H, θ, v, p) -> f.hv(H, θ, v, p) + end else hv! 
= nothing
     end
@@ -119,8 +151,11 @@ function OptimizationBase.instantiate_function(
             return copy(_res)
         end
-        function lagrangian(x, σ = one(eltype(x)), λ = ones(eltype(x), num_cons))
-            return σ * _f(x) + dot(λ, cons_oop(x))
+        function lagrangian(augvars)
+            θ = augvars[1:length(x)]
+            σ = augvars[length(x) + 1]
+            λ = augvars[(length(x) + 2):end]
+            return σ * _f(θ) + dot(λ, cons_oop(θ))
         end
     end
@@ -141,7 +176,7 @@ function OptimizationBase.instantiate_function(
     end
     if f.cons_vjp === nothing && cons_vjp == true && cons !== nothing
-        extras_pullback = prepare_pullback(cons_oop, adtype, x)
+        extras_pullback = prepare_pullback(cons_oop, adtype, x, ones(eltype(x), num_cons))
         function cons_vjp!(J, θ, v)
             pullback!(cons_oop, J, adtype, θ, v, extras_pullback)
         end
@@ -152,7 +187,8 @@ function OptimizationBase.instantiate_function(
     end
     if cons !== nothing && f.cons_jvp === nothing && cons_jvp == true
-        extras_pushforward = prepare_pushforward(cons_oop, adtype, x)
+        extras_pushforward = prepare_pushforward(
+            cons_oop, adtype, x, ones(eltype(x), length(x)))
         function cons_jvp!(J, θ, v)
             pushforward!(cons_oop, J, adtype, θ, v, extras_pushforward)
         end
@@ -182,7 +218,8 @@ function OptimizationBase.instantiate_function(
     lag_hess_prototype = f.lag_hess_prototype
     if f.lag_h === nothing && cons !== nothing && lag_h == true
-        lag_extras = prepare_hessian(lagrangian, soadtype, x)
+        lag_extras = prepare_hessian(
+            lagrangian, soadtype, vcat(x, [one(eltype(x))], ones(eltype(x), num_cons)))
         lag_hess_prototype = zeros(Bool, length(x), length(x))
         function lag_h!(H::AbstractMatrix, θ, σ, λ)
@@ -190,7 +227,8 @@ function OptimizationBase.instantiate_function(
                 cons_h(H, θ)
                 H *= λ
             else
-                hessian!(x -> lagrangian(x, σ, λ), H, soadtype, θ, lag_extras)
+                H .= @view(hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[
+                    1:length(θ), 1:length(θ)])
             end
         end
@@ -206,6 +244,31 @@ function OptimizationBase.instantiate_function(
                 end
             end
         end
+
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            function lag_h!(H::AbstractMatrix, θ, σ, λ, p)
+                if σ == zero(eltype(θ))
+                    cons_h(H, θ)
+                    H *= λ
+                else
+                    global _p = p
+                    H .= @view(hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[
+                        1:length(θ), 1:length(θ)])
+                end
+            end
+
+            function lag_h!(h::AbstractVector, θ, σ, λ, p)
+                global _p = p
+                H = hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)
+                k = 0
+                for i in 1:length(θ)
+                    for j in 1:i
+                        k += 1
+                        h[k] = H[i, j]
+                    end
+                end
+            end
+        end
     elseif cons !== nothing && lag_h == true
         lag_h! = (res, θ, σ, μ) -> f.lag_h(res, θ, σ, μ, p)
     else
         lag_h! = nothing
     end
@@ -305,8 +368,18 @@ function OptimizationBase.instantiate_function(
         end
         hess_sparsity = extras_hess.coloring_result.S
         hess_colors = extras_hess.coloring_result.color
+
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            function hess(res, θ, p)
+                global _p = p
+                hessian!(_f, res, soadtype, θ)
+            end
+        end
     elseif h == true
         hess = (H, θ) -> f.hess(H, θ, p)
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            hess = (H, θ, p) -> f.hess(H, θ, p)
+        end
     else
         hess = nothing
     end
@@ -316,8 +389,19 @@ function OptimizationBase.instantiate_function(
             (y, _, _) = value_derivative_and_second_derivative!(_f, G, H, θ, extras_hess)
             return y
         end
+
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            function fgh!(G, H, θ, p)
+                global _p = p
+                (y, _, _) = value_derivative_and_second_derivative!(_f, G, H, θ)
+                return y
+            end
+        end
     elseif fgh == true
         fgh! = (G, H, θ) -> f.fgh(G, H, θ, p)
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            fgh! = (G, H, θ, p) -> f.fgh(G, H, θ, p)
+        end
     else
         fgh! = nothing
     end
@@ -327,8 +411,17 @@ function OptimizationBase.instantiate_function(
         function hv!(H, θ, v)
             hvp!(_f, H, soadtype.dense_ad, θ, v, extras_hvp)
         end
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            function hv!(H, θ, v, p)
+                global _p = p
+                hvp!(_f, H, soadtype.dense_ad, θ, v)
+            end
+        end
     elseif hv == true
         hv! = (H, θ, v) -> f.hv(H, θ, v, p)
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            hv! = (H, θ, v, p) -> f.hv(H, θ, v, p)
+        end
     else
         hv! = nothing
     end
@@ -346,8 +439,11 @@ function OptimizationBase.instantiate_function(
             return copy(_res)
         end
-        function lagrangian(x, σ = one(eltype(x)), λ = ones(eltype(x), num_cons))
-            return σ * _f(x) + dot(λ, cons_oop(x))
+        function lagrangian(augvars)
+            θ = augvars[1:length(x)]
+            σ = augvars[length(x) + 1]
+            λ = augvars[(length(x) + 2):end]
+            return σ * _f(θ) + dot(λ, cons_oop(θ))
         end
     end
@@ -415,8 +511,9 @@ function OptimizationBase.instantiate_function(
     lag_hess_prototype = f.lag_hess_prototype
     if cons !== nothing && cons_h == true && f.lag_h === nothing
-        lag_extras = prepare_hessian(lagrangian, soadtype, x)
-        lag_hess_prototype = lag_extras.coloring_result.S
+        lag_extras = prepare_hessian(
+            lagrangian, soadtype, vcat(x, [one(eltype(x))], ones(eltype(x), num_cons)))
+        lag_hess_prototype = lag_extras.coloring_result.S[1:length(x), 1:length(x)]
         lag_hess_colors = lag_extras.coloring_result.color
         function lag_h!(H::AbstractMatrix, θ, σ, λ)
@@ -424,13 +521,14 @@ function OptimizationBase.instantiate_function(
                 cons_h(H, θ)
                 H *= λ
             else
-                hessian!(x -> lagrangian(x, σ, λ), H, soadtype, θ, lag_extras)
+                H .= hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[
+                    1:length(θ), 1:length(θ)]
             end
         end
         function lag_h!(h, θ, σ, λ)
-            H = eltype(θ).(lag_hess_prototype)
-            hessian!((x) -> lagrangian(x, σ, λ), H, soadtype, θ, lag_extras)
+            H = hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[
+                1:length(θ), 1:length(θ)]
             k = 0
             rows, cols, _ = findnz(H)
             for (i, j) in zip(rows, cols)
@@ -440,6 +538,31 @@ function OptimizationBase.instantiate_function(
                 end
             end
         end
+
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            function lag_h!(H::AbstractMatrix, θ, σ, λ, p)
+                if σ == zero(eltype(θ))
+                    cons_h(H, θ)
+                    H *= λ
+                else
+                    global _p = p
+                    H .= hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[
+                        1:length(θ), 1:length(θ)]
+                end
+            end
+
+            function lag_h!(h::AbstractVector, θ, σ, λ, p)
+                global _p = p
+                H = hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)
+                k = 0
+                for i in 1:length(θ)
+                    for j in 1:i
+                        k += 1
+                        h[k] = H[i, j]
+                    end
+                end
+            end
+        end
     elseif cons !== nothing && cons_h == true
         lag_h!
= (res, θ, σ, μ) -> f.lag_h(res, θ, σ, μ, p) else diff --git a/src/OptimizationDIExt.jl b/src/OptimizationDIExt.jl index dd6e829..2f346b5 100644 --- a/src/OptimizationDIExt.jl +++ b/src/OptimizationDIExt.jl @@ -4,6 +4,9 @@ import OptimizationBase.SciMLBase: OptimizationFunction import OptimizationBase.LinearAlgebra: I import DifferentiationInterface import DifferentiationInterface: prepare_gradient, prepare_hessian, prepare_hvp, + prepare_pullback, prepare_pushforward, pullback!, + pushforward!, + pullback, pushforward, prepare_jacobian, value_and_gradient!, value_and_gradient, value_derivative_and_second_derivative!, value_derivative_and_second_derivative, @@ -86,8 +89,17 @@ function instantiate_function( function hess(res, θ) hessian!(_f, res, soadtype, θ, extras_hess) end + if p !== SciMLBase.NullParameters() && p !== nothing + function hess(res, θ, p) + global _p = p + hessian!(_f, res, soadtype, θ) + end + end elseif h == true hess = (H, θ) -> f.hess(H, θ, p) + if p !== SciMLBase.NullParameters() && p !== nothing + hess = (H, θ, p) -> f.hess(H, θ, p) + end else hess = nothing end @@ -98,8 +110,19 @@ function instantiate_function( _f, G, H, soadtype, θ, extras_hess) return y end + if p !== SciMLBase.NullParameters() && p !== nothing + function fgh!(G, H, θ, p) + global _p = p + (y, _, _) = value_derivative_and_second_derivative!( + _f, G, H, soadtype, θ) + return y + end + end elseif fgh == true fgh! = (G, H, θ) -> f.fgh(G, H, θ, p) + if p !== SciMLBase.NullParameters() && p !== nothing + fgh! = (G, H, θ, p) -> f.fgh(G, H, θ, p) + end else fgh! = nothing end @@ -109,8 +132,17 @@ function instantiate_function( function hv!(H, θ, v) hvp!(_f, H, soadtype, θ, v, extras_hvp) end + if p !== SciMLBase.NullParameters() && p !== nothing + function hv!(H, θ, v, p) + global _p = p + hvp!(_f, H, soadtype, θ, v) + end + end elseif hv == true hv! = (H, θ, v) -> f.hv(H, θ, v, p) + if p !== SciMLBase.NullParameters() && p !== nothing + hv! = (H, θ, v, p) -> f.hv(H, θ, v, p) + end else hv! 
= nothing end @@ -128,8 +160,11 @@ function instantiate_function( return _res end - function lagrangian(x, σ = one(eltype(x)), λ = ones(eltype(x), num_cons)) - return σ * _f(x) + dot(λ, cons_oop(x)) + function lagrangian(augvars) + θ = augvars[1:length(x)] + σ = augvars[length(x) + 1] + λ = augvars[(length(x) + 2):end] + return σ * _f(θ) + dot(λ, cons_oop(θ)) end end @@ -150,7 +185,7 @@ function instantiate_function( end if f.cons_vjp === nothing && cons_vjp == true && cons !== nothing - extras_pullback = prepare_pullback(cons_oop, adtype, x) + extras_pullback = prepare_pullback(cons_oop, adtype, x, ones(eltype(x), num_cons)) function cons_vjp!(J, θ, v) pullback!(cons_oop, J, adtype, θ, v, extras_pullback) end @@ -161,7 +196,8 @@ function instantiate_function( end if f.cons_jvp === nothing && cons_jvp == true && cons !== nothing - extras_pushforward = prepare_pushforward(cons_oop, adtype, x) + extras_pushforward = prepare_pushforward( + cons_oop, adtype, x, ones(eltype(x), length(x))) function cons_jvp!(J, θ, v) pushforward!(cons_oop, J, adtype, θ, v, extras_pushforward) end @@ -191,7 +227,8 @@ function instantiate_function( lag_hess_prototype = f.lag_hess_prototype if cons !== nothing && lag_h == true && f.lag_h === nothing - lag_extras = prepare_hessian(lagrangian, soadtype, x) + lag_extras = prepare_hessian( + lagrangian, soadtype, vcat(x, [one(eltype(x))], ones(eltype(x), num_cons))) lag_hess_prototype = zeros(Bool, length(x), length(x)) function lag_h!(H::AbstractMatrix, θ, σ, λ) @@ -199,22 +236,46 @@ function instantiate_function( cons_h(H, θ) H *= λ else - hessian!(x -> lagrangian(x, σ, λ), H, soadtype, θ, lag_extras) + H .= @view(hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[ + 1:length(θ), 1:length(θ)]) end end - function lag_h!(h, θ, σ, λ) - H = eltype(θ).(lag_hess_prototype) - hessian!(x -> lagrangian(x, σ, λ), H, soadtype, θ, lag_extras) + function lag_h!(h::AbstractVector, θ, σ, λ) + H = hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras) k = 0 - rows, cols, _ = findnz(H) - for (i, j) in zip(rows, cols) - if i <= j + for i in 1:length(θ) + for j in 1:i k += 1 h[k] = H[i, j] end end end + + if p !== SciMLBase.NullParameters() && p !== nothing + function lag_h!(H::AbstractMatrix, θ, σ, λ, p) + if σ == zero(eltype(θ)) + cons_h(H, θ) + H *= λ + else + global _p = p + H .= @view(hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[ + 1:length(θ), 1:length(θ)]) + end + end + + function lag_h!(h::AbstractVector, θ, σ, λ, p) + global _p = p + H = hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras) + k = 0 + for i in 1:length(θ) + for j in 1:i + k += 1 + h[k] = H[i, j] + end + end + end + end elseif lag_h == true && cons !== nothing lag_h! = (res, θ, σ, μ) -> f.lag_h(res, θ, σ, μ, p) else @@ -316,8 +377,17 @@ function instantiate_function( function hess(θ) hessian(_f, soadtype, θ, extras_hess) end + if p !== SciMLBase.NullParameters() && p !== nothing + function hess(θ, p) + global _p = p + hessian(_f, soadtype, θ) + end + end elseif h == true hess = (θ) -> f.hess(θ, p) + if p !== SciMLBase.NullParameters() && p !== nothing + hess = (θ, p) -> f.hess(θ, p) + end else hess = nothing end @@ -327,8 +397,18 @@ function instantiate_function( (y, G, H) = value_derivative_and_second_derivative(_f, adtype, θ, extras_hess) return y, G, H end + if p !== SciMLBase.NullParameters() && p !== nothing + function fgh!(θ, p) + global _p = p + (y, G, H) = value_derivative_and_second_derivative(_f, adtype, θ) + return y, G, H + end + end elseif fgh == true fgh! 
= (θ) -> f.fgh(θ, p) + if p !== SciMLBase.NullParameters() && p !== nothing + fgh! = (θ, p) -> f.fgh(θ, p) + end else fgh! = nothing end @@ -338,8 +418,17 @@ function instantiate_function( function hv!(θ, v) hvp(_f, soadtype, θ, v, extras_hvp) end + if p !== SciMLBase.NullParameters() && p !== nothing + function hv!(θ, v, p) + global _p = p + hvp(_f, soadtype, θ, v, extras_hvp) + end + end elseif hv == true hv! = (θ, v) -> f.hv(θ, v, p) + if p !== SciMLBase.NullParameters() && p !== nothing + hv! = (θ, v, p) -> f.hv(θ, v, p) + end else hv! = nothing end @@ -351,8 +440,11 @@ function instantiate_function( return f.cons(θ, p) end - function lagrangian(x, σ = one(eltype(x)), λ = ones(eltype(x), num_cons)) - return σ * _f(x) + dot(λ, cons(x)) + function lagrangian(augvars) + θ = augvars[1:length(x)] + σ = augvars[length(x) + 1] + λ = augvars[(length(x) + 2):end] + return σ * _f(θ) + dot(λ, cons(θ)) end end @@ -374,7 +466,7 @@ function instantiate_function( end if f.cons_vjp === nothing && cons_vjp == true && cons !== nothing - extras_pullback = prepare_pullback(cons, adtype, x) + extras_pullback = prepare_pullback(cons, adtype, x, ones(eltype(x), num_cons)) function cons_vjp!(θ, v) return pullback(cons, adtype, θ, v, extras_pullback) end @@ -385,7 +477,8 @@ function instantiate_function( end if f.cons_jvp === nothing && cons_jvp == true && cons !== nothing - extras_pushforward = prepare_pushforward(cons, adtype, x) + extras_pushforward = prepare_pushforward( + cons, adtype, x, ones(eltype(x), length(x))) function cons_jvp!(θ, v) return pushforward(cons, adtype, θ, v, extras_pushforward) end @@ -416,18 +509,28 @@ function instantiate_function( lag_hess_prototype = f.lag_hess_prototype if cons !== nothing && lag_h == true && f.lag_h === nothing - lag_extras = prepare_hessian(lagrangian, soadtype, x) + lag_extras = prepare_hessian( + lagrangian, soadtype, vcat(x, [one(eltype(x))], ones(eltype(x), num_cons))) lag_hess_prototype = zeros(Bool, length(x), length(x)) function lag_h!(θ, σ, λ) if σ == zero(eltype(θ)) - H = cons_h(θ) - for i in 1:num_cons - H[i] *= λ[i] - end - return H + return λ .* cons_h(θ) else - return hessian(x -> lagrangian(x, σ, λ), soadtype, θ, lag_extras) + return hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[ + 1:length(θ), 1:length(θ)] + end + end + + if p !== SciMLBase.NullParameters() && p !== nothing + function lag_h!(θ, σ, λ, p) + if σ == zero(eltype(θ)) + return λ .* cons_h(θ) + else + global _p = p + return hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[ + 1:length(θ), 1:length(θ)] + end end end elseif lag_h == true && cons !== nothing diff --git a/src/OptimizationDISparseExt.jl b/src/OptimizationDISparseExt.jl index 6018d21..ccd8836 100644 --- a/src/OptimizationDISparseExt.jl +++ b/src/OptimizationDISparseExt.jl @@ -168,8 +168,18 @@ function instantiate_function( end hess_sparsity = extras_hess.coloring_result.S hess_colors = extras_hess.coloring_result.color + + if p !== SciMLBase.NullParameters() && p !== nothing + function hess(res, θ, p) + global _p = p + hessian!(_f, res, soadtype, θ, extras_hess) + end + end elseif h == true hess = (H, θ) -> f.hess(H, θ, p) + if p !== SciMLBase.NullParameters() && p !== nothing + hess = (H, θ, p) -> f.hess(H, θ, p) + end else hess = nothing end @@ -179,8 +189,18 @@ function instantiate_function( (y, _, _) = value_derivative_and_second_derivative!(_f, G, H, θ, extras_hess) return y end + if p !== SciMLBase.NullParameters() && p !== nothing + function fgh!(G, H, θ, p) + global _p = p + (y, _, _) = 
value_derivative_and_second_derivative!(_f, G, H, θ, extras_hess) + return y + end + end elseif fgh == true fgh! = (G, H, θ) -> f.fgh(G, H, θ, p) + if p !== SciMLBase.NullParameters() && p !== nothing + fgh! = (G, H, θ, p) -> f.fgh(G, H, θ, p) + end else fgh! = nothing end @@ -190,8 +210,17 @@ function instantiate_function( function hv!(H, θ, v) hvp!(_f, H, soadtype.dense_ad, θ, v, extras_hvp) end + if p !== SciMLBase.NullParameters() && p !== nothing + function hv!(H, θ, v, p) + global _p = p + hvp!(_f, H, soadtype.dense_ad, θ, v, extras_hvp) + end + end elseif hv == true hv! = (H, θ, v) -> f.hv(H, θ, v, p) + if p !== SciMLBase.NullParameters() && p !== nothing + hv! = (H, θ, v, p) -> f.hv(H, θ, v, p) + end else hv! = nothing end @@ -209,8 +238,11 @@ function instantiate_function( return _res end - function lagrangian(x, σ = one(eltype(x)), λ = ones(eltype(x), num_cons)) - return σ * _f(x) + dot(λ, cons_oop(x)) + function lagrangian(augvars) + θ = augvars[1:length(x)] + σ = augvars[length(x) + 1] + λ = augvars[(length(x) + 2):end] + return σ * _f(θ) + dot(λ, cons_oop(θ)) end end @@ -233,7 +265,7 @@ function instantiate_function( end if f.cons_vjp === nothing && cons_vjp == true - extras_pullback = prepare_pullback(cons_oop, adtype, x) + extras_pullback = prepare_pullback(cons_oop, adtype, x, ones(eltype(x), num_cons)) function cons_vjp!(J, θ, v) pullback!(cons_oop, J, adtype.dense_ad, θ, v, extras_pullback) end @@ -244,7 +276,8 @@ function instantiate_function( end if f.cons_jvp === nothing && cons_jvp == true - extras_pushforward = prepare_pushforward(cons_oop, adtype, x) + extras_pushforward = prepare_pushforward( + cons_oop, adtype, x, ones(eltype(x), length(x))) function cons_jvp!(J, θ, v) pushforward!(cons_oop, J, adtype.dense_ad, θ, v, extras_pushforward) end @@ -279,8 +312,9 @@ function instantiate_function( lag_hess_prototype = f.lag_hess_prototype lag_hess_colors = f.lag_hess_colorvec if cons !== nothing && lag_h == true && f.lag_h === nothing - lag_extras = prepare_hessian(lagrangian, soadtype, x) - lag_hess_prototype = lag_extras.coloring_result.S + lag_extras = prepare_hessian( + lagrangian, soadtype, vcat(x, [one(eltype(x))], ones(eltype(x), num_cons))) + lag_hess_prototype = lag_extras.coloring_result.S[1:length(x), 1:length(x)] lag_hess_colors = lag_extras.coloring_result.color function lag_h!(H::AbstractMatrix, θ, σ, λ) @@ -288,13 +322,14 @@ function instantiate_function( cons_h(H, θ) H *= λ else - hessian!(x -> lagrangian(x, σ, λ), H, soadtype, θ, lag_extras) + H .= hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[ + 1:length(θ), 1:length(θ)] end end function lag_h!(h, θ, σ, λ) - H = eltype(θ).(lag_hess_prototype) - hessian!(x -> lagrangian(x, σ, λ), H, soadtype, θ, lag_extras) + H = hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[ + 1:length(θ), 1:length(θ)] k = 0 rows, cols, _ = findnz(H) for (i, j) in zip(rows, cols) @@ -304,8 +339,38 @@ function instantiate_function( end end end + + if p !== SciMLBase.NullParameters() && p !== nothing + function lag_h!(H::AbstractMatrix, θ, σ, λ, p) + if σ == zero(eltype(θ)) + cons_h(H, θ) + H *= λ + else + global _p = p + H .= hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[ + 1:length(θ), 1:length(θ)] + end + end + + function lag_h!(h, θ, σ, λ, p) + global _p = p + H = hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[ + 1:length(θ), 1:length(θ)] + k = 0 + rows, cols, _ = findnz(H) + for (i, j) in zip(rows, cols) + if i <= j + k += 1 + h[k] = H[i, j] + end + end + end + end elseif lag_h == 
true
         lag_h! = (H, θ, σ, λ) -> f.lag_h(H, θ, σ, λ, p)
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            lag_h! = (H, θ, σ, λ, p) -> f.lag_h(H, θ, σ, λ, p)
+        end
     else
         lag_h! = nothing
     end
@@ -397,8 +462,19 @@ function instantiate_function(
             (y, G, H) = value_derivative_and_second_derivative(_f, soadtype, θ, extras_hess)
             return y, G, H
         end
+
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            function fgh!(θ, p)
+                global _p = p
+                (y, G, H) = value_derivative_and_second_derivative(_f, soadtype, θ, extras_hess)
+                return y, G, H
+            end
+        end
     elseif fgh == true
         fgh! = (θ) -> f.fgh(θ, p)
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            fgh! = (θ, p) -> f.fgh(θ, p)
+        end
     else
         fgh! = nothing
     end
@@ -412,8 +488,18 @@ function instantiate_function(
         end
         hess_sparsity = extras_hess.coloring_result.S
         hess_colors = extras_hess.coloring_result.color
+
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            function hess(θ, p)
+                global _p = p
+                hessian(_f, soadtype, θ, extras_hess)
+            end
+        end
     elseif h == true
         hess = (θ) -> f.hess(θ, p)
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            hess = (θ, p) -> f.hess(θ, p)
+        end
     else
         hess = nothing
     end
@@ -423,8 +509,18 @@ function instantiate_function(
         function hv!(θ, v)
             hvp(_f, soadtype.dense_ad, θ, v, extras_hvp)
         end
+
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            function hv!(θ, v, p)
+                global _p = p
+                hvp(_f, soadtype.dense_ad, θ, v, extras_hvp)
+            end
+        end
     elseif hv == true
         hv! = (θ, v) -> f.hv(θ, v, p)
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            hv! = (θ, v, p) -> f.hv(θ, v, p)
+        end
     else
         hv! = nothing
     end
@@ -436,8 +532,11 @@ function instantiate_function(
             f.cons(θ, p)
         end
-        function lagrangian(x, σ = one(eltype(x)), λ = ones(eltype(x), num_cons))
-            return σ * _f(x) + dot(λ, cons(x))
+        function lagrangian(augvars)
+            θ = augvars[1:length(x)]
+            σ = augvars[length(x) + 1]
+            λ = augvars[(length(x) + 2):end]
+            return σ * _f(θ) + dot(λ, cons(θ))
         end
     end
@@ -461,7 +560,7 @@ function instantiate_function(
     end
     if f.cons_vjp === nothing && cons_vjp == true
-        extras_pullback = prepare_pullback(cons, adtype, x)
+        extras_pullback = prepare_pullback(cons, adtype, x, ones(eltype(x), num_cons))
         function cons_vjp!(θ, v)
             pullback(cons, adtype, θ, v, extras_pullback)
         end
@@ -472,7 +571,8 @@ function instantiate_function(
     end
     if f.cons_jvp === nothing && cons_jvp == true
-        extras_pushforward = prepare_pushforward(cons, adtype, x)
+        extras_pushforward = prepare_pushforward(
+            cons, adtype, x, ones(eltype(x), length(x)))
        function cons_jvp!(θ, v)
            pushforward(cons, adtype, θ, v, extras_pushforward)
        end
@@ -506,19 +606,37 @@ function instantiate_function(
     lag_hess_prototype = f.lag_hess_prototype
     lag_hess_colors = f.lag_hess_colorvec
     if cons !== nothing && lag_h == true && f.lag_h === nothing
-        lag_extras = prepare_hessian(lagrangian, soadtype, x)
+        lag_extras = prepare_hessian(
+            lagrangian, soadtype, vcat(x, [one(eltype(x))], ones(eltype(x), num_cons)))
         function lag_h!(θ, σ, λ)
             if σ == zero(eltype(θ))
-                return λ * cons_h!(θ)
+                return λ .* cons_h!(θ)
             else
-                hess = hessian(x -> lagrangian(x, σ, λ), soadtype, θ, lag_extras)
+                hess = hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[
+                    1:length(θ), 1:length(θ)]
                 return hess
             end
         end
-        lag_hess_prototype = lag_extras.coloring_result.S
+        lag_hess_prototype = lag_extras.coloring_result.S[1:length(x), 1:length(x)]
         lag_hess_colors = lag_extras.coloring_result.color
+
+        if p !== SciMLBase.NullParameters() && p !== nothing
+            function lag_h!(θ, σ, λ, p)
+                if σ == zero(eltype(θ))
+
return λ .* cons_h!(θ) + else + global _p = p + hess = hessian(lagrangian, soadtype, vcat(θ, [σ], λ), lag_extras)[ + 1:length(θ), 1:length(θ)] + return hess + end + end + end elseif lag_h == true && cons !== nothing lag_h! = (θ, σ, μ) -> f.lag_h(θ, σ, μ, p) + if p !== SciMLBase.NullParameters() && p !== nothing + lag_h! = (θ, σ, μ, p) -> f.lag_h(θ, σ, μ, p) + end else lag_h! = nothing end diff --git a/test/adtests.jl b/test/adtests.jl index 30aca18..6072cb0 100644 --- a/test/adtests.jl +++ b/test/adtests.jl @@ -26,7 +26,7 @@ H2 = Array{Float64}(undef, 2, 2) g!(G1, x0) h!(H1, x0) -cons = (res, x, p) -> (res .= [x[1]^2 + x[2]^2]; return nothing) +cons = (res, x, p) -> (res[1] = x[1]^2 + x[2]^2; return nothing) optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoModelingToolkit(), cons = cons) optprob = OptimizationBase.instantiate_function(optf, x0, OptimizationBase.AutoModelingToolkit(), @@ -46,7 +46,8 @@ optprob.cons_h(H3, x0) @test H3 == [[2.0 0.0; 0.0 2.0]] function con2_c(res, x, p) - res .= [x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]] + res[1] = x[1]^2 + x[2]^2 + res[2] = x[2] * sin(x[1]) - x[1] return nothing end optf = OptimizationFunction(rosenbrock, @@ -69,424 +70,719 @@ H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] optprob.cons_h(H3, x0) @test H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] -G2 = Array{Float64}(undef, 2) -H2 = Array{Float64}(undef, 2, 2) - -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoEnzyme(), cons = cons) -optprob = OptimizationBase.instantiate_function( - optf, x0, OptimizationBase.AutoEnzyme(), - nothing, 1, g = true, h = true, cons_j = true, cons_h = true) -optprob.grad(G2, x0) -@test G1 == G2 -optprob.hess(H2, x0) -@test H1 == H2 -res = Array{Float64}(undef, 1) -optprob.cons(res, x0) -@test res == [0.0] -J = Array{Float64}(undef, 2) -optprob.cons_j(J, [5.0, 3.0]) -@test J == [10.0, 6.0] -H3 = [Array{Float64}(undef, 2, 2)] -optprob.cons_h(H3, x0) -@test H3 == [[2.0 0.0; 0.0 2.0]] -G2 = Array{Float64}(undef, 2) -H2 = Array{Float64}(undef, 2, 2) -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoEnzyme(), cons = con2_c) -optprob = OptimizationBase.instantiate_function( - optf, x0, OptimizationBase.AutoEnzyme(), - nothing, 2, g = true, h = true, cons_j = true, cons_h = true) -optprob.grad(G2, x0) -@test G1 == G2 -optprob.hess(H2, x0) -@test H1 == H2 -res = Array{Float64}(undef, 2) -optprob.cons(res, x0) -@test res == [0.0, 0.0] -J = Array{Float64}(undef, 2, 2) -optprob.cons_j(J, [5.0, 3.0]) -@test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) -H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] -optprob.cons_h(H3, x0) -@test H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] - -G2 = Array{Float64}(undef, 2) -H2 = Array{Float64}(undef, 2, 2) - -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoReverseDiff(), cons = con2_c) -optprob = OptimizationBase.instantiate_function(optf, x0, - OptimizationBase.AutoReverseDiff(), - nothing, 2, g = true, h = true, cons_j = true, cons_h = true) -optprob.grad(G2, x0) -@test G1 == G2 -optprob.hess(H2, x0) -@test H1 == H2 -res = Array{Float64}(undef, 2) -optprob.cons(res, x0) -@test res == [0.0, 0.0] -J = Array{Float64}(undef, 2, 2) -optprob.cons_j(J, [5.0, 3.0]) -@test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) -H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] -optprob.cons_h(H3, x0) -H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] - -G2 = Array{Float64}(undef, 2) -H2 = Array{Float64}(undef, 2, 2) - -optf = 
OptimizationFunction(rosenbrock, OptimizationBase.AutoReverseDiff(), cons = con2_c) -optprob = OptimizationBase.instantiate_function(optf, x0, - OptimizationBase.AutoReverseDiff(compile = true), - nothing, 2, g = true, h = true, cons_j = true, cons_h = true) -optprob.grad(G2, x0) -@test G1 == G2 -optprob.hess(H2, x0) -@test H1 == H2 -res = Array{Float64}(undef, 2) -optprob.cons(res, x0) -@test res == [0.0, 0.0] -J = Array{Float64}(undef, 2, 2) -optprob.cons_j(J, [5.0, 3.0]) -@test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) -H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] -optprob.cons_h(H3, x0) -H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] - -G2 = Array{Float64}(undef, 2) -H2 = Array{Float64}(undef, 2, 2) - -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoZygote(), cons = con2_c) -optprob = OptimizationBase.instantiate_function(optf, x0, OptimizationBase.AutoZygote(), - nothing, 2, g = true, h = true, cons_j = true, cons_h = true) -optprob.grad(G2, x0) -@test G1 == G2 -optprob.hess(H2, x0) -@test H1 == H2 -res = Array{Float64}(undef, 2) -optprob.cons(res, x0) -@test res == [0.0, 0.0] -J = Array{Float64}(undef, 2, 2) -optprob.cons_j(J, [5.0, 3.0]) -@test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) -H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] -optprob.cons_h(H3, x0) -H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] - -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoModelingToolkit(true, true), - cons = con2_c) -optprob = OptimizationBase.instantiate_function(optf, x0, - OptimizationBase.AutoModelingToolkit(true, true), - nothing, 2, g = true, h = true, cons_j = true, cons_h = true) -using SparseArrays -sH = sparse([1, 1, 2, 2], [1, 2, 1, 2], zeros(4)) -@test findnz(sH)[1:2] == findnz(optprob.hess_prototype)[1:2] -optprob.hess(sH, x0) -@test sH == H2 -res = Array{Float64}(undef, 2) -optprob.cons(res, x0) -@test res == [0.0, 0.0] -sJ = sparse([1, 1, 2, 2], [1, 2, 1, 2], zeros(4)) -@test findnz(sJ)[1:2] == findnz(optprob.cons_jac_prototype)[1:2] -optprob.cons_j(sJ, [5.0, 3.0]) -@test all(isapprox(sJ, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) -sH3 = [sparse([1, 2], [1, 2], zeros(2)), sparse([1, 1, 2], [1, 2, 1], zeros(3))] -@test getindex.(findnz.(sH3), Ref([1, 2])) == - getindex.(findnz.(optprob.cons_hess_prototype), Ref([1, 2])) -optprob.cons_h(sH3, x0) -@test Array.(sH3) == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] - -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoForwardDiff()) -optprob = OptimizationBase.instantiate_function(optf, x0, - OptimizationBase.AutoForwardDiff(), - nothing, g = true, h = true) -optprob.grad(G2, x0) -@test G1 == G2 -optprob.hess(H2, x0) -@test H1 ≈ H2 - -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoZygote()) -optprob = OptimizationBase.instantiate_function(optf, - x0, - OptimizationBase.AutoZygote(), - nothing, g = true, h = true) -optprob.grad(G2, x0) -@test G1 == G2 -optprob.hess(H2, x0) -@test H1 == H2 - -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoReverseDiff()) -optprob = OptimizationBase.instantiate_function(optf, x0, - OptimizationBase.AutoReverseDiff(), - nothing, g = true, h = true) -optprob.grad(G2, x0) -@test G1 == G2 -optprob.hess(H2, x0) -@test H1 == H2 - -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoTracker()) -optprob = OptimizationBase.instantiate_function(optf, - x0, - OptimizationBase.AutoTracker(), - nothing, g = true, h = true) -optprob.grad(G2, x0) -@test G1 == G2 -@test_broken 
optprob.hess(H2, x0) - -prob = OptimizationProblem(optf, x0) - -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoFiniteDiff()) -optprob = OptimizationBase.instantiate_function( - optf, x0, OptimizationBase.AutoFiniteDiff(), - nothing, g = true, h = true) -optprob.grad(G2, x0) -@test G1≈G2 rtol=1e-6 -optprob.hess(H2, x0) -@test H1≈H2 rtol=1e-6 - -# Test new constraints -cons = (res, x, p) -> (res .= [x[1]^2 + x[2]^2]) -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoFiniteDiff(), cons = cons) -optprob = OptimizationBase.instantiate_function( - optf, x0, OptimizationBase.AutoFiniteDiff(), - nothing, 1, g = true, h = true, cons_j = true, cons_h = true) -optprob.grad(G2, x0) -@test G1≈G2 rtol=1e-6 -optprob.hess(H2, x0) -@test H1≈H2 rtol=1e-6 -res = Array{Float64}(undef, 1) -optprob.cons(res, x0) -@test res == [0.0] -optprob.cons(res, [1.0, 4.0]) -@test res == [17.0] -J = zeros(1, 2) -optprob.cons_j(J, [5.0, 3.0]) -@test J ≈ [10.0 6.0] -H3 = [Array{Float64}(undef, 2, 2)] -optprob.cons_h(H3, x0) -@test H3 ≈ [[2.0 0.0; 0.0 2.0]] - -H4 = Array{Float64}(undef, 2, 2) -μ = randn(1) -σ = rand() -# optprob.lag_h(H4, x0, σ, μ) -# @test H4≈σ * H1 + μ[1] * H3[1] rtol=1e-6 - -cons_jac_proto = Float64.(sparse([1 1])) # Things break if you only use [1 1]; see FiniteDiff.jl -cons_jac_colors = 1:2 -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoFiniteDiff(), cons = cons, - cons_jac_prototype = cons_jac_proto, - cons_jac_colorvec = cons_jac_colors) -optprob = OptimizationBase.instantiate_function( - optf, x0, OptimizationBase.AutoFiniteDiff(), - nothing, 1, g = true, h = true, cons_j = true, cons_h = true) -@test optprob.cons_jac_prototype == sparse([1.0 1.0]) # make sure it's still using it -@test optprob.cons_jac_colorvec == 1:2 -J = zeros(1, 2) -optprob.cons_j(J, [5.0, 3.0]) -@test J ≈ [10.0 6.0] - -function con2_c(res, x, p) - res .= [x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]] +@testset "one constraint tests" begin + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoEnzyme(), cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoEnzyme(), + nothing, 1, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 1) + optprob.cons(res, x0) + @test res == [0.0] + J = Array{Float64}(undef, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test J == [10.0, 6.0] + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0]) + @test vJ == [10.0, 6.0] + Jv = Array{Float64}(undef, 1) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == [8.0] + H3 = [Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(1) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + μ[1] * H3[1] rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoForwardDiff(), cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoForwardDiff(), + nothing, 1, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + 
optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 1) + optprob.cons(res, x0) + @test res == [0.0] + J = Array{Float64}(undef, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test J == [10.0, 6.0] + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0]) + @test vJ == [10.0, 6.0] + Jv = Array{Float64}(undef, 1) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == [8.0] + H3 = [Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(1) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + μ[1] * H3[1] rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoReverseDiff(), cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoReverseDiff(), + nothing, 1, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 1) + optprob.cons(res, x0) + @test res == [0.0] + J = Array{Float64}(undef, 2) + @test_broken optprob.cons_j(J, [5.0, 3.0]) + # @test J == [10.0, 6.0] + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0]) + @test vJ == [10.0, 6.0] + Jv = Array{Float64}(undef, 1) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == [8.0] + H3 = [Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(1) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + μ[1] * H3[1] rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction( + rosenbrock, OptimizationBase.AutoReverseDiff(true), cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoReverseDiff(true), + nothing, 1, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 1) + optprob.cons(res, x0) + @test res == [0.0] + J = Array{Float64}(undef, 2) + @test_broken optprob.cons_j(J, [5.0, 3.0]) + # @test J == [10.0, 6.0] + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0]) + @test vJ == [10.0, 6.0] + Jv = Array{Float64}(undef, 1) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == [8.0] + H3 = [Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(1) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + μ[1] * H3[1] rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoZygote(), cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoZygote(), + nothing, 1, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = 
Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 1) + optprob.cons(res, x0) + @test res == [0.0] + J = Array{Float64}(undef, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test J == [10.0, 6.0] + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0]) + @test vJ == [10.0, 6.0] + Jv = Array{Float64}(undef, 1) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == [8.0] + H3 = [Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(1) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + μ[1] * H3[1] rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoFiniteDiff(), cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoFiniteDiff(), + nothing, 1, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1≈G2 rtol=1e-5 + optprob.hess(H2, x0) + @test H1≈H2 rtol=1e-5 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv≈[2.0, 200.0] rtol=1e-5 + res = Array{Float64}(undef, 1) + optprob.cons(res, x0) + @test res ≈ [0.0] + J = Array{Float64}(undef, 1, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test J≈[10.0 6.0] rtol=1e-5 + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0]) + @test vJ≈[10.0, 6.0] rtol=1e-5 + Jv = Array{Float64}(undef, 1) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv≈[8.0] rtol=1e-5 + H3 = [Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3≈[[2.0 0.0; 0.0 2.0]] rtol=1e-5 + H4 = Array{Float64}(undef, 2, 2) + μ = randn(1) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + μ[1] * H3[1] rtol=1e-6 end -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoFiniteDiff(), cons = con2_c) -optprob = OptimizationBase.instantiate_function( - optf, x0, OptimizationBase.AutoFiniteDiff(), - nothing, 2, g = true, h = true, cons_j = true, cons_h = true) -optprob.grad(G2, x0) -@test G1≈G2 rtol=1e-6 -optprob.hess(H2, x0) -@test H1≈H2 rtol=1e-6 -res = Array{Float64}(undef, 2) -optprob.cons(res, x0) -@test res == [0.0, 0.0] -optprob.cons(res, [1.0, 2.0]) -@test res ≈ [5.0, 0.682941969615793] -J = Array{Float64}(undef, 2, 2) -optprob.cons_j(J, [5.0, 3.0]) -@test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) -H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] -optprob.cons_h(H3, x0) -@test H3 ≈ [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] - -cons_jac_proto = Float64.(sparse([1 1; 1 1])) -cons_jac_colors = 1:2 -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoFiniteDiff(), cons = con2_c, - cons_jac_prototype = cons_jac_proto, - cons_jac_colorvec = cons_jac_colors) -optprob = OptimizationBase.instantiate_function( - optf, x0, OptimizationBase.AutoFiniteDiff(), - nothing, 2, g = true, h = true, cons_j = true, cons_h = true) -@test optprob.cons_jac_prototype == sparse([1.0 1.0; 1.0 1.0]) # make sure it's still using it -@test optprob.cons_jac_colorvec == 1:2 -J = Array{Float64}(undef, 2, 2) -optprob.cons_j(J, [5.0, 3.0]) -@test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) -H2 = Array{Float64}(undef, 2, 2) -optprob.hess(H2, [5.0, 3.0]) -@test all(isapprox(H2, [28802.0 -2000.0; -2000.0 200.0]; rtol = 1e-3)) - -cons_j = (J, θ, p) -> optprob.cons_j(J, θ) -hess = (H, θ, p) -> optprob.hess(H, θ) 
-sH = sparse([1, 1, 2, 2], [1, 2, 1, 2], zeros(4)) -sJ = sparse([1, 1, 2, 2], [1, 2, 1, 2], zeros(4)) -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoForwardDiff(), hess = hess, - hess_prototype = copy(sH), cons = con2_c, cons_j = cons_j, - cons_jac_prototype = copy(sJ)) -optprob1 = OptimizationBase.instantiate_function(optf, x0, - OptimizationBase.AutoForwardDiff(), - nothing, 2, g = true, h = true, cons_j = true, cons_h = true) -@test optprob1.hess_prototype == sparse([0.0 0.0; 0.0 0.0]) # make sure it's still using it -optprob1.hess(sH, [5.0, 3.0]) -@test all(isapprox(sH, [28802.0 -2000.0; -2000.0 200.0]; rtol = 1e-3)) -@test optprob1.cons_jac_prototype == sparse([0.0 0.0; 0.0 0.0]) # make sure it's still using it -optprob1.cons_j(sJ, [5.0, 3.0]) -@test all(isapprox(sJ, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) - -grad = (G, θ, p) -> optprob.grad(G, θ) -hess = (H, θ, p) -> optprob.hess(H, θ) -cons_j = (J, θ, p) -> optprob.cons_j(J, θ) -cons_h = (res, θ, p) -> optprob.cons_h(res, θ) -sH = sparse([1, 1, 2, 2], [1, 2, 1, 2], zeros(4)) -sJ = sparse([1, 1, 2, 2], [1, 2, 1, 2], zeros(4)) -sH3 = [sparse([1, 2], [1, 2], zeros(2)), sparse([1, 1, 2], [1, 2, 1], zeros(3))] -optf = OptimizationFunction(rosenbrock, SciMLBase.NoAD(), grad = grad, hess = hess, - cons = con2_c, cons_j = cons_j, cons_h = cons_h, - hess_prototype = sH, cons_jac_prototype = sJ, - cons_hess_prototype = sH3) -optprob2 = OptimizationBase.instantiate_function( - optf, x0, SciMLBase.NoAD(), nothing, 2, g = true, - h = true, cons_j = true, cons_h = true) -optprob2.hess(sH, [5.0, 3.0]) -@test all(isapprox(sH, [28802.0 -2000.0; -2000.0 200.0]; rtol = 1e-3)) -optprob2.cons_j(sJ, [5.0, 3.0]) -@test all(isapprox(sJ, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) -optprob2.cons_h(sH3, [5.0, 3.0]) -@test Array.(sH3)≈[ - [2.0 0.0; 0.0 2.0], - [2.8767727327346804 0.2836621681849162; 0.2836621681849162 -6.622738308376736e-9] -] rtol=1e-4 - -optf = OptimizationFunction(rosenbrock, - OptimizationBase.AutoSparseFiniteDiff(), - cons = con2_c) -optprob = OptimizationBase.instantiate_function(optf, x0, - OptimizationBase.AutoSparseFiniteDiff(), - nothing, 2, g = true, h = true, cons_j = true, cons_h = true) -G2 = Array{Float64}(undef, 2) -optprob.grad(G2, x0) -@test G1≈G2 rtol=1e-4 -H2 = Array{Float64}(undef, 2, 2) -optprob.hess(H2, x0) -@test H1≈H2 rtol=1e-4 -res = Array{Float64}(undef, 2) -optprob.cons(res, x0) -@test res≈[0.0, 0.0] atol=1e-4 -optprob.cons(res, [1.0, 2.0]) -@test res ≈ [5.0, 0.682941969615793] -J = Array{Float64}(undef, 2, 2) -optprob.cons_j(J, [5.0, 3.0]) -@test J≈[10.0 6.0; -0.149013 -0.958924] rtol=1e-3 -H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] -optprob.cons_h(H3, x0) -@test H3 ≈ [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] -optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoSparseFiniteDiff()) -optprob = OptimizationBase.instantiate_function(optf, x0, - OptimizationBase.AutoSparseFiniteDiff(), - nothing, g = true, h = true, cons_j = true, cons_h = true) -optprob.grad(G2, x0) -@test G1≈G2 rtol=1e-6 -optprob.hess(H2, x0) -@test H1≈H2 rtol=1e-4 +@testset "two constraints tests" begin + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoEnzyme(), cons = con2_c) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoEnzyme(), + nothing, 2, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) 
+ @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 2) + optprob.cons(res, x0) + @test res == [0.0, 0.0] + J = Array{Float64}(undef, 2, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0, 1.0]) + @test vJ == sum(J, dims = 1)[:] + Jv = Array{Float64}(undef, 2) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv ≈ 0.5 * sum(J, dims = 2)[:] + H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(2) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + sum(μ .* H3) rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction( + rosenbrock, OptimizationBase.AutoReverseDiff(), cons = con2_c) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoReverseDiff(), + nothing, 2, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 2) + optprob.cons(res, x0) + @test res == [0.0, 0.0] + J = Array{Float64}(undef, 2, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0, 1.0]) + @test vJ == sum(J, dims = 1)[:] + Jv = Array{Float64}(undef, 2) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == 0.5 * sum(J, dims = 2)[:] + H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(2) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + sum(μ .* H3) rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction( + rosenbrock, OptimizationBase.AutoReverseDiff(true), cons = con2_c) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoReverseDiff(true), + nothing, 2, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 2) + optprob.cons(res, x0) + @test res == [0.0, 0.0] + J = Array{Float64}(undef, 2, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0, 1.0]) + @test vJ == sum(J, dims = 1)[:] + Jv = Array{Float64}(undef, 2) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == 0.5 * sum(J, dims = 2)[:] + H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(2) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + sum(μ .* H3) rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 
2) + + optf = OptimizationFunction( + rosenbrock, OptimizationBase.AutoForwardDiff(), cons = con2_c) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoReverseDiff(compile = true), + nothing, 2, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 2) + optprob.cons(res, x0) + @test res == [0.0, 0.0] + J = Array{Float64}(undef, 2, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0, 1.0]) + @test vJ == sum(J, dims = 1)[:] + Jv = Array{Float64}(undef, 2) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == 0.5 * sum(J, dims = 2)[:] + H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(2) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + sum(μ .* H3) rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoZygote(), cons = con2_c) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoZygote(), + nothing, 2, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 2) + optprob.cons(res, x0) + @test res == [0.0, 0.0] + J = Array{Float64}(undef, 2, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0, 1.0]) + @test vJ == sum(J, dims = 1)[:] + Jv = Array{Float64}(undef, 2) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == 0.5 * sum(J, dims = 2)[:] + H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(2) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + sum(μ .* H3) rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction( + rosenbrock, OptimizationBase.AutoFiniteDiff(), cons = con2_c) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoFiniteDiff(), + nothing, 2, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1≈G2 rtol=1e-5 + optprob.hess(H2, x0) + @test H1≈H2 rtol=1e-5 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv≈[2.0, 200.0] rtol=1e-5 + res = Array{Float64}(undef, 2) + optprob.cons(res, x0) + @test res ≈ [0.0, 0.0] + J = Array{Float64}(undef, 2, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0, 1.0]) + @test vJ≈sum(J, dims = 1)[:] rtol=1e-5 + Jv = Array{Float64}(undef, 2) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv≈0.5 * 
+
+    G2 = Array{Float64}(undef, 2)
+    H2 = Array{Float64}(undef, 2, 2)
+
+    optf = OptimizationFunction(
+        rosenbrock, OptimizationBase.AutoFiniteDiff(), cons = con2_c)
+    optprob = OptimizationBase.instantiate_function(
+        optf, x0, OptimizationBase.AutoFiniteDiff(),
+        nothing, 2, g = true, h = true, hv = true,
+        cons_j = true, cons_h = true, cons_vjp = true,
+        cons_jvp = true, lag_h = true)
+    optprob.grad(G2, x0)
+    @test G1≈G2 rtol=1e-5
+    optprob.hess(H2, x0)
+    @test H1≈H2 rtol=1e-5
+    Hv = Array{Float64}(undef, 2)
+    optprob.hv(Hv, x0, [1.0, 1.0])
+    @test Hv≈[2.0, 200.0] rtol=1e-5
+    res = Array{Float64}(undef, 2)
+    optprob.cons(res, x0)
+    @test res ≈ [0.0, 0.0]
+    J = Array{Float64}(undef, 2, 2)
+    optprob.cons_j(J, [5.0, 3.0])
+    @test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3))
+    vJ = Array{Float64}(undef, 2)
+    optprob.cons_vjp(vJ, [5.0, 3.0], [1.0, 1.0])
+    @test vJ≈sum(J, dims = 1)[:] rtol=1e-5
+    Jv = Array{Float64}(undef, 2)
+    optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5])
+    @test Jv≈0.5 * sum(J, dims = 2)[:] rtol=1e-5
+    H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)]
+    optprob.cons_h(H3, x0)
+    @test H3≈[[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] rtol=1e-5
+    H4 = Array{Float64}(undef, 2, 2)
+    μ = randn(2)
+    σ = rand()
+    optprob.lag_h(H4, x0, σ, μ)
+    @test H4≈σ * H1 + sum(μ .* H3) rtol=1e-6
+end
-optf = OptimizationFunction(rosenbrock,
-    OptimizationBase.AutoSparseForwardDiff(),
-    cons = con2_c)
-optprob = OptimizationBase.instantiate_function(optf, x0,
-    OptimizationBase.AutoSparseForwardDiff(),
-    nothing, 2, g = true, h = true, cons_j = true, cons_h = true)
-G2 = Array{Float64}(undef, 2)
-optprob.grad(G2, x0)
-@test G1≈G2 rtol=1e-4
-H2 = Array{Float64}(undef, 2, 2)
-optprob.hess(H2, x0)
-@test H1≈H2 rtol=1e-4
-res = Array{Float64}(undef, 2)
-optprob.cons(res, x0)
-@test res≈[0.0, 0.0] atol=1e-4
-optprob.cons(res, [1.0, 2.0])
-@test res ≈ [5.0, 0.682941969615793]
-J = Array{Float64}(undef, 2, 2)
-optprob.cons_j(J, [5.0, 3.0])
-@test J≈[10.0 6.0; -0.149013 -0.958924] rtol=1e-3
-H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)]
-optprob.cons_h(H3, x0)
-@test H3 ≈ [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]]
+@testset "Sparse Tests" begin
+    # Define a sparse objective function
+    function sparse_objective(x, p)
+        return x[1]^2 + 100 * (x[3] - x[2]^2)^2
+    end
-optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoSparseForwardDiff())
-optprob = OptimizationBase.instantiate_function(optf, x0,
-    OptimizationBase.AutoSparseForwardDiff(),
-    nothing, g = true, h = true)
-optprob.grad(G2, x0)
-@test G1≈G2 rtol=1e-6
-optprob.hess(H2, x0)
-@test H1≈H2 rtol=1e-6
+    # Define sparse constraints
+    function sparse_constraints(res, x, p)
+        res[1] = x[1] + x[2] + (x[2] * x[3])^2 - 1
+        res[2] = x[1]^2 + x[3]^2 - 1
+    end
-optf = OptimizationFunction(rosenbrock,
-    OptimizationBase.AutoSparseReverseDiff(),
-    cons = con2_c)
-optprob = OptimizationBase.instantiate_function(optf, x0,
-    OptimizationBase.AutoSparseReverseDiff(true),
-    nothing, 2, g = true, h = true, cons_j = true, cons_h = true)
-G2 = Array{Float64}(undef, 2)
-optprob.grad(G2, x0)
-@test G1≈G2 rtol=1e-4
-H2 = Array{Float64}(undef, 2, 2)
-optprob.hess(H2, x0)
-@test H1≈H2 rtol=1e-4
-res = Array{Float64}(undef, 2)
-optprob.cons(res, x0)
-@test res≈[0.0, 0.0] atol=1e-4
-optprob.cons(res, [1.0, 2.0])
-@test res ≈ [5.0, 0.682941969615793]
-J = Array{Float64}(undef, 2, 2)
-optprob.cons_j(J, [5.0, 3.0])
-@test J≈[10.0 6.0; -0.149013 -0.958924] rtol=1e-3
-H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)]
-optprob.cons_h(H3, x0)
-@test H3 ≈ [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]]
+    # Initial point
+    x0 = [0.5, 0.5, 0.5]
-optf = OptimizationFunction(rosenbrock,
-    OptimizationBase.AutoSparseReverseDiff(),
-    cons = con2_c)
-optprob = OptimizationBase.instantiate_function(optf, x0,
-    OptimizationBase.AutoSparseReverseDiff(),
-    nothing, 2, g = true, h = true, cons_j = true, cons_h = true)
-G2 = Array{Float64}(undef, 2)
-optprob.grad(G2, x0)
-@test G1≈G2 rtol=1e-4
-H2 = Array{Float64}(undef, 2, 2)
-optprob.hess(H2, x0)
-@test H1≈H2 rtol=1e-4
-res = Array{Float64}(undef, 2)
-optprob.cons(res, x0)
-@test res≈[0.0, 0.0] atol=1e-4
-optprob.cons(res, [1.0, 2.0])
-@test res ≈ [5.0, 0.682941969615793]
-J = Array{Float64}(undef, 2, 2)
-optprob.cons_j(J, [5.0, 3.0])
-@test J≈[10.0 6.0; -0.149013 -0.958924] rtol=1e-3
-H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)]
-optprob.cons_h(H3, x0)
-@test H3 ≈ [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]]
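+    # Reference values at x0 = [0.5, 0.5, 0.5], derived by hand from the definitions above:
+    #   ∇f(x)  = [2x₁, -400x₂(x₃ - x₂²), 200(x₃ - x₂²)]   = [1.0, -50.0, 50.0]
+    #   ∇²f(x) has nonzeros (1,1) = 2, (2,2) = 100, (2,3) = (3,2) = -200, (3,3) = 200
+    #   c(x)   = [x₁ + x₂ + (x₂x₃)² - 1, x₁² + x₃² - 1]    = [0.0625, -0.5]
+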
+    # Create OptimizationFunction
+    optf = OptimizationFunction(sparse_objective, OptimizationBase.AutoSparseForwardDiff(),
+        cons = sparse_constraints)
-optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoSparseReverseDiff())
-optprob = OptimizationBase.instantiate_function(optf, x0,
-    OptimizationBase.AutoSparseReverseDiff(),
-    nothing, g = true, h = true)
-optprob.grad(G2, x0)
-@test G1≈G2 rtol=1e-6
-optprob.hess(H2, x0)
-@test H1≈H2 rtol=1e-6
+    # Instantiate the optimization problem
+    optprob = OptimizationBase.instantiate_function(optf, x0,
+        OptimizationBase.AutoSparseForwardDiff(),
+        nothing, 2, g = true, h = true, cons_j = true, cons_h = true, lag_h = true)
+    # Test gradient
+    G = zeros(3)
+    optprob.grad(G, x0)
+    @test G ≈ [1.0, -50.0, 50.0]
+
+    # Test Hessian
+    H_expected = sparse(
+        [1, 2, 2, 3, 3], [1, 2, 3, 2, 3], [2.0, 100.0, -200.0, -200.0, 200.0], 3, 3)
+    H = similar(optprob.hess_prototype, Float64)
+    optprob.hess(H, x0)
+    @test H ≈ H_expected
+    @test nnz(H) == 5 # Check sparsity
+
+    # Test constraints
+    res = zeros(2)
+    optprob.cons(res, x0)
+    @test res ≈ [0.0625, -0.5]
+
+    # Test constraint Jacobian
+    J_expected = sparse([1, 1, 1, 2, 2], [1, 2, 3, 1, 3], [1.0, 1.25, 0.25, 1.0, 1.0], 2, 3)
+    J = similar(optprob.cons_jac_prototype, Float64)
+    optprob.cons_j(J, x0)
+    @test J ≈ J_expected
+    @test nnz(J) == 5 # Check sparsity
+
+    # Test constraint Hessians
+    H_cons_expected = [sparse([2, 2, 3, 3], [2, 3, 2, 3], [0.5, 1.0, 1.0, 0.5], 3, 3),
+        sparse([1, 3], [1, 3], [2.0, 2.0], 3, 3)]
+    H_cons = [similar(h, Float64) for h in optprob.cons_hess_prototype]
+    optprob.cons_h(H_cons, x0)
+    @test all(H_cons .≈ H_cons_expected)
+    @test all(nnz.(H_cons) .== [4, 2]) # Check sparsity
+
+    lag_H_expected = sparse(
+        [1, 2, 3, 2, 3], [1, 2, 2, 3, 3], [6.0, 100.5, -199.0, -199.0, 204.5], 3, 3)
+    σ = 1.0
+    λ = [1.0, 2.0]
+    lag_H = similar(optprob.lag_hess_prototype, Float64)
+    optprob.lag_h(lag_H, x0, σ, λ)
+    @test lag_H ≈ lag_H_expected
+    @test nnz(lag_H) == 5
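+
+    # The lag_H_expected entries above combine σ * ∇²f with λ₁ ∇²c₁ + λ₂ ∇²c₂
+    # (σ = 1, λ = [1, 2]): e.g. (1,1) = 2 + 0 + 2*2 = 6 and (3,3) = 200 + 0.5 + 2*2 = 204.5.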
+
+    optf = OptimizationFunction(sparse_objective, OptimizationBase.AutoSparseReverseDiff(),
+        cons = sparse_constraints)
+
+    # Instantiate the optimization problem
+    optprob = OptimizationBase.instantiate_function(optf, x0,
+        OptimizationBase.AutoSparseReverseDiff(),
+        nothing, 2, g = true, h = true, cons_j = true, cons_h = true, lag_h = true)
+    # Test gradient
+    G = zeros(3)
+    optprob.grad(G, x0)
+    @test G ≈ [1.0, -50.0, 50.0]
+
+    # Test Hessian
+    H_expected = sparse(
+        [1, 2, 2, 3, 3], [1, 2, 3, 2, 3], [2.0, 100.0, -200.0, -200.0, 200.0], 3, 3)
+    H = similar(optprob.hess_prototype, Float64)
+    optprob.hess(H, x0)
+    @test H ≈ H_expected
+    @test nnz(H) == 5 # Check sparsity
+
+    # Test constraints
+    res = zeros(2)
+    optprob.cons(res, x0)
+    @test res ≈ [0.0625, -0.5]
+
+    # Test constraint Jacobian
+    J_expected = sparse([1, 1, 1, 2, 2], [1, 2, 3, 1, 3], [1.0, 1.25, 0.25, 1.0, 1.0], 2, 3)
+    J = similar(optprob.cons_jac_prototype, Float64)
+    optprob.cons_j(J, x0)
+    @test J ≈ J_expected
+    @test nnz(J) == 5 # Check sparsity
+
+    # Test constraint Hessians
+    H_cons_expected = [sparse([2, 2, 3, 3], [2, 3, 2, 3], [0.5, 1.0, 1.0, 0.5], 3, 3),
+        sparse([1, 3], [1, 3], [2.0, 2.0], 3, 3)]
+    H_cons = [similar(h, Float64) for h in optprob.cons_hess_prototype]
+    optprob.cons_h(H_cons, x0)
+    @test all(H_cons .≈ H_cons_expected)
+    @test all(nnz.(H_cons) .== [4, 2]) # Check sparsity
+
+    lag_H_expected = sparse(
+        [1, 2, 3, 2, 3], [1, 2, 2, 3, 3], [6.0, 100.5, -199.0, -199.0, 204.5], 3, 3)
+    σ = 1.0
+    λ = [1.0, 2.0]
+    lag_H = similar(optprob.lag_hess_prototype, Float64)
+    optprob.lag_h(lag_H, x0, σ, λ)
+    @test lag_H ≈ lag_H_expected
+    @test nnz(lag_H) == 5
+
+    optf = OptimizationFunction(
+        sparse_objective, OptimizationBase.AutoSparseReverseDiff(true),
+        cons = sparse_constraints)
+
+    # Instantiate the optimization problem
+    optprob = OptimizationBase.instantiate_function(optf, x0,
+        OptimizationBase.AutoSparseReverseDiff(true),
+        nothing, 2, g = true, h = true, cons_j = true, cons_h = true, lag_h = true)
+    # Test gradient
+    G = zeros(3)
+    optprob.grad(G, x0)
+    @test G ≈ [1.0, -50.0, 50.0]
+
+    # Test Hessian
+    H_expected = sparse(
+        [1, 2, 2, 3, 3], [1, 2, 3, 2, 3], [2.0, 100.0, -200.0, -200.0, 200.0], 3, 3)
+    H = similar(optprob.hess_prototype, Float64)
+    optprob.hess(H, x0)
+    @test H ≈ H_expected
+    @test nnz(H) == 5 # Check sparsity
+
+    # Test constraints
+    res = zeros(2)
+    optprob.cons(res, x0)
+    @test res ≈ [0.0625, -0.5]
+
+    # Test constraint Jacobian
+    J_expected = sparse([1, 1, 1, 2, 2], [1, 2, 3, 1, 3], [1.0, 1.25, 0.25, 1.0, 1.0], 2, 3)
+    J = similar(optprob.cons_jac_prototype, Float64)
+    optprob.cons_j(J, x0)
+    @test J ≈ J_expected
+    @test nnz(J) == 5 # Check sparsity
+
+    # Test constraint Hessians
+    H_cons_expected = [sparse([2, 2, 3, 3], [2, 3, 2, 3], [0.5, 1.0, 1.0, 0.5], 3, 3),
+        sparse([1, 3], [1, 3], [2.0, 2.0], 3, 3)]
+    H_cons = [similar(h, Float64) for h in optprob.cons_hess_prototype]
+    optprob.cons_h(H_cons, x0)
+    @test all(H_cons .≈ H_cons_expected)
+    @test all(nnz.(H_cons) .== [4, 2]) # Check sparsity
+
+    lag_H_expected = sparse(
+        [1, 2, 3, 2, 3], [1, 2, 2, 3, 3], [6.0, 100.5, -199.0, -199.0, 204.5], 3, 3)
+    σ = 1.0
+    λ = [1.0, 2.0]
+    lag_H = similar(optprob.lag_hess_prototype, Float64)
+    optprob.lag_h(lag_H, x0, σ, λ)
+    @test lag_H ≈ lag_H_expected
+    @test nnz(lag_H) == 5
+
+    optf = OptimizationFunction(sparse_objective, OptimizationBase.AutoSparseFiniteDiff(),
+        cons = sparse_constraints)
+
+    # Instantiate the optimization problem
+    optprob = OptimizationBase.instantiate_function(optf, x0,
+        OptimizationBase.AutoSparseFiniteDiff(),
+        nothing, 2, g = true, h = true, cons_j = true, cons_h = true, lag_h = true)
+    # Test gradient
+    G = zeros(3)
+    optprob.grad(G, x0)
+    @test G ≈ [1.0, -50.0, 50.0]
+
+    # Test Hessian
+    H_expected = sparse(
+        [1, 2, 2, 3, 3], [1, 2, 3, 2, 3], [2.0, 100.0, -200.0, -200.0, 200.0], 3, 3)
+    H = similar(optprob.hess_prototype, Float64)
+    optprob.hess(H, x0)
+    @test H ≈ H_expected
+    @test nnz(H) == 5 # Check sparsity
+
+    # Test constraints
+    res = zeros(2)
+    optprob.cons(res, x0)
+    @test res ≈ [0.0625, -0.5]
+
+    # Test constraint Jacobian
+    J_expected = sparse([1, 1, 1, 2, 2], [1, 2, 3, 1, 3], [1.0, 1.25, 0.25, 1.0, 1.0], 2, 3)
+    J = similar(optprob.cons_jac_prototype, Float64)
+    optprob.cons_j(J, x0)
+    @test J ≈ J_expected
+    @test nnz(J) == 5 # Check sparsity
+
+    # Test constraint Hessians
+    H_cons_expected = [sparse([2, 2, 3, 3], [2, 3, 2, 3], [0.5, 1.0, 1.0, 0.5], 3, 3),
+        sparse([1, 3], [1, 3], [2.0, 2.0], 3, 3)]
+    H_cons = [similar(h, Float64) for h in optprob.cons_hess_prototype]
+    optprob.cons_h(H_cons, x0)
+    @test all(H_cons .≈ H_cons_expected)
+    @test all(nnz.(H_cons) .== [4, 2]) # Check sparsity
+
+    lag_H_expected = sparse(
+        [1, 2, 3, 2, 3], [1, 2, 2, 3, 3], [6.0, 100.5, -199.0, -199.0, 204.5], 3, 3)
+    σ = 1.0
+    λ = [1.0, 2.0]
+    lag_H = similar(optprob.lag_hess_prototype, Float64)
+    optprob.lag_h(lag_H, x0, σ, λ)
+    @test lag_H ≈ lag_H_expected
+    @test nnz(lag_H) == 5
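+
+    # As in the blocks above, the sparsity patterns come from the prototypes returned
+    # by instantiate_function (hess_prototype, cons_jac_prototype, cons_hess_prototype,
+    # lag_hess_prototype); similar(..., Float64) allocates matching sparse buffers to fill.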
+
+    optf = OptimizationFunction(sparse_objective, OptimizationBase.AutoSparseZygote(),
+        cons = sparse_constraints)
+
+    # Instantiate the optimization problem
+    optprob = OptimizationBase.instantiate_function(optf, x0,
+        OptimizationBase.AutoSparseZygote(),
+        nothing, 2, g = true, h = true, cons_j = true, cons_h = true, lag_h = true)
+    # Test gradient
+    G = zeros(3)
+    optprob.grad(G, x0)
+    @test G ≈ [1.0, -50.0, 50.0]
+
+    # Test Hessian
+    H_expected = sparse(
+        [1, 2, 2, 3, 3], [1, 2, 3, 2, 3], [2.0, 100.0, -200.0, -200.0, 200.0], 3, 3)
+    H = similar(optprob.hess_prototype, Float64)
+    optprob.hess(H, x0)
+    @test H ≈ H_expected
+    @test nnz(H) == 5 # Check sparsity
+
+    # Test constraints
+    res = zeros(2)
+    optprob.cons(res, x0)
+    @test res ≈ [0.0625, -0.5]
+
+    # Test constraint Jacobian
+    J_expected = sparse([1, 1, 1, 2, 2], [1, 2, 3, 1, 3], [1.0, 1.25, 0.25, 1.0, 1.0], 2, 3)
+    J = similar(optprob.cons_jac_prototype, Float64)
+    optprob.cons_j(J, x0)
+    @test J ≈ J_expected
+    @test nnz(J) == 5 # Check sparsity
+
+    # Test constraint Hessians
+    H_cons_expected = [sparse([2, 2, 3, 3], [2, 3, 2, 3], [0.5, 1.0, 1.0, 0.5], 3, 3),
+        sparse([1, 3], [1, 3], [2.0, 2.0], 3, 3)]
+    H_cons = [similar(h, Float64) for h in optprob.cons_hess_prototype]
+    optprob.cons_h(H_cons, x0)
+    @test all(H_cons .≈ H_cons_expected)
+    @test all(nnz.(H_cons) .== [4, 2]) # Check sparsity
+
+    lag_H_expected = sparse(
+        [1, 2, 3, 2, 3], [1, 2, 2, 3, 3], [6.0, 100.5, -199.0, -199.0, 204.5], 3, 3)
+    σ = 1.0
+    λ = [1.0, 2.0]
+    lag_H = similar(optprob.lag_hess_prototype, Float64)
+    optprob.lag_h(lag_H, x0, σ, λ)
+    @test lag_H ≈ lag_H_expected
+    @test nnz(lag_H) == 5
+end
 @testset "OOP" begin
     cons = (x, p) -> [x[1]^2 + x[2]^2]
@@ -816,6 +1112,7 @@ using MLUtils
     G0 = zeros(3)
     optf.grad(G0, ones(3))
     stochgrads = []
+    i = 0
     for (x, y) in data
         G = zeros(3)
         optf.grad(G, ones(3), (x, y))
diff --git a/test/matrixvalued.jl b/test/matrixvalued.jl
new file mode 100644
index 0000000..d3f786d
--- /dev/null
+++ b/test/matrixvalued.jl
@@ -0,0 +1,87 @@
+using OptimizationBase, LinearAlgebra, ForwardDiff, Zygote, FiniteDiff,
+      DifferentiationInterface, SparseConnectivityTracer
+using Test, ReverseDiff
+
+@testset "Matrix Valued" begin
+    for adtype in [AutoForwardDiff(), AutoZygote(), AutoFiniteDiff(),
+        AutoSparse(AutoForwardDiff(), sparsity_detector = TracerLocalSparsityDetector()),
+        AutoSparse(AutoZygote(), sparsity_detector = TracerLocalSparsityDetector()),
+        AutoSparse(AutoFiniteDiff(), sparsity_detector = TracerLocalSparsityDetector())]
+        # 1. Matrix Factorization
+        function matrix_factorization_objective(X, A)
+            U, V = @view(X[1:size(A, 1), 1:Int(size(A, 2) / 2)]),
+            @view(X[1:size(A, 1), (Int(size(A, 2) / 2) + 1):size(A, 2)])
+            return norm(A - U * V')
+        end
+        function non_negative_constraint(X, A)
+            U, V = X
+            return [all(U .>= 0) && all(V .>= 0)]
+        end
+        A_mf = rand(4, 4) # Original matrix
+        U_mf = rand(4, 2) # Factor matrix U
+        V_mf = rand(4, 2) # Factor matrix V
+
+        optf = OptimizationFunction{false}(
+            matrix_factorization_objective, adtype, cons = non_negative_constraint)
+        optf = OptimizationBase.instantiate_function(
+            optf, hcat(U_mf, V_mf), adtype, A_mf, g = true, h = true,
+            cons_j = true, cons_h = true)
+        optf.grad(hcat(U_mf, V_mf))
+        optf.hess(hcat(U_mf, V_mf))
+        if adtype != AutoSparse(
+            AutoForwardDiff(), sparsity_detector = TracerLocalSparsityDetector()) &&
+           adtype !=
+           AutoSparse(AutoZygote(), sparsity_detector = TracerLocalSparsityDetector()) &&
+           adtype !=
+           AutoSparse(AutoFiniteDiff(), sparsity_detector = TracerLocalSparsityDetector())
+            optf.cons_j(hcat(U_mf, V_mf))
+            optf.cons_h(hcat(U_mf, V_mf))
+        end
+
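+        # Note: the factorization test above packs U and V side by side into one 4×4
+        # matrix via hcat(U_mf, V_mf); the objective splits it back with @view slices
+        # and measures norm(A - U * V').
+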
+        # 2. Principal Component Analysis (PCA)
+        function pca_objective(X, A)
+            return -tr(X' * A * X) # Minimize the negative of the trace for maximization
+        end
+        function orthogonality_constraint(X, A)
+            return [norm(X' * X - I) < 1e-6]
+        end
+        A_pca = rand(4, 4) # Covariance matrix (can be symmetric positive definite)
+        X_pca = rand(4, 2) # Matrix to hold principal components
+
+        optf = OptimizationFunction{false}(
+            pca_objective, adtype, cons = orthogonality_constraint)
+        optf = OptimizationBase.instantiate_function(
+            optf, X_pca, adtype, A_pca, g = true, h = true,
+            cons_j = true, cons_h = true)
+        optf.grad(X_pca)
+        optf.hess(X_pca)
+        if adtype != AutoSparse(
+            AutoForwardDiff(), sparsity_detector = TracerLocalSparsityDetector()) &&
+           adtype !=
+           AutoSparse(AutoZygote(), sparsity_detector = TracerLocalSparsityDetector()) &&
+           adtype !=
+           AutoSparse(AutoFiniteDiff(), sparsity_detector = TracerLocalSparsityDetector())
+            optf.cons_j(X_pca)
+            optf.cons_h(X_pca)
+        end
+
+        # 3. Matrix Completion
+        function matrix_completion_objective(X, P)
+            A, Omega = P
+            return norm(Omega .* (A - X))
+        end
+        # r = 2 # Rank of the matrix to be completed
+        # function rank_constraint(X, P)
+        #     return [rank(X) <= r]
+        # end
+        A_mc = rand(4, 4) # Original matrix with missing entries
+        Omega_mc = rand(4, 4) .> 0.5 # Mask for observed entries (boolean matrix)
+        X_mc = rand(4, 4) # Matrix to be completed
+        # The rank constraint above is left disabled, so the completion problem is
+        # instantiated without constraints.
+        optf = OptimizationFunction{false}(matrix_completion_objective, adtype)
+        optf = OptimizationBase.instantiate_function(
+            optf, X_mc, adtype, (A_mc, Omega_mc), g = true, h = true)
+        optf.grad(X_mc)
+        optf.hess(X_mc)
+    end
+end