
Commit fd343ab

Merge pull request #318 from DanielVandH/master
Constraints for AutoFiniteDiff
2 parents a9be183 + 9ed5aae


2 files changed, +148 -15 lines changed


src/function/finitediff.jl

Lines changed: 48 additions & 15 deletions
@@ -1,5 +1,5 @@
 """
-AutoFiniteDiff{T1,T2} <: AbstractADType
+AutoFiniteDiff{T1,T2,T3} <: AbstractADType
 
 An AbstractADType choice for use in OptimizationFunction for automatically
 generating the unspecified derivative functions. Usage:
@@ -18,7 +18,7 @@ introduces numerical error into the derivative estimates.
 - Compatible with GPUs
 - Compatible with Hessian-based optimization
 - Compatible with Hv-based optimization
-- Not compatible with constraint functions
+- Compatible with constraint functions
 
 Note that only the unspecified derivative functions are defined. For example,
 if a `hess` function is supplied to the `OptimizationFunction`, then the
@@ -27,35 +27,36 @@ Hessian is not defined via FiniteDiff.
 ## Constructor
 
 ```julia
-AutoFiniteDiff(;fdtype = Val(:forward), fdhtype = Val(:hcentral))
+AutoFiniteDiff(;fdtype = Val(:forward), fdjtype = fdtype, fdhtype = Val(:hcentral))
 ```
 
 - `fdtype`: the method used for defining the gradient
+- `fdjtype`: the method used for defining the Jacobian of constraints
 - `fdhtype`: the method used for defining the Hessian
 
 For more information on the derivative type specifiers, see the
 [FiniteDiff.jl documentation](https://github.com/JuliaDiff/FiniteDiff.jl).
 """
-struct AutoFiniteDiff{T1,T2} <: AbstractADType
+struct AutoFiniteDiff{T1,T2,T3} <: AbstractADType
     fdtype::T1
-    fdhtype::T2
+    fdjtype::T2
+    fdhtype::T3
 end
 
-AutoFiniteDiff(;fdtype = Val(:forward), fdhtype = Val(:hcentral)) =
-    AutoFiniteDiff(fdtype,fdhtype)
+AutoFiniteDiff(; fdtype=Val(:forward), fdjtype=fdtype, fdhtype=Val(:hcentral)) =
+    AutoFiniteDiff(fdtype, fdjtype, fdhtype)
 
-function instantiate_function(f, x, adtype::AutoFiniteDiff, p, num_cons = 0)
-    num_cons != 0 && error("AutoFiniteDiff does not currently support constraints")
+function instantiate_function(f, x, adtype::AutoFiniteDiff, p, num_cons=0)
     _f = (θ, args...) -> first(f.f(θ, p, args...))
 
     if f.grad === nothing
-        grad = (res, θ, args...) -> FiniteDiff.finite_difference_gradient!(res, x ->_f(x, args...), θ, FiniteDiff.GradientCache(res, x, adtype.fdtype))
+        grad = (res, θ, args...) -> FiniteDiff.finite_difference_gradient!(res, x -> _f(x, args...), θ, FiniteDiff.GradientCache(res, x, adtype.fdtype))
     else
        grad = f.grad
     end
 
     if f.hess === nothing
-        hess = (res, θ, args...) -> FiniteDiff.finite_difference_hessian!(res, x ->_f(x, args...), θ, FiniteDiff.HessianCache(x, adtype.fdhtype))
+        hess = (res, θ, args...) -> FiniteDiff.finite_difference_hessian!(res, x -> _f(x, args...), θ, FiniteDiff.HessianCache(x, adtype.fdhtype))
     else
        hess = f.hess
     end
@@ -64,13 +65,45 @@ function instantiate_function(f, x, adtype::AutoFiniteDiff, p, num_cons = 0)
         hv = function (H, θ, v, args...)
             res = ArrayInterfaceCore.zeromatrix(θ)
             hess(res, θ, args...)
-            H .= res*v
+            H .= res * v
         end
     else
         hv = f.hv
     end
 
-    return OptimizationFunction{false}(f, adtype; grad=grad, hess=hess, hv=hv,
-        cons=nothing, cons_j=nothing, cons_h=nothing,
-        hess_prototype=nothing, cons_jac_prototype=nothing, cons_hess_prototype=nothing)
+    if f.cons === nothing
+        cons = nothing
+    else
+        cons = θ -> f.cons(θ, p)
+    end
+
+    cons_jac_colorvec = f.cons_jac_colorvec === nothing ? (1:length(x)) : f.cons_jac_colorvec
+
+    if cons !== nothing && f.cons_j === nothing
+        function iip_cons(dx, x) # need f(dx, x) for jacobian?
+            dx .= [cons(x)[i] for i in 1:num_cons] # Not very efficient probably?
+            nothing
+        end
+        cons_j = function (J, θ)
+            y0 = zeros(num_cons)
+            FiniteDiff.finite_difference_jacobian!(J, iip_cons, θ, FiniteDiff.JacobianCache(copy(θ), copy(y0), copy(y0), adtype.fdjtype; colorvec = cons_jac_colorvec, sparsity = f.cons_jac_prototype))
+        end
+    else
+        cons_j = f.cons_j
+    end
+
+    if cons !== nothing && f.cons_h === nothing
+        cons_h = function (res, θ)
+            for i in 1:num_cons # note: colorvecs not yet supported by FiniteDiff for Hessians
+                FiniteDiff.finite_difference_hessian!(res[i], (x) -> cons(x)[i], θ, FiniteDiff.HessianCache(copy(θ), adtype.fdhtype))
+            end
+        end
+    else
+        cons_h = f.cons_h
+    end
+
+    return OptimizationFunction{true}(f, adtype; grad=grad, hess=hess, hv=hv,
+        cons=cons, cons_j=cons_j, cons_h=cons_h,
+        cons_jac_colorvec = cons_jac_colorvec,
+        hess_prototype=nothing, cons_jac_prototype=f.cons_jac_prototype, cons_hess_prototype=nothing)
 end
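
The sketch below is a minimal illustration of how the extended `AutoFiniteDiff` interface can be exercised, mirroring the calls in `test/ADtests.jl` further down. The objective and constraint functions are illustrative stand-ins, not part of this commit, and `using Optimization` is assumed as in the test suite.

```julia
using Optimization

# Hypothetical objective and a single constraint g(x) = x1^2 + x2^2 (stand-ins)
obj = (x, p) -> (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
cons = (x, p) -> [x[1]^2 + x[2]^2]

# fdjtype is the new field; it selects the finite-difference scheme for the constraint Jacobian
adtype = Optimization.AutoFiniteDiff(; fdtype = Val(:forward), fdjtype = Val(:forward), fdhtype = Val(:hcentral))

optf = OptimizationFunction(obj, adtype; cons = cons)
optprob = Optimization.instantiate_function(optf, zeros(2), adtype, nothing, 1)

J = zeros(1, 2)
optprob.cons_j(J, [5.0, 3.0])   # finite-difference constraint Jacobian; J ≈ [10.0 6.0]

H = [zeros(2, 2)]
optprob.cons_h(H, zeros(2))     # finite-difference constraint Hessians; H[1] ≈ [2.0 0.0; 0.0 2.0]
```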

test/ADtests.jl

Lines changed: 100 additions & 0 deletions
@@ -161,3 +161,103 @@ sol = solve(prob, Optim.KrylovTrustRegion())
 
 sol = solve(prob, Optimisers.ADAM(0.1), maxiters=1000)
 @test 10 * sol.minimum < l1
+
+# Test new constraints
+cons = (x, p) -> [x[1]^2 + x[2]^2]
+optf = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(), cons=cons)
+optprob = Optimization.instantiate_function(optf, x0, Optimization.AutoFiniteDiff(), nothing, 1)
+optprob.grad(G2, x0)
+@test G1 ≈ G2 rtol = 1e-6
+optprob.hess(H2, x0)
+@test H1 ≈ H2 rtol = 1e-6
+@test optprob.cons(x0) == [0.0]
+@test optprob.cons([1.0, 4.0]) == [17.0]
+J = zeros(1, 2)
+optprob.cons_j(J, [5.0, 3.0])
+@test J ≈ [10.0 6.0]
+H3 = [Array{Float64}(undef, 2, 2)]
+optprob.cons_h(H3, x0)
+@test H3 ≈ [[2.0 0.0; 0.0 2.0]]
+
+cons_jac_proto = Float64.(sparse([1 1])) # Things break if you only use [1 1]; see FiniteDiff.jl
+cons_jac_colors = 1:2
+optf = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(), cons=cons, cons_jac_prototype = cons_jac_proto, cons_jac_colorvec = cons_jac_colors)
+optprob = Optimization.instantiate_function(optf, x0, Optimization.AutoFiniteDiff(), nothing, 1)
+@test optprob.cons_jac_prototype == sparse([1.0 1.0]) # make sure it's still using it
+@test optprob.cons_jac_colorvec == 1:2
+J = zeros(1, 2)
+optprob.cons_j(J, [5.0, 3.0])
+@test J ≈ [10.0 6.0]
+
+function con2_c(x, p)
+    [x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]]
+end
+optf = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(), cons=con2_c)
+optprob = Optimization.instantiate_function(optf, x0, Optimization.AutoFiniteDiff(), nothing, 2)
+optprob.grad(G2, x0)
+@test G1 ≈ G2 rtol = 1e-6
+optprob.hess(H2, x0)
+@test H1 ≈ H2 rtol = 1e-6
+@test optprob.cons(x0) == [0.0, 0.0]
+@test optprob.cons([1.0, 2.0]) ≈ [5.0, 0.682941969615793]
+J = Array{Float64}(undef, 2, 2)
+optprob.cons_j(J, [5.0, 3.0])
+@test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol=1e-3))
+H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)]
+optprob.cons_h(H3, x0)
+@test H3 ≈ [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]]
+
+cons_jac_proto = Float64.(sparse([1 1; 1 1]))
+cons_jac_colors = 1:2
+optf = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(), cons=con2_c, cons_jac_prototype = cons_jac_proto, cons_jac_colorvec = cons_jac_colors)
+optprob = Optimization.instantiate_function(optf, x0, Optimization.AutoFiniteDiff(), nothing, 2)
+@test optprob.cons_jac_prototype == sparse([1.0 1.0; 1.0 1.0]) # make sure it's still using it
+@test optprob.cons_jac_colorvec == 1:2
+J = Array{Float64}(undef, 2, 2)
+optprob.cons_j(J, [5.0, 3.0])
+@test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol=1e-3))
+
+# Can we solve problems? Using AutoForwardDiff to test since we know that works
+for consf in [cons, con2_c]
+    optf1 = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(); cons = consf)
+    prob1 = OptimizationProblem(optf1, [0.3, 0.5], lb = [0.2, 0.4], ub = [0.6, 0.8])
+    sol1 = solve(prob1, BFGS())
+    optf2 = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff(); cons = consf)
+    prob2 = OptimizationProblem(optf2, [0.3, 0.5], lb = [0.2, 0.4], ub = [0.6, 0.8])
+    sol2 = solve(prob2, BFGS())
+    @test sol1.minimum ≈ sol2.minimum
+    @test sol1.u ≈ sol2.u
+
+    optf1 = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(); cons = consf)
+    lcons = consf == cons ? [0.2] : [0.2, -0.81]
+    ucons = consf == cons ? [0.55] : [0.55, -0.1]
+    prob1 = OptimizationProblem(optf1, [0.3, 0.5], lb = [0.2, 0.4], ub = [0.6, 0.8], lcons = lcons, ucons = ucons)
+    sol1 = solve(prob1, Optim.SAMIN(), maxiters = 10000) # a lot of iterations... doesn't even converge actually
+    optf2 = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff(); cons = consf)
+    prob2 = OptimizationProblem(optf2, [0.3, 0.5], lb = [0.2, 0.4], ub = [0.6, 0.8], lcons = lcons, ucons = ucons)
+    sol2 = solve(prob2, Optim.SAMIN(), maxiters = 10000)
+    @test sol1.minimum ≈ sol2.minimum rtol = 1e-4
+    @test sol1.u ≈ sol2.u
+    @test lcons[1] ≤ consf(sol1.u, nothing)[1] ≤ ucons[1]
+    if consf == con2_c
+        @test lcons[2] ≤ consf(sol1.u, nothing)[2] ≤ ucons[2]
+    end
+
+    # --- These equality constraints are so fiddly. Can't get it to pass with consf(sol1.u, nothing)[1] ≈ lcons[1] rtol = 0.1 being true
+    # (I can get sol1.minimum ≈ sol2.minimum and sol1.u ≈ sol2.u, though, just not the constraint - or I can get the constraint and not
+    # sol1.minimum ≈ sol2.minimum, sol1.u ≈ sol2.u)
+    lcons = consf == cons ? [0.2] : [0.2, 0.5]
+    ucons = consf == cons ? [0.2] : [0.2, 0.5]
+    optf1 = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(); cons = consf)
+    prob1 = OptimizationProblem(optf1, [0.5, 0.5], lcons = lcons, ucons = ucons)
+    sol1 = solve(prob1, Optim.IPNewton())
+    optf2 = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff(); cons = consf)
+    prob2 = OptimizationProblem(optf2, [0.5, 0.5], lcons = lcons, ucons = ucons)
+    sol2 = solve(prob2, Optim.IPNewton())
+    @test_broken sol1.minimum ≈ sol2.minimum
+    @test_broken sol1.u ≈ sol2.u
+    @test consf(sol1.u, nothing)[1] ≈ lcons[1] rtol = 0.1
+    if consf == con2_c
+        @test_broken consf(sol1.u, nothing)[2] ≈ lcons[2]
+    end
+end
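
As a companion to the solver checks above, here is a compact end-to-end sketch of the new constraint path: a finite-difference backend driving an interior-point solve. The objective, bounds, starting point, and solver choice are illustrative assumptions modelled on the same pattern as the tests, and `OptimizationOptimJL` is assumed to be loaded so that `Optim.IPNewton()` is available.

```julia
using Optimization, OptimizationOptimJL

obj = (x, p) -> (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2   # stand-in objective
cons = (x, p) -> [x[1]^2 + x[2]^2]                           # single constraint

optf = OptimizationFunction(obj, Optimization.AutoFiniteDiff(); cons = cons)
# Keep 0.2 <= x1^2 + x2^2 <= 0.55; constraint Jacobian/Hessian are supplied via FiniteDiff
prob = OptimizationProblem(optf, [0.5, 0.5], lcons = [0.2], ucons = [0.55])
sol = solve(prob, Optim.IPNewton())
```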
