Constraints for AutoFiniteDiff #318

Merged · 12 commits · Jul 9, 2022
63 changes: 48 additions & 15 deletions src/function/finitediff.jl
@@ -1,5 +1,5 @@
"""
AutoFiniteDiff{T1,T2} <: AbstractADType
AutoFiniteDiff{T1,T2,T3} <: AbstractADType

An AbstractADType choice for use in OptimizationFunction for automatically
generating the unspecified derivative functions. Usage:
@@ -18,7 +18,7 @@ introduces numerical error into the derivative estimates.
- Compatible with GPUs
- Compatible with Hessian-based optimization
- Compatible with Hv-based optimization
- Not compatible with constraint functions
- Compatible with constraint functions

Note that only the unspecified derivative functions are defined. For example,
if a `hess` function is supplied to the `OptimizationFunction`, then the
@@ -27,35 +27,36 @@ Hessian is not defined via FiniteDiff.
## Constructor

```julia
AutoFiniteDiff(;fdtype = Val(:forward), fdhtype = Val(:hcentral))
AutoFiniteDiff(; fdtype = Val(:forward), fdjtype = fdtype, fdhtype = Val(:hcentral))
```

- `fdtype`: the method used for defining the gradient
- `fdjtype`: the method used for defining the Jacobian of the constraints
- `fdhtype`: the method used for defining the Hessian

For more information on the derivative type specifiers, see the
[FiniteDiff.jl documentation](https://github.com/JuliaDiff/FiniteDiff.jl).
"""
struct AutoFiniteDiff{T1,T2} <: AbstractADType
struct AutoFiniteDiff{T1,T2,T3} <: AbstractADType
fdtype::T1
fdhtype::T2
fdjtype::T2
fdhtype::T3
end

AutoFiniteDiff(;fdtype = Val(:forward), fdhtype = Val(:hcentral)) =
AutoFiniteDiff(fdtype,fdhtype)
AutoFiniteDiff(; fdtype=Val(:forward), fdjtype=fdtype, fdhtype=Val(:hcentral)) =
AutoFiniteDiff(fdtype, fdjtype, fdhtype)
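
As an aside, a minimal sketch of how the updated constructor might be called; the keyword names come from the diff above, while the objective and constraint below are purely illustrative:

```julia
using Optimization

# Forward differences for the gradient (default), central differences for the
# constraint Jacobian, and the default Val(:hcentral) scheme for the Hessian.
adtype = Optimization.AutoFiniteDiff(fdjtype = Val(:central))

rosenbrock(x, p) = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2
cons(x, p) = [x[1]^2 + x[2]^2]   # a single constraint, returned out-of-place
optf = OptimizationFunction(rosenbrock, adtype; cons = cons)
```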

function instantiate_function(f, x, adtype::AutoFiniteDiff, p, num_cons = 0)
num_cons != 0 && error("AutoFiniteDiff does not currently support constraints")
function instantiate_function(f, x, adtype::AutoFiniteDiff, p, num_cons=0)
_f = (θ, args...) -> first(f.f(θ, p, args...))

if f.grad === nothing
grad = (res, θ, args...) -> FiniteDiff.finite_difference_gradient!(res, x ->_f(x, args...), θ, FiniteDiff.GradientCache(res, x, adtype.fdtype))
grad = (res, θ, args...) -> FiniteDiff.finite_difference_gradient!(res, x -> _f(x, args...), θ, FiniteDiff.GradientCache(res, x, adtype.fdtype))
else
grad = f.grad
end

if f.hess === nothing
hess = (res, θ, args...) -> FiniteDiff.finite_difference_hessian!(res, x ->_f(x, args...), θ, FiniteDiff.HessianCache(x, adtype.fdhtype))
hess = (res, θ, args...) -> FiniteDiff.finite_difference_hessian!(res, x -> _f(x, args...), θ, FiniteDiff.HessianCache(x, adtype.fdhtype))
else
hess = f.hess
end
@@ -64,13 +65,45 @@ function instantiate_function(f, x, adtype::AutoFiniteDiff, p, num_cons = 0)
hv = function (H, θ, v, args...)
res = ArrayInterfaceCore.zeromatrix(θ)
hess(res, θ, args...)
H .= res*v
H .= res * v
end
else
hv = f.hv
end

return OptimizationFunction{false}(f, adtype; grad=grad, hess=hess, hv=hv,
cons=nothing, cons_j=nothing, cons_h=nothing,
hess_prototype=nothing, cons_jac_prototype=nothing, cons_hess_prototype=nothing)
if f.cons === nothing
cons = nothing
else
cons = θ -> f.cons(θ, p)
end

cons_jac_colorvec = f.cons_jac_colorvec === nothing ? (1:length(x)) : f.cons_jac_colorvec

if cons !== nothing && f.cons_j === nothing
function iip_cons(dx, x) # need f(dx, x) for jacobian?
dx .= [cons(x)[i] for i in 1:num_cons] # Not very efficient probably?
nothing
end
cons_j = function (J, θ)
y0 = zeros(num_cons)
FiniteDiff.finite_difference_jacobian!(J, iip_cons, θ, FiniteDiff.JacobianCache(copy(θ), copy(y0), copy(y0), adtype.fdjtype; colorvec = cons_jac_colorvec, sparsity = f.cons_jac_prototype))
end
else
cons_j = f.cons_j
end
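
To make the `cons_j` closure above easier to follow, here is a standalone FiniteDiff sketch of the same pattern outside `instantiate_function`, using the single-constraint example from the tests below:

```julia
using FiniteDiff

cons(x) = [x[1]^2 + x[2]^2]                 # out-of-place constraint function
iip_cons(dx, x) = (dx .= cons(x); nothing)  # in-place wrapper that finite_difference_jacobian! expects

θ = [5.0, 3.0]
y0 = zeros(1)                               # one constraint
J = zeros(1, 2)
cache = FiniteDiff.JacobianCache(copy(θ), copy(y0), copy(y0), Val(:forward))
FiniteDiff.finite_difference_jacobian!(J, iip_cons, θ, cache)
J  # ≈ [10.0 6.0], the gradient of x[1]^2 + x[2]^2 at (5, 3)
```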

if cons !== nothing && f.cons_h === nothing
cons_h = function (res, θ)
for i in 1:num_cons # note: colorvecs not yet supported by FiniteDiff for Hessians
[Review comment, Member]: We should start integrating/testing JuliaDiff/SparseDiffTools.jl#190 and then do similar to the FiniteDiff side (cc @ElOceanografo).

FiniteDiff.finite_difference_hessian!(res[i], (x) -> cons(x)[i], θ, FiniteDiff.HessianCache(copy(θ), adtype.fdhtype))
end
end
else
cons_h = f.cons_h
end
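
Similarly, a standalone sketch of what the `cons_h` loop computes, one dense Hessian per constraint (same illustrative constraint; no colorvec support here, as the comment inside the loop notes):

```julia
using FiniteDiff

cons(x) = [x[1]^2 + x[2]^2]
θ = [0.0, 0.0]
res = [zeros(2, 2)]   # one Hessian buffer per constraint
for i in 1:length(res)
    FiniteDiff.finite_difference_hessian!(res[i], x -> cons(x)[i], θ,
                                          FiniteDiff.HessianCache(copy(θ), Val(:hcentral)))
end
res[1]  # ≈ [2.0 0.0; 0.0 2.0]
```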

return OptimizationFunction{true}(f, adtype; grad=grad, hess=hess, hv=hv,
cons=cons, cons_j=cons_j, cons_h=cons_h,
cons_jac_colorvec = cons_jac_colorvec,
hess_prototype=nothing, cons_jac_prototype=f.cons_jac_prototype, cons_hess_prototype=nothing)
end
100 changes: 100 additions & 0 deletions test/ADtests.jl
@@ -161,3 +161,103 @@ sol = solve(prob, Optim.KrylovTrustRegion())

sol = solve(prob, Optimisers.ADAM(0.1), maxiters=1000)
@test 10 * sol.minimum < l1

# Test new constraints
cons = (x, p) -> [x[1]^2 + x[2]^2]
optf = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(), cons=cons)
optprob = Optimization.instantiate_function(optf, x0, Optimization.AutoFiniteDiff(), nothing, 1)
optprob.grad(G2, x0)
@test G1 ≈ G2 rtol = 1e-6
optprob.hess(H2, x0)
@test H1 ≈ H2 rtol = 1e-6
@test optprob.cons(x0) == [0.0]
@test optprob.cons([1.0, 4.0]) == [17.0]
J = zeros(1, 2)
optprob.cons_j(J, [5.0, 3.0])
@test J ≈ [10.0 6.0]
H3 = [Array{Float64}(undef, 2, 2)]
optprob.cons_h(H3, x0)
@test H3 ≈ [[2.0 0.0; 0.0 2.0]]

cons_jac_proto = Float64.(sparse([1 1])) # Things break if you only use [1 1]; see FiniteDiff.jl
cons_jac_colors = 1:2
optf = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(), cons=cons, cons_jac_prototype = cons_jac_proto, cons_jac_colorvec = cons_jac_colors)
optprob = Optimization.instantiate_function(optf, x0, Optimization.AutoFiniteDiff(), nothing, 1)
@test optprob.cons_jac_prototype == sparse([1.0 1.0]) # make sure it's still using it
@test optprob.cons_jac_colorvec == 1:2
J = zeros(1, 2)
optprob.cons_j(J, [5.0, 3.0])
@test J ≈ [10.0 6.0]

function con2_c(x, p)
[x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]]
end
optf = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(), cons=con2_c)
optprob = Optimization.instantiate_function(optf, x0, Optimization.AutoFiniteDiff(), nothing, 2)
optprob.grad(G2, x0)
@test G1 ≈ G2 rtol = 1e-6
optprob.hess(H2, x0)
@test H1 ≈ H2 rtol = 1e-6
@test optprob.cons(x0) == [0.0, 0.0]
@test optprob.cons([1.0, 2.0]) ≈ [5.0, 0.682941969615793]
J = Array{Float64}(undef, 2, 2)
optprob.cons_j(J, [5.0, 3.0])
@test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol=1e-3))
H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)]
optprob.cons_h(H3, x0)
@test H3 ≈ [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]]
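
As a hand check of the expected values in these tests (just calculus, nothing from the API):

```math
c(x) = \begin{pmatrix} x_1^2 + x_2^2 \\ x_2 \sin x_1 - x_1 \end{pmatrix}, \qquad
J(x) = \begin{pmatrix} 2x_1 & 2x_2 \\ x_2 \cos x_1 - 1 & \sin x_1 \end{pmatrix},
```

so `J(5, 3) ≈ [10.0 6.0; -0.14902 -0.95892]`, and at the origin `x0` the two constraint Hessians are `[2 0; 0 2]` and `[0 1; 1 0]`, matching the assertions above.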

cons_jac_proto = Float64.(sparse([1 1; 1 1]))
cons_jac_colors = 1:2
optf = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(), cons=con2_c, cons_jac_prototype = cons_jac_proto, cons_jac_colorvec = cons_jac_colors)
optprob = Optimization.instantiate_function(optf, x0, Optimization.AutoFiniteDiff(), nothing, 2)
@test optprob.cons_jac_prototype == sparse([1.0 1.0; 1.0 1.0]) # make sure it's still using it
@test optprob.cons_jac_colorvec == 1:2
J = Array{Float64}(undef, 2, 2)
optprob.cons_j(J, [5.0, 3.0])
@test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol=1e-3))
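
For completeness, the `colorvec`/`sparsity` keywords that the diff passes to `FiniteDiff.JacobianCache` can also be exercised directly; a standalone sketch under the same assumptions as above:

```julia
using FiniteDiff, SparseArrays

con2_c(x) = [x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]]
iip_con2(dx, x) = (dx .= con2_c(x); nothing)

θ = [5.0, 3.0]
y0 = zeros(2)
proto = Float64.(sparse([1 1; 1 1]))   # declared Jacobian sparsity pattern (dense here)
J = zeros(2, 2)
cache = FiniteDiff.JacobianCache(copy(θ), copy(y0), copy(y0), Val(:forward);
                                 colorvec = 1:2, sparsity = proto)
FiniteDiff.finite_difference_jacobian!(J, iip_con2, θ, cache)
J  # ≈ [10.0 6.0; -0.149 -0.959]
```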

# Can we solve problems? Using AutoForwardDiff to test since we know that works
for consf in [cons, con2_c]
optf1 = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(); cons = consf)
prob1 = OptimizationProblem(optf1, [0.3, 0.5], lb = [0.2, 0.4], ub = [0.6, 0.8])
sol1 = solve(prob1,BFGS())
optf2 = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff(); cons = consf)
prob2 = OptimizationProblem(optf2, [0.3, 0.5], lb = [0.2, 0.4], ub = [0.6, 0.8])
sol2 = solve(prob2,BFGS())
@test sol1.minimum ≈ sol2.minimum
@test sol1.u ≈ sol2.u

optf1 = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(); cons = consf)
lcons = consf == cons ? [0.2] : [0.2, -0.81]
ucons = consf == cons ? [0.55] : [0.55, -0.1]
prob1 = OptimizationProblem(optf1, [0.3, 0.5], lb = [0.2, 0.4], ub = [0.6, 0.8], lcons = lcons, ucons = ucons)
sol1 = solve(prob1,Optim.SAMIN(), maxiters = 10000) # a lot of iterations... doesn't even converge actually
optf2 = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff(); cons = consf)
prob2 = OptimizationProblem(optf2, [0.3, 0.5], lb = [0.2, 0.4], ub = [0.6, 0.8], lcons = lcons, ucons = ucons)
sol2 = solve(prob2,Optim.SAMIN(), maxiters = 10000)

[Review comment, Member]: SAMIN doesn't use the constraints though?

[Review comment, Member]: But it doesn't error if constraints are given? Optim just lets them through?

[Review comment, Member Author, @DanielVandH, Jul 9, 2022]: I thought it did? Based on what is given here, http://optimization.sciml.ai/stable/optimization_packages/optim/#With-Constraint-Equations: "The following method in Optim is performing global optimization on problems with constraint equation".

[Review comment, Member]: I did miss that SAMIN is derivative-free though (PSO too), so we should probably mark that in the docs.

@test sol1.minimum ≈ sol2.minimum rtol = 1e-4
@test sol1.u ≈ sol2.u
@test lcons[1] ≤ consf(sol1.u, nothing)[1] ≤ ucons[1]
if consf == con2_c
@test lcons[2] ≤ consf(sol1.u, nothing)[2] ≤ ucons[2]
end

# --- These equality constraints are fiddly. We can't get consf(sol1.u, nothing)[1] ≈ lcons[1] rtol = 0.1 to hold
# together with sol1.minimum ≈ sol2.minimum and sol1.u ≈ sol2.u: we can satisfy either the constraint check or
# the solution checks, but not both.
lcons = consf == cons ? [0.2] : [0.2, 0.5]
ucons = consf == cons ? [0.2] : [0.2, 0.5]
optf1 = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(); cons = consf)
prob1 = OptimizationProblem(optf1, [0.5, 0.5], lcons = lcons, ucons = ucons)
sol1 = solve(prob1,Optim.IPNewton())
optf2 = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff(); cons = consf)
prob2 = OptimizationProblem(optf2, [0.5, 0.5], lcons = lcons, ucons = ucons)
sol2 = solve(prob2,Optim.IPNewton())
@test_broken sol1.minimum ≈ sol2.minimum
@test_broken sol1.u ≈ sol2.u
@test consf(sol1.u, nothing)[1] ≈ lcons[1] rtol = 0.1
if consf == con2_c
@test_broken consf(sol1.u, nothing)[2] ≈ lcons[2]
end
end