Constraints for AutoFiniteDiff #318
Changes from all commits
@@ -1,5 +1,5 @@
 """
-AutoFiniteDiff{T1,T2} <: AbstractADType
+AutoFiniteDiff{T1,T2,T3} <: AbstractADType
 
 An AbstractADType choice for use in OptimizationFunction for automatically
 generating the unspecified derivative functions. Usage:
@@ -18,7 +18,7 @@ introduces numerical error into the derivative estimates.
 - Compatible with GPUs
 - Compatible with Hessian-based optimization
 - Compatible with Hv-based optimization
-- Not compatible with constraint functions
+- Compatible with constraint functions
 
 Note that only the unspecified derivative functions are defined. For example,
 if a `hess` function is supplied to the `OptimizationFunction`, then the
@@ -27,35 +27,36 @@ Hessian is not defined via FiniteDiff.
 ## Constructor
 
 ```julia
-AutoFiniteDiff(;fdtype = Val(:forward), fdhtype = Val(:hcentral))
+AutoFiniteDiff(;fdtype = Val(:forward), fdjtype = fdtype, fdhtype = Val(:hcentral))
 ```
 
 - `fdtype`: the method used for defining the gradient
+- `fdjtype`: the method used for defining the Jacobian of the constraints
 - `fdhtype`: the method used for defining the Hessian
 
 For more information on the derivative type specifiers, see the
 [FiniteDiff.jl documentation](https://github.com/JuliaDiff/FiniteDiff.jl).
 """
-struct AutoFiniteDiff{T1,T2} <: AbstractADType
+struct AutoFiniteDiff{T1,T2,T3} <: AbstractADType
     fdtype::T1
-    fdhtype::T2
+    fdjtype::T2
+    fdhtype::T3
 end
 
-AutoFiniteDiff(;fdtype = Val(:forward), fdhtype = Val(:hcentral)) =
-    AutoFiniteDiff(fdtype, fdhtype)
+AutoFiniteDiff(; fdtype=Val(:forward), fdjtype=fdtype, fdhtype=Val(:hcentral)) =
+    AutoFiniteDiff(fdtype, fdjtype, fdhtype)
 
-function instantiate_function(f, x, adtype::AutoFiniteDiff, p, num_cons = 0)
-    num_cons != 0 && error("AutoFiniteDiff does not currently support constraints")
+function instantiate_function(f, x, adtype::AutoFiniteDiff, p, num_cons=0)
     _f = (θ, args...) -> first(f.f(θ, p, args...))
 
     if f.grad === nothing
-        grad = (res, θ, args...) -> FiniteDiff.finite_difference_gradient!(res, x ->_f(x, args...), θ, FiniteDiff.GradientCache(res, x, adtype.fdtype))
+        grad = (res, θ, args...) -> FiniteDiff.finite_difference_gradient!(res, x -> _f(x, args...), θ, FiniteDiff.GradientCache(res, x, adtype.fdtype))
     else
         grad = f.grad
     end
 
     if f.hess === nothing
-        hess = (res, θ, args...) -> FiniteDiff.finite_difference_hessian!(res, x ->_f(x, args...), θ, FiniteDiff.HessianCache(x, adtype.fdhtype))
+        hess = (res, θ, args...) -> FiniteDiff.finite_difference_hessian!(res, x -> _f(x, args...), θ, FiniteDiff.HessianCache(x, adtype.fdhtype))
     else
         hess = f.hess
     end
@@ -64,13 +65,45 @@ function instantiate_function(f, x, adtype::AutoFiniteDiff, p, num_cons = 0)
         hv = function (H, θ, v, args...)
             res = ArrayInterfaceCore.zeromatrix(θ)
             hess(res, θ, args...)
-            H .= res*v
+            H .= res * v
         end
     else
         hv = f.hv
     end
 
-    return OptimizationFunction{false}(f, adtype; grad=grad, hess=hess, hv=hv,
-        cons=nothing, cons_j=nothing, cons_h=nothing,
-        hess_prototype=nothing, cons_jac_prototype=nothing, cons_hess_prototype=nothing)
+    if f.cons === nothing
+        cons = nothing
+    else
+        cons = θ -> f.cons(θ, p)
+    end
+
+    cons_jac_colorvec = f.cons_jac_colorvec === nothing ? (1:length(x)) : f.cons_jac_colorvec
+
+    if cons !== nothing && f.cons_j === nothing
+        function iip_cons(dx, x) # need f(dx, x) for jacobian?
+            dx .= [cons(x)[i] for i in 1:num_cons] # Not very efficient probably?
+            nothing
+        end
+        cons_j = function (J, θ)
+            y0 = zeros(num_cons)
+            FiniteDiff.finite_difference_jacobian!(J, iip_cons, θ, FiniteDiff.JacobianCache(copy(θ), copy(y0), copy(y0), adtype.fdjtype; colorvec = cons_jac_colorvec, sparsity = f.cons_jac_prototype))
+        end
+    else
+        cons_j = f.cons_j
+    end
+
+    if cons !== nothing && f.cons_h === nothing
+        cons_h = function (res, θ)
+            for i in 1:num_cons # note: colorvecs not yet supported by FiniteDiff for Hessians

Review comment: We should start integrating/testing JuliaDiff/SparseDiffTools.jl#190 and then do similar to the FiniteDiff side (cc @ElOceanografo)

+                FiniteDiff.finite_difference_hessian!(res[i], (x) -> cons(x)[i], θ, FiniteDiff.HessianCache(copy(θ), adtype.fdhtype))
+            end
+        end
+    else
+        cons_h = f.cons_h
+    end
+
+    return OptimizationFunction{true}(f, adtype; grad=grad, hess=hess, hv=hv,
+        cons=cons, cons_j=cons_j, cons_h=cons_h,
+        cons_jac_colorvec = cons_jac_colorvec,
+        hess_prototype=nothing, cons_jac_prototype=f.cons_jac_prototype, cons_hess_prototype=nothing)
 end
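For reference, here is a minimal usage sketch of the extended API, distilled from the diff above and the tests below; the `rosenbrock` objective, the single constraint, the starting point, and the particular `Val` derivative-type choices are illustrative, not prescribed by this PR.

```julia
using Optimization  # FiniteDiff is used internally by AutoFiniteDiff

rosenbrock(x, p) = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2
cons = (x, p) -> [x[1]^2 + x[2]^2]   # one scalar constraint, returned as a vector

# fdjtype selects the finite-difference method for the constraint Jacobian;
# it defaults to fdtype, so it only needs to be passed to override that default.
adtype = Optimization.AutoFiniteDiff(fdtype = Val(:forward),
                                     fdjtype = Val(:central),
                                     fdhtype = Val(:hcentral))

optf = OptimizationFunction(rosenbrock, adtype; cons = cons)
# num_cons = 1 tells instantiate_function how many constraint outputs to expect.
optprob = Optimization.instantiate_function(optf, zeros(2), adtype, nothing, 1)

J = zeros(1, 2)
optprob.cons_j(J, [5.0, 3.0])   # finite-difference constraint Jacobian, ≈ [10.0 6.0]

H = [zeros(2, 2)]
optprob.cons_h(H, zeros(2))     # one Hessian per constraint, ≈ [[2.0 0.0; 0.0 2.0]]
```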
@@ -161,3 +161,103 @@ sol = solve(prob, Optim.KrylovTrustRegion())
 
 sol = solve(prob, Optimisers.ADAM(0.1), maxiters=1000)
 @test 10 * sol.minimum < l1
+
+# Test new constraints
+cons = (x, p) -> [x[1]^2 + x[2]^2]
+optf = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(), cons=cons)
+optprob = Optimization.instantiate_function(optf, x0, Optimization.AutoFiniteDiff(), nothing, 1)
+optprob.grad(G2, x0)
+@test G1 ≈ G2 rtol = 1e-6
+optprob.hess(H2, x0)
+@test H1 ≈ H2 rtol = 1e-6
+@test optprob.cons(x0) == [0.0]
+@test optprob.cons([1.0, 4.0]) == [17.0]
+J = zeros(1, 2)
+optprob.cons_j(J, [5.0, 3.0])
+@test J ≈ [10.0 6.0]
+H3 = [Array{Float64}(undef, 2, 2)]
+optprob.cons_h(H3, x0)
+@test H3 ≈ [[2.0 0.0; 0.0 2.0]]
+
+cons_jac_proto = Float64.(sparse([1 1])) # Things break if you only use [1 1]; see FiniteDiff.jl
+cons_jac_colors = 1:2
+optf = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(), cons=cons, cons_jac_prototype = cons_jac_proto, cons_jac_colorvec = cons_jac_colors)
+optprob = Optimization.instantiate_function(optf, x0, Optimization.AutoFiniteDiff(), nothing, 1)
+@test optprob.cons_jac_prototype == sparse([1.0 1.0]) # make sure it's still using it
+@test optprob.cons_jac_colorvec == 1:2
+J = zeros(1, 2)
+optprob.cons_j(J, [5.0, 3.0])
+@test J ≈ [10.0 6.0]
+
+function con2_c(x, p)
+    [x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]]
+end
+optf = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(), cons=con2_c)
+optprob = Optimization.instantiate_function(optf, x0, Optimization.AutoFiniteDiff(), nothing, 2)
+optprob.grad(G2, x0)
+@test G1 ≈ G2 rtol = 1e-6
+optprob.hess(H2, x0)
+@test H1 ≈ H2 rtol = 1e-6
+@test optprob.cons(x0) == [0.0, 0.0]
+@test optprob.cons([1.0, 2.0]) ≈ [5.0, 0.682941969615793]
+J = Array{Float64}(undef, 2, 2)
+optprob.cons_j(J, [5.0, 3.0])
+@test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol=1e-3))
+H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)]
+optprob.cons_h(H3, x0)
+@test H3 ≈ [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]]
+
+cons_jac_proto = Float64.(sparse([1 1; 1 1]))
+cons_jac_colors = 1:2
+optf = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(), cons=con2_c, cons_jac_prototype = cons_jac_proto, cons_jac_colorvec = cons_jac_colors)
+optprob = Optimization.instantiate_function(optf, x0, Optimization.AutoFiniteDiff(), nothing, 2)
+@test optprob.cons_jac_prototype == sparse([1.0 1.0; 1.0 1.0]) # make sure it's still using it
+@test optprob.cons_jac_colorvec == 1:2
+J = Array{Float64}(undef, 2, 2)
+optprob.cons_j(J, [5.0, 3.0])
+@test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol=1e-3))
+
+# Can we solve problems? Using AutoForwardDiff to test since we know that works
+for consf in [cons, con2_c]
+    optf1 = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(); cons = consf)
+    prob1 = OptimizationProblem(optf1, [0.3, 0.5], lb = [0.2, 0.4], ub = [0.6, 0.8])
+    sol1 = solve(prob1, BFGS())
+    optf2 = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff(); cons = consf)
+    prob2 = OptimizationProblem(optf2, [0.3, 0.5], lb = [0.2, 0.4], ub = [0.6, 0.8])
+    sol2 = solve(prob2, BFGS())
+    @test sol1.minimum ≈ sol2.minimum
+    @test sol1.u ≈ sol2.u
+
+    optf1 = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(); cons = consf)
+    lcons = consf == cons ? [0.2] : [0.2, -0.81]
+    ucons = consf == cons ? [0.55] : [0.55, -0.1]
+    prob1 = OptimizationProblem(optf1, [0.3, 0.5], lb = [0.2, 0.4], ub = [0.6, 0.8], lcons = lcons, ucons = ucons)
+    sol1 = solve(prob1, Optim.SAMIN(), maxiters = 10000) # a lot of iterations... doesn't even converge actually
+    optf2 = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff(); cons = consf)
+    prob2 = OptimizationProblem(optf2, [0.3, 0.5], lb = [0.2, 0.4], ub = [0.6, 0.8], lcons = lcons, ucons = ucons)
+    sol2 = solve(prob2, Optim.SAMIN(), maxiters = 10000)

Review comment: But it doesn't error if constraints are given? Optim just lets them through?

Review comment: I thought it did? Based on what is given here http://optimization.sciml.ai/stable/optimization_packages/optim/#With-Constraint-Equations "The following method in Optim is performing global optimization on problems with constraint equation"

Review comment: I did miss that SAMIN is derivative-free though (PSO too), so we should probably mark that in the docs.

+    @test sol1.minimum ≈ sol2.minimum rtol = 1e-4
+    @test sol1.u ≈ sol2.u
+    @test lcons[1] ≤ consf(sol1.u, nothing)[1] ≤ ucons[1]
+    if consf == con2_c
+        @test lcons[2] ≤ consf(sol1.u, nothing)[2] ≤ ucons[2]
+    end
+
+    # --- These equality constraints are so fiddly. Can't get it to pass with consf(sol1.u, nothing)[1] ≈ lcons[1] rtol = 0.1 being true
+    # (I can get sol1.minimum ≈ sol2.minimum and sol1.u ≈ sol2.u, though, just not the constraint - or I can get the constraint and not
+    # sol1.minimum ≈ sol2.minimum, sol1.u ≈ sol2.u)
+    lcons = consf == cons ? [0.2] : [0.2, 0.5]
+    ucons = consf == cons ? [0.2] : [0.2, 0.5]
+    optf1 = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(); cons = consf)
+    prob1 = OptimizationProblem(optf1, [0.5, 0.5], lcons = lcons, ucons = ucons)
+    sol1 = solve(prob1, Optim.IPNewton())
+    optf2 = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff(); cons = consf)
+    prob2 = OptimizationProblem(optf2, [0.5, 0.5], lcons = lcons, ucons = ucons)
+    sol2 = solve(prob2, Optim.IPNewton())
+    @test_broken sol1.minimum ≈ sol2.minimum
+    @test_broken sol1.u ≈ sol2.u
+    @test consf(sol1.u, nothing)[1] ≈ lcons[1] rtol = 0.1
+    if consf == con2_c
+        @test_broken consf(sol1.u, nothing)[2] ≈ lcons[2]
+    end
+end
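A condensed, end-to-end version of the constrained solve exercised in the test loop above, as a sketch: it assumes the Optim solvers are available via the OptimizationOptimJL sub-package (as in these tests), and the bounds and constraint limits are copied from the test values.

```julia
using Optimization, OptimizationOptimJL

rosenbrock(x, p) = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2
cons = (x, p) -> [x[1]^2 + x[2]^2]

optf = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(); cons = cons)
prob = OptimizationProblem(optf, [0.3, 0.5], lb = [0.2, 0.4], ub = [0.6, 0.8],
                           lcons = [0.2], ucons = [0.55])

# SAMIN is derivative-free (see the review discussion above), so this mainly
# exercises the constraint bookkeeping rather than the finite-difference Jacobian.
sol = solve(prob, Optim.SAMIN(), maxiters = 10000)

0.2 ≤ cons(sol.u, nothing)[1] ≤ 0.55   # the constraint should hold at the solution
```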