
Commit fd343ab

Merge pull request #318 from DanielVandH/master
Constraints for AutoFiniteDiff
2 parents a9be183 + 9ed5aae


2 files changed, +148 -15 lines changed


src/function/finitediff.jl

Lines changed: 48 additions & 15 deletions
@@ -1,5 +1,5 @@
 """
-AutoFiniteDiff{T1,T2} <: AbstractADType
+AutoFiniteDiff{T1,T2,T3} <: AbstractADType
 
 An AbstractADType choice for use in OptimizationFunction for automatically
 generating the unspecified derivative functions. Usage:
@@ -18,7 +18,7 @@ introduces numerical error into the derivative estimates.
 - Compatible with GPUs
 - Compatible with Hessian-based optimization
 - Compatible with Hv-based optimization
-- Not compatible with constraint functions
+- Compatible with constraint functions
 
 Note that only the unspecified derivative functions are defined. For example,
 if a `hess` function is supplied to the `OptimizationFunction`, then the
@@ -27,35 +27,36 @@ Hessian is not defined via FiniteDiff.
 ## Constructor
 
 ```julia
-AutoFiniteDiff(;fdtype = Val(:forward), fdhtype = Val(:hcentral))
+AutoFiniteDiff(;fdtype = Val(:forward), fdjtype = fdtype, fdhtype = Val(:hcentral))
 ```
 
 - `fdtype`: the method used for defining the gradient
+- `fdjtype`: the method used for defining the Jacobian of constraints
 - `fdhtype`: the method used for defining the Hessian
 
 For more information on the derivative type specifiers, see the
 [FiniteDiff.jl documentation](https://github.com/JuliaDiff/FiniteDiff.jl).
 """
-struct AutoFiniteDiff{T1,T2} <: AbstractADType
+struct AutoFiniteDiff{T1,T2,T3} <: AbstractADType
     fdtype::T1
-    fdhtype::T2
+    fdjtype::T2
+    fdhtype::T3
 end
 
-AutoFiniteDiff(;fdtype = Val(:forward), fdhtype = Val(:hcentral)) =
-    AutoFiniteDiff(fdtype,fdhtype)
+AutoFiniteDiff(; fdtype=Val(:forward), fdjtype=fdtype, fdhtype=Val(:hcentral)) =
+    AutoFiniteDiff(fdtype, fdjtype, fdhtype)
 
-function instantiate_function(f, x, adtype::AutoFiniteDiff, p, num_cons = 0)
-    num_cons != 0 && error("AutoFiniteDiff does not currently support constraints")
+function instantiate_function(f, x, adtype::AutoFiniteDiff, p, num_cons=0)
     _f = (θ, args...) -> first(f.f(θ, p, args...))
 
     if f.grad === nothing
-        grad = (res, θ, args...) -> FiniteDiff.finite_difference_gradient!(res, x ->_f(x, args...), θ, FiniteDiff.GradientCache(res, x, adtype.fdtype))
+        grad = (res, θ, args...) -> FiniteDiff.finite_difference_gradient!(res, x -> _f(x, args...), θ, FiniteDiff.GradientCache(res, x, adtype.fdtype))
     else
        grad = f.grad
     end
 
     if f.hess === nothing
-        hess = (res, θ, args...) -> FiniteDiff.finite_difference_hessian!(res, x ->_f(x, args...), θ, FiniteDiff.HessianCache(x, adtype.fdhtype))
+        hess = (res, θ, args...) -> FiniteDiff.finite_difference_hessian!(res, x -> _f(x, args...), θ, FiniteDiff.HessianCache(x, adtype.fdhtype))
     else
        hess = f.hess
     end
@@ -64,13 +65,45 @@ function instantiate_function(f, x, adtype::AutoFiniteDiff, p, num_cons = 0)
         hv = function (H, θ, v, args...)
             res = ArrayInterfaceCore.zeromatrix(θ)
             hess(res, θ, args...)
-            H .= res*v
+            H .= res * v
         end
     else
         hv = f.hv
     end
 
-    return OptimizationFunction{false}(f, adtype; grad=grad, hess=hess, hv=hv,
-        cons=nothing, cons_j=nothing, cons_h=nothing,
-        hess_prototype=nothing, cons_jac_prototype=nothing, cons_hess_prototype=nothing)
+    if f.cons === nothing
+        cons = nothing
+    else
+        cons = θ -> f.cons(θ, p)
+    end
+
+    cons_jac_colorvec = f.cons_jac_colorvec === nothing ? (1:length(x)) : f.cons_jac_colorvec
+
+    if cons !== nothing && f.cons_j === nothing
+        function iip_cons(dx, x) # need f(dx, x) for jacobian?
+            dx .= [cons(x)[i] for i in 1:num_cons] # Not very efficient probably?
+            nothing
+        end
+        cons_j = function (J, θ)
+            y0 = zeros(num_cons)
+            FiniteDiff.finite_difference_jacobian!(J, iip_cons, θ, FiniteDiff.JacobianCache(copy(θ), copy(y0), copy(y0), adtype.fdjtype; colorvec = cons_jac_colorvec, sparsity = f.cons_jac_prototype))
+        end
+    else
+        cons_j = f.cons_j
+    end
+
+    if cons !== nothing && f.cons_h === nothing
+        cons_h = function (res, θ)
+            for i in 1:num_cons # note: colorvecs not yet supported by FiniteDiff for Hessians
+                FiniteDiff.finite_difference_hessian!(res[i], (x) -> cons(x)[i], θ, FiniteDiff.HessianCache(copy(θ), adtype.fdhtype))
+            end
+        end
+    else
+        cons_h = f.cons_h
+    end
+
+    return OptimizationFunction{true}(f, adtype; grad=grad, hess=hess, hv=hv,
+        cons=cons, cons_j=cons_j, cons_h=cons_h,
+        cons_jac_colorvec = cons_jac_colorvec,
+        hess_prototype=nothing, cons_jac_prototype=f.cons_jac_prototype, cons_hess_prototype=nothing)
 end
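
The sketch below is a minimal illustration of how the extended `AutoFiniteDiff` interface can be exercised, mirroring the calls in `test/ADtests.jl` further down. The objective and constraint functions are illustrative stand-ins, not part of this commit, and `using Optimization` is assumed as in the test suite.

```julia
using Optimization

# Hypothetical objective and a single constraint g(x) = x1^2 + x2^2 (stand-ins)
obj = (x, p) -> (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
cons = (x, p) -> [x[1]^2 + x[2]^2]

# fdjtype is the new field; it selects the finite-difference scheme for the constraint Jacobian
adtype = Optimization.AutoFiniteDiff(; fdtype = Val(:forward), fdjtype = Val(:forward), fdhtype = Val(:hcentral))

optf = OptimizationFunction(obj, adtype; cons = cons)
optprob = Optimization.instantiate_function(optf, zeros(2), adtype, nothing, 1)

J = zeros(1, 2)
optprob.cons_j(J, [5.0, 3.0])   # finite-difference constraint Jacobian; J ≈ [10.0 6.0]

H = [zeros(2, 2)]
optprob.cons_h(H, zeros(2))     # finite-difference constraint Hessians; H[1] ≈ [2.0 0.0; 0.0 2.0]
```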

test/ADtests.jl

Lines changed: 100 additions & 0 deletions
@@ -161,3 +161,103 @@ sol = solve(prob, Optim.KrylovTrustRegion())
 
 sol = solve(prob, Optimisers.ADAM(0.1), maxiters=1000)
 @test 10 * sol.minimum < l1
+
+# Test new constraints
+cons = (x, p) -> [x[1]^2 + x[2]^2]
+optf = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(), cons=cons)
+optprob = Optimization.instantiate_function(optf, x0, Optimization.AutoFiniteDiff(), nothing, 1)
+optprob.grad(G2, x0)
+@test G1 ≈ G2 rtol = 1e-6
+optprob.hess(H2, x0)
+@test H1 ≈ H2 rtol = 1e-6
+@test optprob.cons(x0) == [0.0]
+@test optprob.cons([1.0, 4.0]) == [17.0]
+J = zeros(1, 2)
+optprob.cons_j(J, [5.0, 3.0])
+@test J ≈ [10.0 6.0]
+H3 = [Array{Float64}(undef, 2, 2)]
+optprob.cons_h(H3, x0)
+@test H3 ≈ [[2.0 0.0; 0.0 2.0]]
+
+cons_jac_proto = Float64.(sparse([1 1])) # Things break if you only use [1 1]; see FiniteDiff.jl
+cons_jac_colors = 1:2
+optf = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(), cons=cons, cons_jac_prototype = cons_jac_proto, cons_jac_colorvec = cons_jac_colors)
+optprob = Optimization.instantiate_function(optf, x0, Optimization.AutoFiniteDiff(), nothing, 1)
+@test optprob.cons_jac_prototype == sparse([1.0 1.0]) # make sure it's still using it
+@test optprob.cons_jac_colorvec == 1:2
+J = zeros(1, 2)
+optprob.cons_j(J, [5.0, 3.0])
+@test J ≈ [10.0 6.0]
+
+function con2_c(x, p)
+    [x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]]
+end
+optf = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(), cons=con2_c)
+optprob = Optimization.instantiate_function(optf, x0, Optimization.AutoFiniteDiff(), nothing, 2)
+optprob.grad(G2, x0)
+@test G1 ≈ G2 rtol = 1e-6
+optprob.hess(H2, x0)
+@test H1 ≈ H2 rtol = 1e-6
+@test optprob.cons(x0) == [0.0, 0.0]
+@test optprob.cons([1.0, 2.0]) ≈ [5.0, 0.682941969615793]
+J = Array{Float64}(undef, 2, 2)
+optprob.cons_j(J, [5.0, 3.0])
+@test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol=1e-3))
+H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)]
+optprob.cons_h(H3, x0)
+@test H3 ≈ [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]]
+
+cons_jac_proto = Float64.(sparse([1 1; 1 1]))
+cons_jac_colors = 1:2
+optf = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(), cons=con2_c, cons_jac_prototype = cons_jac_proto, cons_jac_colorvec = cons_jac_colors)
+optprob = Optimization.instantiate_function(optf, x0, Optimization.AutoFiniteDiff(), nothing, 2)
+@test optprob.cons_jac_prototype == sparse([1.0 1.0; 1.0 1.0]) # make sure it's still using it
+@test optprob.cons_jac_colorvec == 1:2
+J = Array{Float64}(undef, 2, 2)
+optprob.cons_j(J, [5.0, 3.0])
+@test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol=1e-3))
+
+# Can we solve problems? Using AutoForwardDiff to test since we know that works
+for consf in [cons, con2_c]
+    optf1 = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(); cons = consf)
+    prob1 = OptimizationProblem(optf1, [0.3, 0.5], lb = [0.2, 0.4], ub = [0.6, 0.8])
+    sol1 = solve(prob1, BFGS())
+    optf2 = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff(); cons = consf)
+    prob2 = OptimizationProblem(optf2, [0.3, 0.5], lb = [0.2, 0.4], ub = [0.6, 0.8])
+    sol2 = solve(prob2, BFGS())
+    @test sol1.minimum ≈ sol2.minimum
+    @test sol1.u ≈ sol2.u
+
+    optf1 = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(); cons = consf)
+    lcons = consf == cons ? [0.2] : [0.2, -0.81]
+    ucons = consf == cons ? [0.55] : [0.55, -0.1]
+    prob1 = OptimizationProblem(optf1, [0.3, 0.5], lb = [0.2, 0.4], ub = [0.6, 0.8], lcons = lcons, ucons = ucons)
+    sol1 = solve(prob1, Optim.SAMIN(), maxiters = 10000) # a lot of iterations... doesn't even converge actually
+    optf2 = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff(); cons = consf)
+    prob2 = OptimizationProblem(optf2, [0.3, 0.5], lb = [0.2, 0.4], ub = [0.6, 0.8], lcons = lcons, ucons = ucons)
+    sol2 = solve(prob2, Optim.SAMIN(), maxiters = 10000)
+    @test sol1.minimum ≈ sol2.minimum rtol = 1e-4
+    @test sol1.u ≈ sol2.u
+    @test lcons[1] ≤ consf(sol1.u, nothing)[1] ≤ ucons[1]
+    if consf == con2_c
+        @test lcons[2] ≤ consf(sol1.u, nothing)[2] ≤ ucons[2]
+    end
+
+    # --- These equality constraints are so fiddly. Can't get it to pass with consf(sol1.u, nothing)[1] ≈ lcons[1] rtol = 0.1 being true
+    # (I can get sol1.minimum ≈ sol2.minimum and sol1.u ≈ sol2.u, though, just not the constraint - or I can get the constraint and not
+    # sol1.minimum ≈ sol2.minimum, sol1.u ≈ sol2.u)
+    lcons = consf == cons ? [0.2] : [0.2, 0.5]
+    ucons = consf == cons ? [0.2] : [0.2, 0.5]
+    optf1 = OptimizationFunction(rosenbrock, Optimization.AutoFiniteDiff(); cons = consf)
+    prob1 = OptimizationProblem(optf1, [0.5, 0.5], lcons = lcons, ucons = ucons)
+    sol1 = solve(prob1, Optim.IPNewton())
+    optf2 = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff(); cons = consf)
+    prob2 = OptimizationProblem(optf2, [0.5, 0.5], lcons = lcons, ucons = ucons)
+    sol2 = solve(prob2, Optim.IPNewton())
+    @test_broken sol1.minimum ≈ sol2.minimum
+    @test_broken sol1.u ≈ sol2.u
+    @test consf(sol1.u, nothing)[1] ≈ lcons[1] rtol = 0.1
+    if consf == con2_c
+        @test_broken consf(sol1.u, nothing)[2] ≈ lcons[2]
+    end
+end
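
As a companion to the solver checks above, here is a compact end-to-end sketch of the new constraint path: a finite-difference backend driving an interior-point solve. The objective, bounds, starting point, and solver choice are illustrative assumptions modelled on the same pattern as the tests, and `OptimizationOptimJL` is assumed to be loaded so that `Optim.IPNewton()` is available.

```julia
using Optimization, OptimizationOptimJL

obj = (x, p) -> (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2   # stand-in objective
cons = (x, p) -> [x[1]^2 + x[2]^2]                           # single constraint

optf = OptimizationFunction(obj, Optimization.AutoFiniteDiff(); cons = cons)
# Keep 0.2 <= x1^2 + x2^2 <= 0.55; constraint Jacobian/Hessian are supplied via FiniteDiff
prob = OptimizationProblem(optf, [0.5, 0.5], lcons = [0.2], ucons = [0.55])
sol = solve(prob, Optim.IPNewton())
```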
