Skip to content
This repository was archived by the owner on May 15, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/SimpleNonlinearSolve.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ include("raphson.jl")
include("ad.jl")
include("broyden.jl")
include("klement.jl")
include("trustRegion.jl")

import SnoopPrecompile

Expand All @@ -30,6 +31,10 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64)
solve(prob_no_brack, alg(), tol = T(1e-2))
end

for alg in (TrustRegion(10.0),)
solve(prob_no_brack, alg, tol = T(1e-2))
end

#=
for alg in (SimpleNewtonRaphson,)
for u0 in ([1., 1.], StaticArraysCore.SA[1.0, 1.0])
Expand All @@ -47,6 +52,6 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64)
end end

# DiffEq styled algorithms
export Bisection, Broyden, Falsi, Klement, SimpleNewtonRaphson
export Bisection, Broyden, Falsi, Klement, SimpleNewtonRaphson, TrustRegion

end # module
174 changes: 174 additions & 0 deletions src/trustRegion.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
"""
```julia
TrustRegion(max_trust_radius::Number; chunk_size = Val{0}(),
autodiff = Val{true}(), diff_type = Val{:forward})
```

A low-overhead implementation of a
[trust-region](https://optimization.mccormick.northwestern.edu/index.php/Trust-region_methods)
solver

### Arguments
- `max_trust_radius`: the maximum radius of the trust region. The step size in the algorithm
will change dynamically. However, it will never be greater than the `max_trust_radius`.

### Keyword Arguments

- `chunk_size`: the chunk size used by the internal ForwardDiff.jl automatic differentiation
system. This allows for multiple derivative columns to be computed simultaneously,
improving performance. Defaults to `0`, which is equivalent to using ForwardDiff.jl's
default chunk size mechanism. For more details, see the documentation for
[ForwardDiff.jl](https://juliadiff.org/ForwardDiff.jl/stable/).
- `autodiff`: whether to use forward-mode automatic differentiation for the Jacobian.
Note that this argument is ignored if an analytical Jacobian is passed; as that will be
used instead. Defaults to `Val{true}`, which means ForwardDiff.jl is used by default.
If `Val{false}`, then FiniteDiff.jl is used for finite differencing.
- `diff_type`: the type of finite differencing used if `autodiff = false`. Defaults to
`Val{:forward}` for forward finite differences. For more details on the choices, see the
[FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl) documentation.
- `initial_trust_radius`: the initial trust region radius. Defaults to
`max_trust_radius / 11`.
- `step_threshold`: the threshold for taking a step. In every iteration, the threshold is
compared with a value `r`, which is the actual reduction in the objective function divided
by the predicted reduction. If `step_threshold > r` the model is not a good approximation,
and the step is rejected. Defaults to `0.1`. For more details, see
[Trust-region methods](https://optimization.mccormick.northwestern.edu/index.php/Trust-region_methods)
- `shrink_threshold`: the threshold for shrinking the trust region radius. In every
iteration, the threshold is compared with a value `r` which is the actual reduction in the
objective function divided by the predicted reduction. If `shrink_threshold > r` the trust
region radius is shrunk by `shrink_factor`. Defaults to `0.25`. For more details, see
[Trust-region methods](https://optimization.mccormick.northwestern.edu/index.php/Trust-region_methods)
- `expand_threshold`: the threshold for expanding the trust region radius. If a step is
taken, i.e `step_threshold < r` (with `r` defined in `shrink_threshold`), a check is also
made to see if `expand_threshold < r`. If that is true, the trust region radius is
expanded by `expand_factor`. Defaults to `0.75`.
- `shrink_factor`: the factor to shrink the trust region radius with if
`shrink_threshold > r` (with `r` defined in `shrink_threshold`). Defaults to `0.25`.
- `expand_factor`: the factor to expand the trust region radius with if
`expand_threshold < r` (with `r` defined in `shrink_threshold`). Defaults to `2.0`.
- `max_shrink_times`: the maximum number of times to shrink the trust region radius in a
row, `max_shrink_times` is exceeded, the algorithm returns. Defaults to `32`.
"""
struct TrustRegion{CS, AD, FDT} <: AbstractNewtonAlgorithm{CS, AD, FDT}
max_trust_radius::Number
initial_trust_radius::Number
step_threshold::Number
shrink_threshold::Number
expand_threshold::Number
shrink_factor::Number
expand_factor::Number
max_shrink_times::Int
function TrustRegion(max_trust_radius::Number; chunk_size = Val{0}(),
autodiff = Val{true}(),
diff_type = Val{:forward},
initial_trust_radius::Number = max_trust_radius / 11,
step_threshold::Number = 0.1,
shrink_threshold::Number = 0.25,
expand_threshold::Number = 0.75,
shrink_factor::Number = 0.25,
expand_factor::Number = 2.0,
max_shrink_times::Int = 32)
new{SciMLBase._unwrap_val(chunk_size), SciMLBase._unwrap_val(autodiff),
SciMLBase._unwrap_val(diff_type)}(max_trust_radius, initial_trust_radius,
step_threshold,
shrink_threshold, expand_threshold,
shrink_factor,
expand_factor, max_shrink_times)
end
end

function SciMLBase.solve(prob::NonlinearProblem,
alg::TrustRegion, args...; abstol = nothing,
reltol = nothing,
maxiters = 1000, kwargs...)
f = Base.Fix2(prob.f, prob.p)
x = float(prob.u0)
T = typeof(x)
Δₘₐₓ = float(alg.max_trust_radius)
Δ = float(alg.initial_trust_radius)
η₁ = float(alg.step_threshold)
η₂ = float(alg.shrink_threshold)
η₃ = float(alg.expand_threshold)
t₁ = float(alg.shrink_factor)
t₂ = float(alg.expand_factor)
max_shrink_times = alg.max_shrink_times

if SciMLBase.isinplace(prob)
error("TrustRegion currently only supports out-of-place nonlinear problems")
end

atol = abstol !== nothing ? abstol :
real(oneunit(eltype(T))) * (eps(real(one(eltype(T)))))^(4 // 5)
rtol = reltol !== nothing ? reltol : eps(real(one(eltype(T))))^(4 // 5)

if alg_autodiff(alg)
F, ∇f = value_derivative(f, x)
elseif x isa AbstractArray
F = f(x)
∇f = FiniteDiff.finite_difference_jacobian(f, x, diff_type(alg), eltype(x), F)
else
F = f(x)
∇f = FiniteDiff.finite_difference_derivative(f, x, diff_type(alg), eltype(x), F)
end

fₖ = 0.5 * norm(F)^2
H = ∇f * ∇f
g = ∇f * F
shrink_counter = 0

for k in 1:maxiters
# Solve the trust region subproblem.
δ = dogleg_method(H, g, Δ)
xₖ₊₁ = x + δ
Fₖ₊₁ = f(xₖ₊₁)
fₖ₊₁ = 0.5 * norm(Fₖ₊₁)^2

# Compute the ratio of the actual to predicted reduction.
model = -(δ' * g + 0.5 * δ' * H * δ)
r = model \ (fₖ - fₖ₊₁)

# Update the trust region radius.
if r < η₂
Δ = t₁ * Δ
shrink_counter += 1
if shrink_counter > max_shrink_times
return SciMLBase.build_solution(prob, alg, x, F;
retcode = ReturnCode.Success)
end
else
shrink_counter = 0
end
if r > η₁
if isapprox(xₖ₊₁, x, atol = atol, rtol = rtol)
return SciMLBase.build_solution(prob, alg, xₖ₊₁, Fₖ₊₁;
retcode = ReturnCode.Success)
end
# Take the step.
x = xₖ₊₁
F = Fₖ₊₁
if alg_autodiff(alg)
F, ∇f = value_derivative(f, x)
elseif x isa AbstractArray
∇f = FiniteDiff.finite_difference_jacobian(f, x, diff_type(alg), eltype(x),
F)
else
∇f = FiniteDiff.finite_difference_derivative(f, x, diff_type(alg),
eltype(x),
F)
end

iszero(F) &&
return SciMLBase.build_solution(prob, alg, x, F;
retcode = ReturnCode.Success)

# Update the trust region radius.
if r > η₃ && norm(δ) ≈ Δ
Δ = min(t₂ * Δ, Δₘₐₓ)
end
fₖ = fₖ₊₁
H = ∇f * ∇f
g = ∇f * F
end
end
return SciMLBase.build_solution(prob, alg, x, F; retcode = ReturnCode.MaxIters)
end
25 changes: 25 additions & 0 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,28 @@ function init_J(x)
end
return J
end

function dogleg_method(H, g, Δ)
# Compute the Newton step.
δN = -H \ g
# Test if the full step is within the trust region.
if norm(δN) ≤ Δ
return δN
end

# Calcualte Cauchy point, optimum along the steepest descent direction.
δsd = -g
norm_δsd = norm(δsd)
if norm_δsd ≥ Δ
return δsd .* Δ / norm_δsd
end

# Find the intersection point on the boundary.
δN_δsd = δN - δsd
dot_δN_δsd = dot(δN_δsd, δN_δsd)
dot_δsd_δN_δsd = dot(δsd, δN_δsd)
dot_δsd = dot(δsd, δsd)
fact = dot_δsd_δN_δsd^2 - dot_δN_δsd * (dot_δsd - Δ^2)
tau = (-dot_δsd_δN_δsd + sqrt(fact)) / dot_δN_δsd
return δsd + tau * δN_δsd
end
79 changes: 76 additions & 3 deletions test/basictests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,24 @@ sol = benchmark_scalar(sf, csu0)
@test sol.u * sol.u - 2 < 1e-9
@test (@ballocated benchmark_scalar(sf, csu0)) == 0

# TrustRegion
function benchmark_scalar(f, u0)
probN = NonlinearProblem{false}(f, u0)
sol = (solve(probN, TrustRegion(10.0)))
end

sol = benchmark_scalar(sf, csu0)
@test sol.retcode === ReturnCode.Success
@test sol.u * sol.u - 2 < 1e-9

# AD Tests
using ForwardDiff

# Immutable
f, u0 = (u, p) -> u .* u .- p, @SVector[1.0, 1.0]

for alg in [SimpleNewtonRaphson(), Broyden(), Klement()]
for alg in [SimpleNewtonRaphson(), Broyden(), Klement(),
TrustRegion(10.0)]
g = function (p)
probN = NonlinearProblem{false}(f, csu0, p)
sol = solve(probN, alg, tol = 1e-9)
Expand All @@ -67,7 +78,8 @@ end

# Scalar
f, u0 = (u, p) -> u * u - p, 1.0
for alg in [SimpleNewtonRaphson(), Broyden(), Klement()]
for alg in [SimpleNewtonRaphson(), Broyden(), Klement(),
TrustRegion(10.0)]
g = function (p)
probN = NonlinearProblem{false}(f, oftype(p, u0), p)
sol = solve(probN, alg)
Expand Down Expand Up @@ -108,7 +120,8 @@ for alg in [Bisection(), Falsi()]
@test ForwardDiff.jacobian(g, p) ≈ ForwardDiff.jacobian(t, p)
end

for alg in [SimpleNewtonRaphson(), Broyden(), Klement()]
for alg in [SimpleNewtonRaphson(), Broyden(), Klement(),
TrustRegion(10.0)]
global g, p
g = function (p)
probN = NonlinearProblem{false}(f, 0.5, p)
Expand All @@ -128,6 +141,11 @@ probN = NonlinearProblem(f, u0)
@test solve(probN, SimpleNewtonRaphson(; autodiff = false)).u[end] ≈ sqrt(2.0)
@test solve(probN, SimpleNewtonRaphson(; autodiff = false)).u[end] ≈ sqrt(2.0)

@test solve(probN, TrustRegion(10.0)).u[end] ≈ sqrt(2.0)
@test solve(probN, TrustRegion(10.0); immutable = false).u[end] ≈ sqrt(2.0)
@test solve(probN, TrustRegion(10.0; autodiff = false)).u[end] ≈ sqrt(2.0)
@test solve(probN, TrustRegion(10.0; autodiff = false)).u[end] ≈ sqrt(2.0)

@test solve(probN, Broyden()).u[end] ≈ sqrt(2.0)
@test solve(probN, Broyden(); immutable = false).u[end] ≈ sqrt(2.0)

Expand All @@ -144,6 +162,10 @@ for u0 in [1.0, [1, 1.0]]
@test solve(probN, SimpleNewtonRaphson()).u ≈ sol
@test solve(probN, SimpleNewtonRaphson(; autodiff = false)).u ≈ sol

@test solve(probN, TrustRegion(10.0)).u ≈ sol
@test solve(probN, TrustRegion(10.0)).u ≈ sol
@test solve(probN, TrustRegion(10.0; autodiff = false)).u ≈ sol

@test solve(probN, Broyden()).u ≈ sol

@test solve(probN, Klement()).u ≈ sol
Expand Down Expand Up @@ -185,3 +207,54 @@ sol = solve(probB, Bisection(; exact_left = true, exact_right = true); immutable
@test f(nextfloat(sol.left), nothing) >= 0.0
@test f(sol.right, nothing) >= 0.0
@test f(prevfloat(sol.right), nothing) <= 0.0

# Test that `TrustRegion` passes a test that `SimpleNewtonRaphson` fails on.
u0 = [-10.0, -1.0, 1.0, 2.0, 3.0, 4.0, 10.0]
global g, f
f = (u, p) -> 0.010000000000000002 .+
10.000000000000002 ./ (1 .+
(0.21640425613334457 .+
216.40425613334457 ./ (1 .+
(0.21640425613334457 .+
216.40425613334457 ./
(1 .+ 0.0006250000000000001(u .^ 2.0))) .^ 2.0)) .^ 2.0) .-
0.0011552453009332421u
.-p
g = function (p)
probN = NonlinearProblem{false}(f, u0, p)
sol = solve(probN, TrustRegion(100.0))
return sol.u
end
p = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
u = g(p)
f(u, p)
@test all(f(u, p) .< 1e-10)

# Test kwars in `TrustRegion`
max_trust_radius = [10.0, 100.0, 1000.0]
initial_trust_radius = [10.0, 1.0, 0.1]
step_threshold = [0.0, 0.01, 0.25]
shrink_threshold = [0.25, 0.3, 0.5]
expand_threshold = [0.5, 0.8, 0.9]
shrink_factor = [0.1, 0.3, 0.5]
expand_factor = [1.5, 2.0, 3.0]
max_shrink_times = [10, 20, 30]

list_of_options = zip(max_trust_radius, initial_trust_radius, step_threshold,
shrink_threshold, expand_threshold, shrink_factor,
expand_factor, max_shrink_times)
for options in list_of_options
local probN, sol, alg
alg = TrustRegion(options[1];
initial_trust_radius = options[2],
step_threshold = options[3],
shrink_threshold = options[4],
expand_threshold = options[5],
shrink_factor = options[6],
expand_factor = options[7],
max_shrink_times = options[8])

probN = NonlinearProblem(f, u0, p)
sol = solve(probN, alg)
@test all(f(u, p) .< 1e-10)
end