diff --git a/src/SimpleNonlinearSolve.jl b/src/SimpleNonlinearSolve.jl
index 33f5895..940db08 100644
--- a/src/SimpleNonlinearSolve.jl
+++ b/src/SimpleNonlinearSolve.jl
@@ -24,13 +24,14 @@ include("klement.jl")
 include("trustRegion.jl")
 include("ridder.jl")
 include("brent.jl")
+include("dfsane.jl")
 include("ad.jl")
 
 import SnoopPrecompile
 
 SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64)
     prob_no_brack = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2))
-    for alg in (SimpleNewtonRaphson, Broyden, Klement, SimpleTrustRegion)
+    for alg in (SimpleNewtonRaphson, Broyden, Klement, SimpleTrustRegion, SimpleDFSane)
         solve(prob_no_brack, alg(), abstol = T(1e-2))
     end
 
@@ -51,7 +52,7 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64)
 end end
 
 # DiffEq styled algorithms
-export Bisection, Brent, Broyden, Falsi, Klement, Ridder, SimpleNewtonRaphson,
+export Bisection, Brent, Broyden, SimpleDFSane, Falsi, Klement, Ridder, SimpleNewtonRaphson,
        SimpleTrustRegion
 
 end # module
diff --git a/src/ad.jl b/src/ad.jl
index 58a6181..dbd9024 100644
--- a/src/ad.jl
+++ b/src/ad.jl
@@ -31,7 +31,7 @@ end
 function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector},
                                                 iip, <:Dual{T, V, P}},
-                         alg::Union{SimpleNewtonRaphson, SimpleTrustRegion},
+                         alg::AbstractSimpleNonlinearSolveAlgorithm,
                          args...; kwargs...) where {iip, T, V, P}
     sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...)
     return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid;
@@ -40,7 +40,7 @@ end
 function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector},
                                                 iip, <:AbstractArray{<:Dual{T, V, P}}},
-                         alg::Union{SimpleNewtonRaphson, SimpleTrustRegion}, args...;
+                         alg::AbstractSimpleNonlinearSolveAlgorithm, args...;
                          kwargs...) where {iip, T, V, P}
     sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...)
     return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid;
diff --git a/src/dfsane.jl b/src/dfsane.jl
new file mode 100644
index 0000000..3d2e9c8
--- /dev/null
+++ b/src/dfsane.jl
@@ -0,0 +1,155 @@
+"""
+```julia
+SimpleDFSane(; σ_min::Real = 1e-10, σ_max::Real = 1e10, σ_1::Real = 1.0,
+    M::Int = 10, γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
+    nexp::Int = 2, η_strategy::Function = (f_1, k, x, F) -> f_1 / k^2)
+```
+
+A low-overhead implementation of the DF-SANE method for solving large-scale nonlinear
+systems of equations. For in-depth information about all the parameters and the algorithm,
+see the paper: [W. La Cruz, J. M. Martinez, and M. Raydan (2006), Spectral residual method
+without gradient information for solving large-scale nonlinear systems of equations,
+Mathematics of Computation, 75, 1429-1448.](https://www.researchgate.net/publication/220576479_Spectral_Residual_Method_without_Gradient_Information_for_Solving_Large-Scale_Nonlinear_Systems_of_Equations)
+
+### Keyword Arguments
+
+- `σ_min`: the minimum value of the spectral coefficient `σ_k`, which is related to the step
+  size in the algorithm. Defaults to `1e-10`.
+- `σ_max`: the maximum value of the spectral coefficient `σ_k`, which is related to the step
+  size in the algorithm. Defaults to `1e10`.
+- `σ_1`: the initial value of the spectral coefficient `σ_k`, which is related to the step
+  size in the algorithm. Defaults to `1.0`.
+- `M`: The monotonicity of the algorithm is determined by this positive integer.
+  A value of 1 for `M` would result in strict monotonicity in the decrease of the L2-norm
+  of the function `f`. However, higher values allow for more flexibility in this reduction.
+  Despite this, the algorithm still ensures global convergence through the use of a
+  non-monotone line-search algorithm that adheres to the Grippo-Lampariello-Lucidi
+  condition. Values in the range of 5 to 20 are usually sufficient, but some cases may call
+  for a higher value of `M`. The default setting is 10.
+- `γ`: a parameter that influences whether a proposed step will be accepted. A higher value
+  of `γ` will make the algorithm more restrictive in accepting steps. Defaults to `1e-4`.
+- `τ_min`: if a step is rejected, the new step size will get multiplied by a factor, and this
+  parameter is the minimum value of that factor. Defaults to `0.1`.
+- `τ_max`: if a step is rejected, the new step size will get multiplied by a factor, and this
+  parameter is the maximum value of that factor. Defaults to `0.5`.
+- `nexp`: the exponent of the loss, i.e. ``f_k = ||F(x_k)||^{nexp}``. The paper uses
+  `nexp ∈ {1, 2}`. Defaults to `2`.
+- `η_strategy`: function to determine the parameter `η_k`, which enables growth
+  of ``||F||^2``. Called as ``η_k = η_strategy(f_1, k, x, F)`` with `f_1` initialized as
+  ``f_1 = ||F(x_1)||^{nexp}``, where `k` is the iteration number, `x` is the current
+  `x`-value, and `F` the current residual. Should satisfy ``η_k > 0`` and ``∑ₖ ηₖ < ∞``.
+  Defaults to ``||F||^2 / k^2``.
+"""
+struct SimpleDFSane{T} <: AbstractSimpleNonlinearSolveAlgorithm
+    σ_min::T
+    σ_max::T
+    σ_1::T
+    M::Int
+    γ::T
+    τ_min::T
+    τ_max::T
+    nexp::Int
+    η_strategy::Function
+
+    function SimpleDFSane(; σ_min::Real = 1e-10, σ_max::Real = 1e10, σ_1::Real = 1.0,
+                          M::Int = 10, γ::Real = 1e-4, τ_min::Real = 0.1,
+                          τ_max::Real = 0.5, nexp::Int = 2,
+                          η_strategy::Function = (f_1, k, x, F) -> f_1 / k^2)
+        new{typeof(σ_min)}(σ_min, σ_max, σ_1, M, γ, τ_min, τ_max, nexp, η_strategy)
+    end
+end
+
+function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleDFSane,
+                           args...; abstol = nothing, reltol = nothing, maxiters = 1000,
+                           kwargs...)
+    f = Base.Fix2(prob.f, prob.p)
+    x = float(prob.u0)
+    T = eltype(x)
+    σ_min = float(alg.σ_min)
+    σ_max = float(alg.σ_max)
+    σ_k = float(alg.σ_1)
+    M = alg.M
+    γ = float(alg.γ)
+    τ_min = float(alg.τ_min)
+    τ_max = float(alg.τ_max)
+    nexp = alg.nexp
+    η_strategy = alg.η_strategy
+
+    if SciMLBase.isinplace(prob)
+        error("SimpleDFSane currently only supports out-of-place nonlinear problems")
+    end
+
+    atol = abstol !== nothing ? abstol :
+           real(oneunit(eltype(T))) * (eps(real(one(eltype(T)))))^(4 // 5)
+    rtol = reltol !== nothing ? reltol :
+           eps(real(one(eltype(T))))^(4 // 5)
+
+    function ff(x)
+        F = f(x)
+        f_k = norm(F)^nexp
+        return f_k, F
+    end
+
+    f_k, F_k = ff(x)
+    α_1 = convert(T, 1.0)
+    f_1 = f_k
+    history_f_k = fill(f_k, M)
+
+    for k in 1:maxiters
+        iszero(F_k) &&
+            return SciMLBase.build_solution(prob, alg, x, F_k;
+                                            retcode = ReturnCode.Success)
+
+        # Spectral parameter range check
+        if abs(σ_k) > σ_max
+            σ_k = sign(σ_k) * σ_max
+        elseif abs(σ_k) < σ_min
+            σ_k = sign(σ_k) * σ_min
+        end
+
+        # Line search direction
+        d = -σ_k * F_k
+
+        η = η_strategy(f_1, k, x, F_k)
+        f̄ = maximum(history_f_k)
+        α_p = α_1
+        α_m = α_1
+        x_new = x + α_p * d
+        f_new, F_new = ff(x_new)
+        while true
+            if f_new ≤ f̄ + η - γ * α_p^2 * f_k
+                break
+            end
+
+            α_tp = α_p^2 * f_k / (f_new + (2 * α_p - 1) * f_k)
+            x_new = x - α_m * d
+            f_new, F_new = ff(x_new)
+
+            if f_new ≤ f̄ + η - γ * α_m^2 * f_k
+                break
+            end
+
+            α_tm = α_m^2 * f_k / (f_new + (2 * α_m - 1) * f_k)
+            α_p = min(τ_max * α_p, max(α_tp, τ_min * α_p))
+            α_m = min(τ_max * α_m, max(α_tm, τ_min * α_m))
+            x_new = x + α_p * d
+            f_new, F_new = ff(x_new)
+        end
+
+        if isapprox(x_new, x, atol = atol, rtol = rtol)
+            return SciMLBase.build_solution(prob, alg, x_new, F_new;
+                                            retcode = ReturnCode.Success)
+        end
+        # Update spectral parameter
+        s_k = x_new - x
+        y_k = F_new - F_k
+        σ_k = (s_k' * s_k) / (s_k' * y_k)
+
+        # Take step
+        x = x_new
+        F_k = F_new
+        f_k = f_new
+
+        # Store function value
+        history_f_k[k % M + 1] = f_new
+    end
+    return SciMLBase.build_solution(prob, alg, x, F_k; retcode = ReturnCode.MaxIters)
+end
diff --git a/test/basictests.jl b/test/basictests.jl
index fa15e79..d8fb583 100644
--- a/test/basictests.jl
+++ b/test/basictests.jl
@@ -62,13 +62,24 @@ sol = benchmark_scalar(sf, csu0)
 @test sol.retcode === ReturnCode.Success
 @test sol.u * sol.u - 2 < 1e-9
 
+# SimpleDFSane
+function benchmark_scalar(f, u0)
+    probN = NonlinearProblem{false}(f, u0)
+    sol = (solve(probN, SimpleDFSane()))
+end
+
+sol = benchmark_scalar(sf, csu0)
+@test sol.retcode === ReturnCode.Success
+@test sol.u * sol.u - 2 < 1e-9
+
 # AD Tests
 using ForwardDiff
 
 # Immutable
 f, u0 = (u, p) -> u .* u .- p, @SVector[1.0, 1.0]
 
-for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion())
+for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion(),
+            SimpleDFSane())
     g = function (p)
         probN = NonlinearProblem{false}(f, csu0, p)
         sol = solve(probN, alg, abstol = 1e-9)
@@ -76,14 +87,15 @@ for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion())
     end
 
     for p in 1.1:0.1:100.0
-        @test g(p) ≈ sqrt(p)
-        @test ForwardDiff.derivative(g, p) ≈ 1 / (2 * sqrt(p))
+        @test abs.(g(p)) ≈ sqrt(p)
+        @test abs.(ForwardDiff.derivative(g, p)) ≈ 1 / (2 * sqrt(p))
     end
 end
 
 # Scalar
 f, u0 = (u, p) -> u * u - p, 1.0
-for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion())
+for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion(),
+            SimpleDFSane())
     g = function (p)
         probN = NonlinearProblem{false}(f, oftype(p, u0), p)
         sol = solve(probN, alg)
@@ -91,8 +103,8 @@ for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion())
     end
 
     for p in 1.1:0.1:100.0
-        @test g(p) ≈ sqrt(p)
-        @test ForwardDiff.derivative(g, p) ≈ 1 / (2 * sqrt(p))
+        @test abs(g(p)) ≈ sqrt(p)
+        @test abs(ForwardDiff.derivative(g, p)) ≈ 1 / (2 * sqrt(p))
     end
 end
 
@@ -148,7 +160,8 @@ for alg in [Bisection(), Falsi(), Ridder(), Brent()]
     @test ForwardDiff.jacobian(g, p) ≈ ForwardDiff.jacobian(t, p)
 end
 
-for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion())
+for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion(),
+            SimpleDFSane())
     global g, p
     g = function (p)
         probN = NonlinearProblem{false}(f, 0.5, p)
@@ -169,6 +182,7 @@ probN = NonlinearProblem(f, u0)
 @test solve(probN, SimpleTrustRegion(; autodiff = false)).u[end] ≈ sqrt(2.0)
 @test solve(probN, Broyden()).u[end] ≈ sqrt(2.0)
 @test solve(probN, Klement()).u[end] ≈ sqrt(2.0)
+@test solve(probN, SimpleDFSane()).u[end] ≈ sqrt(2.0)
 
 for u0 in [1.0, [1, 1.0]]
     local f, probN, sol
@@ -185,8 +199,8 @@ for u0 in [1.0, [1, 1.0]]
     @test solve(probN, SimpleTrustRegion(; autodiff = false)).u ≈ sol
 
     @test solve(probN, Broyden()).u ≈ sol
-    @test solve(probN, Klement()).u ≈ sol
+    @test solve(probN, SimpleDFSane()).u ≈ sol
 end
 
 # Bisection Tests
@@ -299,3 +313,58 @@ for options in list_of_options
     sol = solve(probN, alg)
     @test all(abs.(f(u, p)) .< 1e-10)
 end
+
+# Test that `SimpleDFSane` passes a test that `SimpleNewtonRaphson` fails on.
+u0 = [-10.0, -1.0, 1.0, 2.0, 3.0, 4.0, 10.0]
+global g, f
+f = (u, p) -> 0.010000000000000002 .+
+              10.000000000000002 ./ (1 .+
+               (0.21640425613334457 .+
+                216.40425613334457 ./ (1 .+
+                 (0.21640425613334457 .+
+                  216.40425613334457 ./
+                  (1 .+ 0.0006250000000000001(u .^ 2.0))) .^ 2.0)) .^ 2.0) .-
+              0.0011552453009332421u .- p
+g = function (p)
+    probN = NonlinearProblem{false}(f, u0, p)
+    sol = solve(probN, SimpleDFSane())
+    return sol.u
+end
+p = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
+u = g(p)
+f(u, p)
+@test all(abs.(f(u, p)) .< 1e-10)
+
+# Test kwargs in `SimpleDFSane`
+σ_min = [1e-10, 1e-5, 1e-4]
+σ_max = [1e10, 1e5, 1e4]
+σ_1 = [1.0, 0.5, 2.0]
+M = [10, 1, 100]
+γ = [1e-4, 1e-3, 1e-5]
+τ_min = [0.1, 0.2, 0.3]
+τ_max = [0.5, 0.8, 0.9]
+nexp = [2, 1, 2]
+η_strategy = [
+    (f_1, k, x, F) -> f_1 / k^2,
+    (f_1, k, x, F) -> f_1 / k^3,
+    (f_1, k, x, F) -> f_1 / k^4,
+]
+
+list_of_options = zip(σ_min, σ_max, σ_1, M, γ, τ_min, τ_max, nexp,
+                      η_strategy)
+for options in list_of_options
+    local probN, sol, alg
+    alg = SimpleDFSane(σ_min = options[1],
+                       σ_max = options[2],
+                       σ_1 = options[3],
+                       M = options[4],
+                       γ = options[5],
+                       τ_min = options[6],
+                       τ_max = options[7],
+                       nexp = options[8],
+                       η_strategy = options[9])
+
+    probN = NonlinearProblem(f, u0, p)
+    sol = solve(probN, alg)
+    @test all(abs.(f(sol.u, p)) .< 1e-10)
+end
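For reference, a minimal out-of-place usage sketch of the `SimpleDFSane` solver added in this diff. The residual function, initial guess, and tolerance below are illustrative placeholders rather than anything taken from the test suite; the keyword names (`M`, `η_strategy`) are the ones documented in the docstring above.

```julia
using SimpleNonlinearSolve, StaticArrays

# Out-of-place residual F(u, p) = u .* u .- p, whose roots are ±sqrt(p).
f(u, p) = u .* u .- p
prob = NonlinearProblem{false}(f, @SVector[1.0, 1.0], 2.0)

# Solve with the default DF-SANE settings.
sol = solve(prob, SimpleDFSane(), abstol = 1e-9)
sol.u  # ≈ [sqrt(2), sqrt(2)] up to sign; DF-SANE may land on either root

# Stricter monotonicity window (M = 1) and a faster-decaying η_k sequence;
# any η_strategy is admissible as long as η_k > 0 and ∑ₖ ηₖ < ∞.
alg = SimpleDFSane(M = 1, η_strategy = (f_1, k, x, F) -> f_1 / k^3)
sol2 = solve(prob, alg, abstol = 1e-9)
```

Because the method is derivative-free, the same call works on residuals where a Jacobian-based method struggles, which is what the final test block above exercises against `SimpleNewtonRaphson`.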