SciML · ChrisRackauckas · Oct 10, 2023 · Sep 21, 2023 · Sep 21, 2023 · Sep 21, 2023
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -16,8 +16,7 @@ jobs:
     strategy:
       matrix:
         group:
-          - Core
-          - 23TestProblems
+          - All
         version:
           - '1'
     steps:
@@ -41,6 +40,8 @@ jobs:
           GROUP: ${{ matrix.group }}
           JULIA_NUM_THREADS: 11
       - uses: julia-actions/julia-processcoverage@v1
+        with:
+          directories: src,ext
       - uses: codecov/codecov-action@v3
         with:
           file: lcov.info
diff --git a/.github/workflows/Downstream.yml b/.github/workflows/Downstream.yml
@@ -20,6 +20,7 @@ jobs:
           - {user: SciML, repo: ModelingToolkit.jl, group: All}
           - {user: SciML, repo: OrdinaryDiffEq.jl, group: Interface}
           - {user: SciML, repo: OrdinaryDiffEq.jl, group: Regression}
+          - {user: SciML, repo: BoundaryValueDiffEq.jl, group: All}
     steps:
       - uses: actions/checkout@v4
       - uses: julia-actions/setup-julia@v1
@@ -50,6 +51,8 @@ jobs:
             exit(0)  # Exit immediately, as a success
           end
       - uses: julia-actions/julia-processcoverage@v1
+        with:
+          directories: src,ext
       - uses: codecov/codecov-action@v3
         with:
           file: lcov.info
diff --git a/Project.toml b/Project.toml
@@ -39,7 +39,7 @@ NonlinearProblemLibrary = "0.1"
 PrecompileTools = "1"
 RecursiveArrayTools = "2"
 Reexport = "0.2, 1"
-SciMLBase = "1.97, 2"
+SciMLBase = "2.4"
 SimpleNonlinearSolve = "0.1"
 SparseDiffTools = "2.6"
 StaticArraysCore = "1.4"

diff --git a/docs/pages.jl b/docs/pages.jl
@@ -12,7 +12,8 @@ pages = ["index.md",
         "basics/FAQ.md"],
     "Solver Summaries and Recommendations" => Any["solvers/NonlinearSystemSolvers.md",
         "solvers/BracketingSolvers.md",
-        "solvers/SteadyStateSolvers.md"],
+        "solvers/SteadyStateSolvers.md",
+        "solvers/NonlinearLeastSquaresSolvers.md"],
     "Detailed Solver APIs" => Any["api/nonlinearsolve.md",
         "api/simplenonlinearsolve.md",
         "api/minpack.md",

diff --git a/docs/src/api/nonlinearsolve.md b/docs/src/api/nonlinearsolve.md
@@ -7,6 +7,8 @@ These are the native solvers of NonlinearSolve.jl.
 ```@docs
 NewtonRaphson
 TrustRegion
+LevenbergMarquardt
+GaussNewton
 ```
 
 ## Radius Update Schemes for Trust Region (RadiusUpdateSchemes)

diff --git a/docs/src/solvers/NonlinearLeastSquaresSolvers.md b/docs/src/solvers/NonlinearLeastSquaresSolvers.md
@@ -0,0 +1,32 @@
+# Nonlinear Least Squares Solvers
+
+`solve(prob::NonlinearLeastSquaresProblem, alg; kwargs...)`
+
+Solves the nonlinear least squares problem defined by `prob` using the algorithm
+`alg`. If no algorithm is given, a default algorithm will be chosen.
+
+## Recommended Methods
+
+`LevenbergMarquardt` is a good choice for most problems.
+
+## Full List of Methods
+
+  - `LevenbergMarquardt()`: An advanced Levenberg-Marquardt implementation with the
+    improvements suggested in the [paper](https://arxiv.org/abs/1201.5885) "Improvements to
+    the Levenberg-Marquardt algorithm for nonlinear least-squares minimization". Designed for
+    large-scale and numerically-difficult nonlinear systems.
+  - `GaussNewton()`: An advanced GaussNewton implementation with support for efficient
+    handling of sparse matrices via colored automatic differentiation and preconditioned
+    linear solvers. Designed for large-scale and numerically-difficult nonlinear least squares
+    problems.
+  - `SimpleNewtonRaphson()`: Newton Raphson implementation that uses Linear Least Squares
+    solution at every step to compute the descent direction. **WARNING**: This method is not
+    a robust solver for nonlinear least squares problems. The computed delta step might not
+    be the correct descent direction!
+
+## Example usage
+
+```julia
+using NonlinearSolve
+sol = solve(prob, LevenbergMarquardt())
+```
diff --git a/docs/src/solvers/NonlinearSystemSolvers.md b/docs/src/solvers/NonlinearSystemSolvers.md
@@ -42,6 +42,10 @@ features, but have a bit of overhead on very small problems.
     methods for high performance on large and sparse systems.
   - `TrustRegion()`: A Newton Trust Region dogleg method with swappable nonlinear solvers and
     autodiff methods for high performance on large and sparse systems.
+  - `LevenbergMarquardt()`: An advanced Levenberg-Marquardt implementation with the
+    improvements suggested in the [paper](https://arxiv.org/abs/1201.5885) "Improvements to
+    the Levenberg-Marquardt algorithm for nonlinear least-squares minimization". Designed for
+    large-scale and numerically-difficult nonlinear systems.
 
 ### SimpleNonlinearSolve.jl
 

diff --git a/docs/src/tutorials/advanced.md b/docs/src/tutorials/advanced.md
@@ -278,12 +278,12 @@ For more information on the preconditioner interface, see the
 
 ## Speed up Jacobian computation with sparsity exploitation and matrix coloring
 
-To cut down the of Jacobian building overhead, we can choose to exploit the sparsity pattern and deploy matrix coloring during Jacobian construction. With NonlinearSolve.jl, we can simply use ```autodiff=AutoSparseForwardDiff()``` to automatically exploit the sparsity pattern of Jacobian matrices:
+To cut down the of Jacobian building overhead, we can choose to exploit the sparsity pattern and deploy matrix coloring during Jacobian construction. With NonlinearSolve.jl, we can simply use `autodiff=AutoSparseForwardDiff()` to automatically exploit the sparsity pattern of Jacobian matrices:
 
 ```@example ill_conditioned_nlprob
 @benchmark solve(prob_brusselator_2d,
-    NewtonRaphson(linsolve=KrylovJL_GMRES(), precs=incompletelu, concrete_jac=true,
-        autodiff=AutoSparseForwardDiff()));
+    NewtonRaphson(linsolve = KrylovJL_GMRES(), precs = incompletelu, concrete_jac = true,
+        autodiff = AutoSparseForwardDiff()));
 nothing # hide
 ```
 
@@ -292,11 +292,11 @@ To setup matrix coloring for the jacobian sparsity pattern, we can simply get th
 ```@example ill_conditioned_nlprob
 using ArrayInterface
 colorvec = ArrayInterface.matrix_colors(jac_sparsity)
-ff = NonlinearFunction(brusselator_2d_loop; jac_prototype=float.(jac_sparsity), colorvec)
+ff = NonlinearFunction(brusselator_2d_loop; jac_prototype = float.(jac_sparsity), colorvec)
 prob_brusselator_2d_sparse = NonlinearProblem(ff, u0, p)
 
 @benchmark solve(prob_brusselator_2d_sparse,
-    NewtonRaphson(linsolve=KrylovJL_GMRES(), precs=incompletelu, concrete_jac=true,
-        autodiff=AutoSparseForwardDiff()));
+    NewtonRaphson(linsolve = KrylovJL_GMRES(), precs = incompletelu, concrete_jac = true,
+        autodiff = AutoSparseForwardDiff()));
 nothing # hide
-```
+```
diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
@@ -28,17 +28,43 @@ const AbstractSparseADType = Union{ADTypes.AbstractSparseFiniteDifferences,
 abstract type AbstractNonlinearSolveAlgorithm <: AbstractNonlinearAlgorithm end
 abstract type AbstractNewtonAlgorithm{CJ, AD} <: AbstractNonlinearSolveAlgorithm end
 
-function SciMLBase.__solve(prob::NonlinearProblem, alg::AbstractNonlinearSolveAlgorithm,
-    args...; kwargs...)
+abstract type AbstractNonlinearSolveCache{iip} end
+
+isinplace(::AbstractNonlinearSolveCache{iip}) where {iip} = iip
+
+function SciMLBase.__solve(prob::Union{NonlinearProblem, NonlinearLeastSquaresProblem},
+    alg::AbstractNonlinearSolveAlgorithm, args...; kwargs...)
     cache = init(prob, alg, args...; kwargs...)
     return solve!(cache)
 end
 
+function not_terminated(cache::AbstractNonlinearSolveCache)
+    return !cache.force_stop && cache.stats.nsteps < cache.maxiters
+end
+get_fu(cache::AbstractNonlinearSolveCache) = cache.fu1
+
+function SciMLBase.solve!(cache::AbstractNonlinearSolveCache)
+    while not_terminated(cache)
+        perform_step!(cache)
+        cache.stats.nsteps += 1
+    end
+
+    if cache.stats.nsteps == cache.maxiters
+        cache.retcode = ReturnCode.MaxIters
+    else
+        cache.retcode = ReturnCode.Success
+    end
+
+    return SciMLBase.build_solution(cache.prob, cache.alg, cache.u, get_fu(cache);
+        cache.retcode, cache.stats)
+end
+
 include("utils.jl")
 include("linesearch.jl")
 include("raphson.jl")
 include("trustRegion.jl")
 include("levenberg.jl")
+include("gaussnewton.jl")
 include("jacobian.jl")
 include("ad.jl")
 
@@ -66,7 +92,7 @@ end
 
 export RadiusUpdateSchemes
 
-export NewtonRaphson, TrustRegion, LevenbergMarquardt
+export NewtonRaphson, TrustRegion, LevenbergMarquardt, GaussNewton
 
 export LineSearch
 

diff --git a/src/gaussnewton.jl b/src/gaussnewton.jl
@@ -0,0 +1,162 @@
+"""
+    GaussNewton(; concrete_jac = nothing, linsolve = nothing, precs = DEFAULT_PRECS,
+        adkwargs...)
+
+An advanced GaussNewton implementation with support for efficient handling of sparse
+matrices via colored automatic differentiation and preconditioned linear solvers. Designed
+for large-scale and numerically-difficult nonlinear least squares problems.
+
+!!! note
+    In most practical situations, users should prefer using `LevenbergMarquardt` instead! It
+    is a more general extension of `Gauss-Newton` Method.
+
+### Keyword Arguments
+
+  - `autodiff`: determines the backend used for the Jacobian. Note that this argument is
+    ignored if an analytical Jacobian is passed, as that will be used instead. Defaults to
+    `AutoForwardDiff()`. Valid choices are types from ADTypes.jl.
+  - `concrete_jac`: whether to build a concrete Jacobian. If a Krylov-subspace method is used,
+    then the Jacobian will not be constructed and instead direct Jacobian-vector products
+    `J*v` are computed using forward-mode automatic differentiation or finite differencing
+    tricks (without ever constructing the Jacobian). However, if the Jacobian is still needed,
+    for example for a preconditioner, `concrete_jac = true` can be passed in order to force
+    the construction of the Jacobian.
+  - `linsolve`: the [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) used for the
+    linear solves within the Newton method. Defaults to `nothing`, which means it uses the
+    LinearSolve.jl default algorithm choice. For more information on available algorithm
+    choices, see the [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
+  - `precs`: the choice of preconditioners for the linear solver. Defaults to using no
+    preconditioners. For more information on specifying preconditioners for LinearSolve
+    algorithms, consult the
+    [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
+
+!!! warning
+
+    Jacobian-Free version of `GaussNewton` doesn't work yet, and it forces jacobian
+    construction. This will be fixed in the near future.
+"""
+@concrete struct GaussNewton{CJ, AD} <: AbstractNewtonAlgorithm{CJ, AD}
+    ad::AD
+    linsolve
+    precs
+end
+
+function GaussNewton(; concrete_jac = nothing, linsolve = NormalCholeskyFactorization(),
+    precs = DEFAULT_PRECS, adkwargs...)
+    ad = default_adargs_to_adtype(; adkwargs...)
+    return GaussNewton{_unwrap_val(concrete_jac)}(ad, linsolve, precs)
+end
+
+@concrete mutable struct GaussNewtonCache{iip} <: AbstractNonlinearSolveCache{iip}
+    f
+    alg
+    u
+    fu1
+    fu2
+    fu_new
+    du
+    p
+    uf
+    linsolve
+    J
+    JᵀJ
+    Jᵀf
+    jac_cache
+    force_stop
+    maxiters::Int
+    internalnorm
+    retcode::ReturnCode.T
+    abstol
+    prob
+    stats::NLStats
+end
+
+function SciMLBase.__init(prob::NonlinearLeastSquaresProblem{uType, iip}, alg::GaussNewton,
+    args...; alias_u0 = false, maxiters = 1000, abstol = 1e-6, internalnorm = DEFAULT_NORM,
+    kwargs...) where {uType, iip}
+    @unpack f, u0, p = prob
+    u = alias_u0 ? u0 : deepcopy(u0)
+    if iip
+        fu1 = f.resid_prototype === nothing ? zero(u) : f.resid_prototype
+        f(fu1, u, p)
+    else
+        fu1 = f(u, p)
+    end
+    uf, linsolve, J, fu2, jac_cache, du, JᵀJ, Jᵀf = jacobian_caches(alg, f, u, p, Val(iip);
+        linsolve_with_JᵀJ = Val(true))
+
+    return GaussNewtonCache{iip}(f, alg, u, fu1, fu2, zero(fu1), du, p, uf, linsolve, J,
+        JᵀJ, Jᵀf, jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol,
+        prob, NLStats(1, 0, 0, 0, 0))
+end
+
+function perform_step!(cache::GaussNewtonCache{true})
+    @unpack u, fu1, f, p, alg, J, JᵀJ, Jᵀf, linsolve, du = cache
+    jacobian!!(J, cache)
+    mul!(JᵀJ, J', J)
+    mul!(Jᵀf, J', fu1)
+
+    # u = u - J \ fu
+    linres = dolinsolve(alg.precs, linsolve; A = JᵀJ, b = _vec(Jᵀf), linu = _vec(du),
+        p, reltol = cache.abstol)
+    cache.linsolve = linres.cache
+    @. u = u - du
+    f(cache.fu_new, u, p)
+
+    (cache.internalnorm(cache.fu_new .- cache.fu1) < cache.abstol ||
+     cache.internalnorm(cache.fu_new) < cache.abstol) &&
+        (cache.force_stop = true)
+    cache.fu1 .= cache.fu_new
+    cache.stats.nf += 1
+    cache.stats.njacs += 1
+    cache.stats.nsolve += 1
+    cache.stats.nfactors += 1
+    return nothing
+end
+
+function perform_step!(cache::GaussNewtonCache{false})
+    @unpack u, fu1, f, p, alg, linsolve = cache
+
+    cache.J = jacobian!!(cache.J, cache)
+
+    cache.JᵀJ = cache.J' * cache.J
+    cache.Jᵀf = cache.J' * fu1
+    # u = u - J \ fu
+    if linsolve === nothing
+        cache.du = fu1 / cache.J
+    else
+        linres = dolinsolve(alg.precs, linsolve; A = cache.JᵀJ, b = _vec(cache.Jᵀf),
+            linu = _vec(cache.du), p, reltol = cache.abstol)
+        cache.linsolve = linres.cache
+    end
+    cache.u = @. u - cache.du  # `u` might not support mutation
+    cache.fu_new = f(cache.u, p)
+
+    (cache.internalnorm(cache.fu_new) < cache.abstol) && (cache.force_stop = true)
+    cache.fu1 = cache.fu_new
+    cache.stats.nf += 1
+    cache.stats.njacs += 1
+    cache.stats.nsolve += 1
+    cache.stats.nfactors += 1
+    return nothing
+end
+
+function SciMLBase.reinit!(cache::GaussNewtonCache{iip}, u0 = cache.u; p = cache.p,
+    abstol = cache.abstol, maxiters = cache.maxiters) where {iip}
+    cache.p = p
+    if iip
+        recursivecopy!(cache.u, u0)
+        cache.f(cache.fu1, cache.u, p)
+    else
+        # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter
+        cache.u = u0
+        cache.fu1 = cache.f(cache.u, p)
+    end
+    cache.abstol = abstol
+    cache.maxiters = maxiters
+    cache.stats.nf = 1
+    cache.stats.nsteps = 1
+    cache.force_stop = false
+    cache.retcode = ReturnCode.Default
+    return cache
+end