Merge pull request #190 from ElOceanografo/sparsehess

ChrisRackauckas · web-flow · commit 27a9600bd652 · 2022-07-30T13:44:22.000-04:00
Add sparse Hessian decompression
diff --git a/Project.toml b/Project.toml
@@ -26,7 +26,6 @@ Compat = "2.2, 3, 4"
 DataStructures = "0.17, 0.18"
 FiniteDiff = "2.8.1"
 ForwardDiff = "0.10"
-Graphs = "1.4"
 Requires = "0.5, 1.0"
 StaticArrays = "1"
 VertexSafeGraphs = "0.2"
diff --git a/README.md b/README.md
@@ -207,6 +207,35 @@ function, otherwise it will become a dense matrix. If `jac_prototype` and
 function has a *square* Jacobian matrix. If it is not the case, please specify
 the shape of output by giving `dx`.
 
+Similar functionality is available for Hessians, using finite differences of forward derivatives. Given a scalar function `f(x)`, a vector value for `x`,
+and a color vector and sparsity pattern, this can be accomplished using
+`numauto_color_hessian` or its in-place form `numauto_color_hessian!`.
+
+```julia
+H = numauto_color_hessian(fscalar, x, colorvec, sparsity)
+numauto_color_hessian!(H, fscalar, x, colorvec, sparsity)
+```
+
+To avoid unnecessary allocations every time the Hessian is computed, 
+construct a `ForwardColorHesCache` beforehand:
+
+```julia
+hescache = ForwardColorHesCache(f, x, colorvec, sparsity)
+numauto_color_hessian!(H, f, x, hescache)
+```
+
+By default, these methods use a mix of numerical and automatic differentiation,
+namely by taking finite differences of gradients calculated via ForwardDiff.jl.
+Alternatively, if you have your own custom gradient function `g!`, you can specify 
+it as an argument to `ForwardColorHesCache`:
+
+```julia
+hescache = ForwardColorHesCache(fscalar, x, colorvec, sparsity, g!)
+```
+Note that any user-defined gradient needs to have the signature `g!(G, x)`,
+i.e. updating the gradient `G` in place.
+
+
 ### Jacobian-Vector and Hessian-Vector Products
 
 Matrix-free implementations of Jacobian-Vector and Hessian-Vector products is
diff --git a/src/SparseDiffTools.jl b/src/SparseDiffTools.jl
@@ -28,6 +28,9 @@ export  contract_color,
         forwarddiff_color_jacobian!,
         forwarddiff_color_jacobian,
         ForwardColorJacCache,
+        numauto_color_hessian!,
+        numauto_color_hessian,
+        ForwardColorHesCache,
         auto_jacvec,auto_jacvec!,
         num_jacvec,num_jacvec!,
         num_vecjac,num_vecjac!,
@@ -48,6 +51,7 @@ include("coloring/greedy_star1_coloring.jl")
 include("coloring/greedy_star2_coloring.jl")
 include("coloring/matrix2graph.jl")
 include("differentiation/compute_jacobian_ad.jl")
+include("differentiation/compute_hessian_ad.jl")
 include("differentiation/jaches_products.jl")
 include("differentiation/vecjac_products.jl")
 
diff --git a/src/differentiation/compute_hessian_ad.jl b/src/differentiation/compute_hessian_ad.jl
@@ -0,0 +1,98 @@
+struct ForwardColorHesCache{THS, THC, TI<:Integer, TD, TGF, TGC, TG}
+    sparsity::THS
+    colors::THC
+    ncolors::TI
+    D::TD
+    buffer::TD
+    grad!::TGF
+    grad_config::TGC
+    G1::TG
+    G2::TG
+end
+
+function make_hessian_buffers(colorvec, x)
+    ncolors = maximum(colorvec)
+    D = hcat([float.(i .== colorvec) for i in 1:ncolors]...)
+    buffer = similar(D)
+    G1 = similar(x)
+    G2 = similar(x)
+    return (;ncolors, D, buffer, G1, G2)
+end
+
+function ForwardColorHesCache(f, 
+                              x::AbstractVector{<:Number}, 
+                              colorvec::AbstractVector{<:Integer}=eachindex(x), 
+                              sparsity::Union{AbstractMatrix, Nothing}=nothing,
+                              g! = (G, x, grad_config) -> ForwardDiff.gradient!(G, f, x, grad_config))
+    ncolors, D, buffer, G, G2 = make_hessian_buffers(colorvec, x)
+    grad_config = ForwardDiff.GradientConfig(f, x)
+    
+    # If user supplied their own gradient function, make sure it has the right
+    # signature (i.e. g!(G, x) or g!(G, x, grad_config::ForwardDiff.GradientConfig))
+    if ! hasmethod(g!, (typeof(G), typeof(G), typeof(grad_config)))
+        if ! hasmethod(g!, (typeof(G), typeof(G)))
+            throw(ArgumentError("Signature of `g!` must be either `g!(G, x)` or `g!(G, x, grad_config::ForwardDiff.GradientConfig)`"))
+        end
+        # define new method that takes a GradientConfig but doesn't use it
+        g1!(G, x, grad_config) = g!(G, x)
+    else
+        g1! = g!
+    end
+    
+    if sparsity === nothing
+        sparsity = sparse(ones(length(x), length(x)))
+    end
+    return ForwardColorHesCache(sparsity, colorvec, ncolors, D, buffer, g1!, grad_config, G, G2)
+end
+
+function numauto_color_hessian!(H::AbstractMatrix{<:Number}, 
+                                    f, 
+                                    x::AbstractArray{<:Number}, 
+                                    hes_cache::ForwardColorHesCache;
+                                    safe = true)
+    ϵ = cbrt(eps(eltype(x)))
+    for j in 1:hes_cache.ncolors
+        x .+= ϵ .* @view hes_cache.D[:, j]
+        hes_cache.grad!(hes_cache.G2, x, hes_cache.grad_config)
+        x .-= 2ϵ .* @view hes_cache.D[:, j]
+        hes_cache.grad!(hes_cache.G1, x, hes_cache.grad_config)
+        hes_cache.buffer[:, j] .= (hes_cache.G2 .- hes_cache.G1) ./ 2ϵ
+        x .+= ϵ .* @view hes_cache.D[:, j] #reset to original value
+    end
+    ii, jj, vv = findnz(hes_cache.sparsity)
+    if safe
+        fill!(H, false)
+    end
+    for (i, j) in zip(ii, jj)
+        H[i, j] = hes_cache.buffer[i, hes_cache.colors[j]]
+    end
+    return H
+end
+
+function numauto_color_hessian!(H::AbstractMatrix{<:Number}, 
+                                    f, 
+                                    x::AbstractArray{<:Number},
+                                    colorvec::AbstractVector{<:Integer}=eachindex(x), 
+                                    sparsity::Union{AbstractMatrix, Nothing}=nothing)
+    hes_cache = ForwardColorHesCache(f, x, colorvec, sparsity)
+    numauto_color_hessian!(H, f, x, hes_cache)
+    return H
+end
+
+function numauto_color_hessian(f, 
+                                   x::AbstractArray{<:Number}, 
+                                   hes_cache::ForwardColorHesCache)
+    H = convert.(eltype(x), hes_cache.sparsity)
+    numauto_color_hessian!(H, f, x, hes_cache)
+    return H
+end
+
+function numauto_color_hessian(f,
+                                   x::AbstractArray{<:Number},
+                                   colorvec::AbstractVector{<:Integer}=eachindex(x), 
+                                   sparsity::Union{AbstractMatrix, Nothing}=nothing)
+    hes_cache = ForwardColorHesCache(f, x, colorvec, sparsity)
+    H = convert.(eltype(x), hes_cache.sparsity)
+    numauto_color_hessian!(H, f, x, hes_cache)
+    return H
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -18,6 +18,7 @@ if GROUP == "All"
     @time @safetestset "Acyclic coloring" begin include("test_acyclic.jl") end
     @time @safetestset "Matrix to graph conversion" begin include("test_matrix2graph.jl") end
     @time @safetestset "AD using colorvec vector" begin include("test_ad.jl") end
+    @time @safetestset "Hessian colorvecs" begin include("test_sparse_hessian.jl") end
     @time @safetestset "Integration test" begin include("test_integration.jl") end
     @time @safetestset "Special matrices" begin include("test_specialmatrices.jl") end
     @time @safetestset "Jac Vecs and Hes Vecs" begin include("test_jaches_products.jl") end
diff --git a/test/test_sparse_hessian.jl b/test/test_sparse_hessian.jl
@@ -0,0 +1,100 @@
+## Hessian tests
+using SparsityDetection, SparseDiffTools
+using ForwardDiff
+using LinearAlgebra, SparseArrays
+
+function fscalar(x)
+    return -dot(x, x) + 2 * x[2] * x[3]^2
+end
+
+x = randn(5)
+sparsity = hessian_sparsity(fscalar, x)
+colors = matrix_colors(tril(sparsity))
+ncolors = maximum(colors)
+D = hcat([float.(i .== colors) for i in 1:ncolors]...)
+buffer = similar(D)
+G1 = zero(x)
+G2 = zero(x)
+
+buffers_tup = SparseDiffTools.make_hessian_buffers(colors, x)
+@test buffers_tup.ncolors == ncolors
+@test buffers_tup.D == D
+@test size(buffers_tup.buffer) == size(buffer)
+@test eltype(buffers_tup.buffer) == eltype(buffer)
+@test typeof(buffers_tup.buffer) == typeof(buffer)
+@test size(buffers_tup.G1) == size(G1)
+@test eltype(buffers_tup.G1) == eltype(G1)
+@test size(buffers_tup.G2) == size(G2)
+@test eltype(buffers_tup.G2) == eltype(G2)
+
+
+gconfig = ForwardDiff.GradientConfig(fscalar, x)
+g(x) = ForwardDiff.gradient(fscalar, x)           # allocating
+g!(G, x, gconfig) = ForwardDiff.gradient!(G, fscalar, x, gconfig)   # non-allocating
+
+hescache1 = ForwardColorHesCache(sparsity, colors, ncolors, D, buffer, g!, gconfig, G1, G2)
+hescache2 = ForwardColorHesCache(fscalar, x, colors, sparsity, g!)
+hescache3 = ForwardColorHesCache(fscalar, x, colors, sparsity)
+# custom gradient function
+hescache4 = ForwardColorHesCache(fscalar, x, colors, sparsity,
+    (G, x) -> ForwardDiff.gradient!(G, fscalar, x))
+hescache5 = ForwardColorHesCache(fscalar, x)
+# custom gradient has to have 2 or 3 arguments...
+@test_throws ArgumentError ForwardColorHesCache(fscalar, x, colors, sparsity, (a) -> 1.0)
+@test_throws ArgumentError ForwardColorHesCache(fscalar, x, colors, sparsity, (a, b, c, d) -> 1.0)
+# ...and needs to accept (Vector, Vector, ForwardDiff.GradientConfig)
+@test_throws ArgumentError ForwardColorHesCache(fscalar, x, colors, sparsity, (a::Int, b::Int) -> 1.0,)
+@test_throws ArgumentError ForwardColorHesCache(fscalar, x, colors, sparsity, (a::Int, b::Int, c::Int) -> 1.0)
+
+for name in [:sparsity, :colors, :ncolors, :D]
+    @eval @test hescache1.$name == hescache2.$name
+    @eval @test hescache1.$name == hescache3.$name
+    @eval @test hescache1.$name == hescache4.$name
+    # hescache5 is the default dense version, so only first axis will match
+    @eval @test size(hescache1.$name, 1) == size(hescache5.$name, 1)
+end
+for name in [:buffer, :G1, :G2]
+    @eval @test size(hescache1.$name) == size(hescache2.$name)
+    @eval @test size(hescache1.$name) == size(hescache3.$name)
+    @eval @test size(hescache1.$name) == size(hescache4.$name)
+    # hescache5 is the default dense version, so only first axis will match
+    @eval @test size(hescache1.$name, 1) == size(hescache5.$name, 1)
+
+    @eval @test eltype(hescache1.$name) == eltype(hescache2.$name)
+    @eval @test eltype(hescache1.$name) == eltype(hescache3.$name)
+    @eval @test eltype(hescache1.$name) == eltype(hescache4.$name)
+    @eval @test eltype(hescache1.$name) == eltype(hescache5.$name)
+end
+
+Hforward = ForwardDiff.hessian(fscalar, x)
+for (i, hescache) in enumerate([hescache1, hescache2, hescache3, hescache4, hescache5])
+    H = numauto_color_hessian(fscalar, x, colors, sparsity)
+    H1 = numauto_color_hessian(fscalar, x, hescache)
+    H2 = numauto_color_hessian(fscalar, x)
+    @test all(isapprox.(Hforward, H, rtol=1e-6))
+    @test all(isapprox.(H, H1, rtol=1e-6))
+    @test all(isapprox.(H2, H1, rtol=1e-6))
+
+    H1 = similar(H)
+    numauto_color_hessian!(H1, fscalar, x, collect(hescache.colors), hescache.sparsity)
+    @test all(isapprox.(H1, H))
+
+    numauto_color_hessian!(H2, fscalar, x)
+    @test all(isapprox.(H2, H))
+
+    numauto_color_hessian!(H1, fscalar, x, hescache)
+    @test all(isapprox.(H1, H))
+
+    numauto_color_hessian!(H1, fscalar, x, hescache, safe=false)
+    @test all(isapprox.(H1, H))
+
+    # the following tests usually pass, but once in a while don't (it's not a big difference
+    # in timing on these small matrices, and sometimes its less than the timing variability).
+    # Commenting out for now to avoid rare stochastic test failures.
+    # # confirm unsafe is faster
+    # t_safe = minimum(@elapsed(numauto_color_hessian!(H1, fscalar, x, hescache, safe=true))
+    #     for _ in 1:100)
+    # t_unsafe = minimum(@elapsed(numauto_color_hessian!(H1, fscalar, x, hescache, safe=false))
+    #     for _ in 1:100)
+    # @test t_unsafe <= t_safe
+end