diff --git a/Project.toml b/Project.toml index 5016fe91..79ae11b3 100644 --- a/Project.toml +++ b/Project.toml @@ -26,7 +26,6 @@ Compat = "2.2, 3, 4" DataStructures = "0.17, 0.18" FiniteDiff = "2.8.1" ForwardDiff = "0.10" -Graphs = "1.4" Requires = "0.5, 1.0" StaticArrays = "1" VertexSafeGraphs = "0.2" diff --git a/README.md b/README.md index f53021c4..3511ac6a 100644 --- a/README.md +++ b/README.md @@ -207,6 +207,35 @@ function, otherwise it will become a dense matrix. If `jac_prototype` and function has a *square* Jacobian matrix. If it is not the case, please specify the shape of output by giving `dx`. +Similar functionality is available for Hessians, using finite differences of forward derivatives. Given a scalar function `f(x)`, a vector value for `x`, +and a color vector and sparsity pattern, this can be accomplished using +`numauto_color_hessian` or its in-place form `numauto_color_hessian!`. + +```julia +H = numauto_color_hessian(fscalar, x, colorvec, sparsity) +numauto_color_hessian!(H, fscalar, x, colorvec, sparsity) +``` + +To avoid unnecessary allocations every time the Hessian is computed, +construct a `ForwardColorHesCache` beforehand: + +```julia +hescache = ForwardColorHesCache(f, x, colorvec, sparsity) +numauto_color_hessian!(H, f, x, hescache) +``` + +By default, these methods use a mix of numerical and automatic differentiation, +namely by taking finite differences of gradients calculated via ForwardDiff.jl. +Alternatively, if you have your own custom gradient function `g!`, you can specify +it as an argument to `ForwardColorHesCache`: + +```julia +hescache = ForwardColorHesCache(fscalar, x, colorvec, sparsity, g!) +``` +Note that any user-defined gradient needs to have the signature `g!(G, x)`, +i.e. updating the gradient `G` in place. + + ### Jacobian-Vector and Hessian-Vector Products Matrix-free implementations of Jacobian-Vector and Hessian-Vector products is diff --git a/src/SparseDiffTools.jl b/src/SparseDiffTools.jl index bcebff4e..b9ae599f 100644 --- a/src/SparseDiffTools.jl +++ b/src/SparseDiffTools.jl @@ -28,6 +28,9 @@ export contract_color, forwarddiff_color_jacobian!, forwarddiff_color_jacobian, ForwardColorJacCache, + numauto_color_hessian!, + numauto_color_hessian, + ForwardColorHesCache, auto_jacvec,auto_jacvec!, num_jacvec,num_jacvec!, num_vecjac,num_vecjac!, @@ -48,6 +51,7 @@ include("coloring/greedy_star1_coloring.jl") include("coloring/greedy_star2_coloring.jl") include("coloring/matrix2graph.jl") include("differentiation/compute_jacobian_ad.jl") +include("differentiation/compute_hessian_ad.jl") include("differentiation/jaches_products.jl") include("differentiation/vecjac_products.jl") diff --git a/src/differentiation/compute_hessian_ad.jl b/src/differentiation/compute_hessian_ad.jl new file mode 100644 index 00000000..f9a07183 --- /dev/null +++ b/src/differentiation/compute_hessian_ad.jl @@ -0,0 +1,98 @@ +struct ForwardColorHesCache{THS, THC, TI<:Integer, TD, TGF, TGC, TG} + sparsity::THS + colors::THC + ncolors::TI + D::TD + buffer::TD + grad!::TGF + grad_config::TGC + G1::TG + G2::TG +end + +function make_hessian_buffers(colorvec, x) + ncolors = maximum(colorvec) + D = hcat([float.(i .== colorvec) for i in 1:ncolors]...) + buffer = similar(D) + G1 = similar(x) + G2 = similar(x) + return (;ncolors, D, buffer, G1, G2) +end + +function ForwardColorHesCache(f, + x::AbstractVector{<:Number}, + colorvec::AbstractVector{<:Integer}=eachindex(x), + sparsity::Union{AbstractMatrix, Nothing}=nothing, + g! = (G, x, grad_config) -> ForwardDiff.gradient!(G, f, x, grad_config)) + ncolors, D, buffer, G, G2 = make_hessian_buffers(colorvec, x) + grad_config = ForwardDiff.GradientConfig(f, x) + + # If user supplied their own gradient function, make sure it has the right + # signature (i.e. g!(G, x) or g!(G, x, grad_config::ForwardDiff.GradientConfig)) + if ! hasmethod(g!, (typeof(G), typeof(G), typeof(grad_config))) + if ! hasmethod(g!, (typeof(G), typeof(G))) + throw(ArgumentError("Signature of `g!` must be either `g!(G, x)` or `g!(G, x, grad_config::ForwardDiff.GradientConfig)`")) + end + # define new method that takes a GradientConfig but doesn't use it + g1!(G, x, grad_config) = g!(G, x) + else + g1! = g! + end + + if sparsity === nothing + sparsity = sparse(ones(length(x), length(x))) + end + return ForwardColorHesCache(sparsity, colorvec, ncolors, D, buffer, g1!, grad_config, G, G2) +end + +function numauto_color_hessian!(H::AbstractMatrix{<:Number}, + f, + x::AbstractArray{<:Number}, + hes_cache::ForwardColorHesCache; + safe = true) + ϵ = cbrt(eps(eltype(x))) + for j in 1:hes_cache.ncolors + x .+= ϵ .* @view hes_cache.D[:, j] + hes_cache.grad!(hes_cache.G2, x, hes_cache.grad_config) + x .-= 2ϵ .* @view hes_cache.D[:, j] + hes_cache.grad!(hes_cache.G1, x, hes_cache.grad_config) + hes_cache.buffer[:, j] .= (hes_cache.G2 .- hes_cache.G1) ./ 2ϵ + x .+= ϵ .* @view hes_cache.D[:, j] #reset to original value + end + ii, jj, vv = findnz(hes_cache.sparsity) + if safe + fill!(H, false) + end + for (i, j) in zip(ii, jj) + H[i, j] = hes_cache.buffer[i, hes_cache.colors[j]] + end + return H +end + +function numauto_color_hessian!(H::AbstractMatrix{<:Number}, + f, + x::AbstractArray{<:Number}, + colorvec::AbstractVector{<:Integer}=eachindex(x), + sparsity::Union{AbstractMatrix, Nothing}=nothing) + hes_cache = ForwardColorHesCache(f, x, colorvec, sparsity) + numauto_color_hessian!(H, f, x, hes_cache) + return H +end + +function numauto_color_hessian(f, + x::AbstractArray{<:Number}, + hes_cache::ForwardColorHesCache) + H = convert.(eltype(x), hes_cache.sparsity) + numauto_color_hessian!(H, f, x, hes_cache) + return H +end + +function numauto_color_hessian(f, + x::AbstractArray{<:Number}, + colorvec::AbstractVector{<:Integer}=eachindex(x), + sparsity::Union{AbstractMatrix, Nothing}=nothing) + hes_cache = ForwardColorHesCache(f, x, colorvec, sparsity) + H = convert.(eltype(x), hes_cache.sparsity) + numauto_color_hessian!(H, f, x, hes_cache) + return H +end diff --git a/test/runtests.jl b/test/runtests.jl index d80e6e84..869c7878 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -18,6 +18,7 @@ if GROUP == "All" @time @safetestset "Acyclic coloring" begin include("test_acyclic.jl") end @time @safetestset "Matrix to graph conversion" begin include("test_matrix2graph.jl") end @time @safetestset "AD using colorvec vector" begin include("test_ad.jl") end + @time @safetestset "Hessian colorvecs" begin include("test_sparse_hessian.jl") end @time @safetestset "Integration test" begin include("test_integration.jl") end @time @safetestset "Special matrices" begin include("test_specialmatrices.jl") end @time @safetestset "Jac Vecs and Hes Vecs" begin include("test_jaches_products.jl") end diff --git a/test/test_sparse_hessian.jl b/test/test_sparse_hessian.jl new file mode 100644 index 00000000..d0308da1 --- /dev/null +++ b/test/test_sparse_hessian.jl @@ -0,0 +1,100 @@ +## Hessian tests +using SparsityDetection, SparseDiffTools +using ForwardDiff +using LinearAlgebra, SparseArrays + +function fscalar(x) + return -dot(x, x) + 2 * x[2] * x[3]^2 +end + +x = randn(5) +sparsity = hessian_sparsity(fscalar, x) +colors = matrix_colors(tril(sparsity)) +ncolors = maximum(colors) +D = hcat([float.(i .== colors) for i in 1:ncolors]...) +buffer = similar(D) +G1 = zero(x) +G2 = zero(x) + +buffers_tup = SparseDiffTools.make_hessian_buffers(colors, x) +@test buffers_tup.ncolors == ncolors +@test buffers_tup.D == D +@test size(buffers_tup.buffer) == size(buffer) +@test eltype(buffers_tup.buffer) == eltype(buffer) +@test typeof(buffers_tup.buffer) == typeof(buffer) +@test size(buffers_tup.G1) == size(G1) +@test eltype(buffers_tup.G1) == eltype(G1) +@test size(buffers_tup.G2) == size(G2) +@test eltype(buffers_tup.G2) == eltype(G2) + + +gconfig = ForwardDiff.GradientConfig(fscalar, x) +g(x) = ForwardDiff.gradient(fscalar, x) # allocating +g!(G, x, gconfig) = ForwardDiff.gradient!(G, fscalar, x, gconfig) # non-allocating + +hescache1 = ForwardColorHesCache(sparsity, colors, ncolors, D, buffer, g!, gconfig, G1, G2) +hescache2 = ForwardColorHesCache(fscalar, x, colors, sparsity, g!) +hescache3 = ForwardColorHesCache(fscalar, x, colors, sparsity) +# custom gradient function +hescache4 = ForwardColorHesCache(fscalar, x, colors, sparsity, + (G, x) -> ForwardDiff.gradient!(G, fscalar, x)) +hescache5 = ForwardColorHesCache(fscalar, x) +# custom gradient has to have 2 or 3 arguments... +@test_throws ArgumentError ForwardColorHesCache(fscalar, x, colors, sparsity, (a) -> 1.0) +@test_throws ArgumentError ForwardColorHesCache(fscalar, x, colors, sparsity, (a, b, c, d) -> 1.0) +# ...and needs to accept (Vector, Vector, ForwardDiff.GradientConfig) +@test_throws ArgumentError ForwardColorHesCache(fscalar, x, colors, sparsity, (a::Int, b::Int) -> 1.0,) +@test_throws ArgumentError ForwardColorHesCache(fscalar, x, colors, sparsity, (a::Int, b::Int, c::Int) -> 1.0) + +for name in [:sparsity, :colors, :ncolors, :D] + @eval @test hescache1.$name == hescache2.$name + @eval @test hescache1.$name == hescache3.$name + @eval @test hescache1.$name == hescache4.$name + # hescache5 is the default dense version, so only first axis will match + @eval @test size(hescache1.$name, 1) == size(hescache5.$name, 1) +end +for name in [:buffer, :G1, :G2] + @eval @test size(hescache1.$name) == size(hescache2.$name) + @eval @test size(hescache1.$name) == size(hescache3.$name) + @eval @test size(hescache1.$name) == size(hescache4.$name) + # hescache5 is the default dense version, so only first axis will match + @eval @test size(hescache1.$name, 1) == size(hescache5.$name, 1) + + @eval @test eltype(hescache1.$name) == eltype(hescache2.$name) + @eval @test eltype(hescache1.$name) == eltype(hescache3.$name) + @eval @test eltype(hescache1.$name) == eltype(hescache4.$name) + @eval @test eltype(hescache1.$name) == eltype(hescache5.$name) +end + +Hforward = ForwardDiff.hessian(fscalar, x) +for (i, hescache) in enumerate([hescache1, hescache2, hescache3, hescache4, hescache5]) + H = numauto_color_hessian(fscalar, x, colors, sparsity) + H1 = numauto_color_hessian(fscalar, x, hescache) + H2 = numauto_color_hessian(fscalar, x) + @test all(isapprox.(Hforward, H, rtol=1e-6)) + @test all(isapprox.(H, H1, rtol=1e-6)) + @test all(isapprox.(H2, H1, rtol=1e-6)) + + H1 = similar(H) + numauto_color_hessian!(H1, fscalar, x, collect(hescache.colors), hescache.sparsity) + @test all(isapprox.(H1, H)) + + numauto_color_hessian!(H2, fscalar, x) + @test all(isapprox.(H2, H)) + + numauto_color_hessian!(H1, fscalar, x, hescache) + @test all(isapprox.(H1, H)) + + numauto_color_hessian!(H1, fscalar, x, hescache, safe=false) + @test all(isapprox.(H1, H)) + + # the following tests usually pass, but once in a while don't (it's not a big difference + # in timing on these small matrices, and sometimes its less than the timing variability). + # Commenting out for now to avoid rare stochastic test failures. + # # confirm unsafe is faster + # t_safe = minimum(@elapsed(numauto_color_hessian!(H1, fscalar, x, hescache, safe=true)) + # for _ in 1:100) + # t_unsafe = minimum(@elapsed(numauto_color_hessian!(H1, fscalar, x, hescache, safe=false)) + # for _ in 1:100) + # @test t_unsafe <= t_safe +end