diff --git a/Project.toml b/Project.toml
index 01be4b92..db7d4973 100644
--- a/Project.toml
+++ b/Project.toml
@@ -6,26 +6,24 @@ version = "0.4.1"
 [deps]
 BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
 BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0"
-Cassette = "7057c7e9-c182-5462-911a-8362d720325c"
 DiffEqDiffTools = "01453d9d-ee7c-5054-8395-0335cb756afa"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
-Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
-SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 VertexSafeGraphs = "19fa3120-7c27-5ec5-8db8-b0b0aa330d6f"
-Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [compat]
 julia = "1"
 
 [extras]
-DiffEqDiffTools = "01453d9d-ee7c-5054-8395-0335cb756afa"
+Cassette = "7057c7e9-c182-5462-911a-8362d720325c"
 IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [targets]
-test = ["Test", "DiffEqDiffTools", "IterativeSolvers", "Random"]
+test = ["Test", "Cassette", "IterativeSolvers", "Random", "SafeTestsets", "Zygote"]
diff --git a/README.md b/README.md
index 17b49c8b..c0913018 100644
--- a/README.md
+++ b/README.md
@@ -6,10 +6,12 @@ This package is for exploiting sparsity in Jacobians and Hessians
 to accelerate computations. Matrix-free Jacobian-vector product and
 Hessian-vector product operators are provided that are compatible with
 AbstractMatrix-based libraries
-like IterativeSolvers.jl for easy and efficient Newton-Krylov implementation.
-Automatic and numerical differentiation are utilized and optional. In addition,
-the ability to automatically detect the sparsity of a function, perform matrix
-coloring, and utilize coloring in Jacobian and Hessian construction is provided.
+like IterativeSolvers.jl for easy and efficient Newton-Krylov implementation. It is
+also possible to perform matrix coloring and to utilize that coloring in Jacobian
+and Hessian construction.
+
+Optionally, automatic and numerical differentiation are utilized, and the ability
+to automatically detect the sparsity of a function is provided.
 
 ## Example
 
@@ -31,11 +33,13 @@ end
 For this function, we know that the sparsity pattern of the Jacobian is a
 `Tridiagonal` matrix. However, if we didn't know the sparsity pattern for
 the Jacobian, we could use the `sparsity!` function to automatically
-detect the sparsity pattern. We declare that it outputs a length 30 vector
-and takes in a length 30 vector, and it spits out a `Sparsity` object
-which we can turn into a `SparseMatrixCSC`:
+detect the sparsity pattern. This function is only available if you
+load Cassette.jl as well. 
We declare that the function `f` outputs a +vector of length 30 and takes in a vector of length 30, and `sparsity!` spits +out a `Sparsity` object which we can turn into a `SparseMatrixCSC`: ```julia +using Cassette sparsity_pattern = sparsity!(f,output,input) jac = Float64.(sparse(sparsity_pattern)) ``` @@ -88,36 +92,6 @@ gmres!(res,J,v) ## Documentation -### Automated Sparsity Detection - -Automated sparsity detection is provided by the `sparsity!` function whose -syntax is: - -```julia -`sparsity!(f, Y, X, args...; sparsity=Sparsity(length(X), length(Y)), verbose=true)` -``` - -The arguments are: - -- `f`: the function -- `Y`: the output array -- `X`: the input array -- `args`: trailing arguments to `f`. They are considered subject to change, unless wrapped as `Fixed(arg)` -- `S`: (optional) the sparsity pattern -- `verbose`: (optional) whether to describe the paths taken by the sparsity detection. - -The function `f` is assumed to take arguments of the form `f(dx,x,args...)`. -`sparsity!` returns a `Sparsity` object which describes where the non-zeros -of the Jacobian occur. `sparse(::Sparsity)` transforms the pattern into -a sparse matrix. - -This function utilizes non-standard interpretation, which we denote -combinatoric concolic analysis, to directly realize the sparsity pattern from the program's AST. It requires that the function `f` is a Julia function. It does not -work numerically, meaning that it is not prone to floating point error or -cancelation. It allows for branching and will automatically check all of the -branches. However, a while loop of indeterminate length which is dependent -on the input argument is not allowed. - ### Matrix Coloring Matrix coloring allows you to reduce the number of times finite differencing @@ -230,29 +204,8 @@ autonum_hesvec!(du,f,x,v, cache3 = ForwardDiff.Dual{DeivVecTag}.(x, v)) autonum_hesvec(f,x,v) - - -numback_hesvec!(du,f,x,v, - cache1 = similar(v), - cache2 = similar(v)) - -numback_hesvec(f,x,v) - -# Currently errors! See https://github.com/FluxML/Zygote.jl/issues/241 -autoback_hesvec!(du,f,x,v, - cache2 = ForwardDiff.Dual{DeivVecTag}.(x, v), - cache3 = ForwardDiff.Dual{DeivVecTag}.(x, v)) - -autoback_hesvec(f,x,v) ``` -`numauto` and `autonum` both mix numerical and automatic differentiation, with -the former almost always being more efficient and is thus recommended. `numback` and -`autoback` methods are numerical/ForwardDiff over reverse mode automatic differentiation -respectively, where the reverse-mode AD is provided by Zygote.jl. Currently these methods -are not competitive against `numauto`, but as Zygote.jl gets optimized these will likely -be the fastest. - In addition, the following forms allow you to provide a gradient function `g(dx,x)` or `dx=g(x)` respectively: @@ -271,6 +224,32 @@ auto_hesvecgrad!(du,g,x,v, auto_hesvecgrad(g,x,v) ``` +The `numauto` and `autonum` methods both mix numerical and automatic differentiation, with +the former almost always being more efficient and thus being recommended. + +Optionally, if you load Zygote.jl, the following `numback` +and `autoback` methods are available and allow numerical/ForwardDiff over reverse mode +automatic differentiation respectively, where the reverse-mode AD is provided by Zygote.jl. +Currently these methods are not competitive against `numauto`, but as Zygote.jl gets +optimized these will likely be the fastest. + +```julia +using Zygote # Required + +numback_hesvec!(du,f,x,v, + cache1 = similar(v), + cache2 = similar(v)) + +numback_hesvec(f,x,v) + +# Currently errors! 
See https://github.com/FluxML/Zygote.jl/issues/241
+autoback_hesvec!(du,f,x,v,
+                 cache2 = ForwardDiff.Dual{DeivVecTag}.(x, v),
+                 cache3 = ForwardDiff.Dual{DeivVecTag}.(x, v))
+
+autoback_hesvec(f,x,v)
+```
+
 #### J*v and H*v Operators
 
 The following produce matrix-free operators which are used for calculating
@@ -287,3 +266,33 @@ These all have the same interface, where `J*v` utilizes the out-of-place
 Jacobian-vector or Hessian-vector function, whereas `mul!(res,J,v)` utilizes
 the appropriate in-place versions. To update the location of differentiation
 in the operator, simply mutate the vector `u`: `J.u .= ...`.
+
+### Automated Sparsity Detection
+
+Automated sparsity detection is provided by the `sparsity!` function. Because this
+functionality is loaded via Requires.jl, you must do `using Cassette` first. The syntax is:
+
+```julia
+sparsity!(f, Y, X, args...; sparsity=Sparsity(length(X), length(Y)), verbose=true)
+```
+
+The arguments are:
+
+- `f`: the function
+- `Y`: the output array
+- `X`: the input array
+- `args`: trailing arguments to `f`. They are considered subject to change, unless wrapped as `Fixed(arg)`
+- `sparsity`: (optional) the sparsity pattern
+- `verbose`: (optional) whether to describe the paths taken by the sparsity detection.
+
+The function `f` is assumed to take arguments of the form `f(dx,x,args...)`.
+`sparsity!` returns a `Sparsity` object which describes where the non-zeros
+of the Jacobian occur. `sparse(::Sparsity)` transforms the pattern into
+a sparse matrix.
+
+This function utilizes non-standard interpretation, which we denote
+combinatoric concolic analysis, to directly realize the sparsity pattern from the
+program's AST. It requires that the function `f` be a Julia function. It does not
+work numerically, meaning that it is not prone to floating point error or
+cancellation. It allows for branching and will automatically check all of the
+branches. However, a while loop of indeterminate length which is dependent
+on the input argument is not allowed. 
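To make the `sparsity!` documentation above concrete, here is a minimal sketch with a trailing parameter. The function `g`, the parameter value `0.5`, and the length `10` are illustrative assumptions, not part of the package; `Fixed` is accessed by name from `SparseDiffTools` since it is not exported, and Cassette.jl must be installed for this functionality to load:

```julia
using SparseDiffTools, Cassette, SparseArrays
using SparseDiffTools: Fixed  # not exported; see the argument list above

# An illustrative tridiagonal-stencil function with a trailing scalar parameter p.
function g(dy, y, p)
    for i in 2:length(y)-1
        dy[i] = p * (y[i-1] - 2y[i] + y[i+1])
    end
    dy[1] = -2p * y[1] + p * y[2]
    dy[end] = p * y[end-1] - 2p * y[end]
    nothing
end

input  = rand(10)
output = similar(input)
# Wrapping p in Fixed marks it as a constant rather than a varying input.
pattern = sparsity!(g, output, input, Fixed(0.5))
jac = Float64.(sparse(pattern))  # 10×10 tridiagonal sparsity pattern
```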
diff --git a/appveyor.yml b/appveyor.yml index a5091374..2fd7f769 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,6 +1,6 @@ environment: matrix: - - julia_version: 1 + - julia_version: 1.2 - julia_version: nightly platform: diff --git a/src/SparseDiffTools.jl b/src/SparseDiffTools.jl index b2b55c0a..05fc62a9 100644 --- a/src/SparseDiffTools.jl +++ b/src/SparseDiffTools.jl @@ -1,16 +1,19 @@ module SparseDiffTools -using SparseArrays, LinearAlgebra, BandedMatrices, BlockBandedMatrices, - LightGraphs, VertexSafeGraphs, DiffEqDiffTools, ForwardDiff, Zygote, - SparseArrays -using BlockBandedMatrices:blocksize,nblocks +using BandedMatrices +using BlockBandedMatrices +using DiffEqDiffTools +using ForwardDiff +using LightGraphs +using Requires +using VertexSafeGraphs + +using LinearAlgebra +using SparseArrays + +using BlockBandedMatrices: blocksize, nblocks using ForwardDiff: Dual, jacobian, partials, DEFAULT_CHUNK_THRESHOLD -using Requires -using Cassette -import Cassette: tag, untag, Tagged, metadata, hasmetadata, istagged, canrecurse -import Cassette: tagged_new_tuple, ContextTagged, BindingMeta, DisableHooks, nametype -import Core: SSAValue export contract_color, greedy_d1, @@ -27,10 +30,7 @@ export contract_color, autonum_hesvec,autonum_hesvec!, num_hesvecgrad,num_hesvecgrad!, auto_hesvecgrad,auto_hesvecgrad!, - numback_hesvec,numback_hesvec!, - autoback_hesvec,autoback_hesvec!, - JacVec,HesVec,HesVecGrad, - Sparsity, sparsity!, hsparsity + JacVec,HesVec,HesVecGrad include("coloring/high_level.jl") @@ -41,8 +41,17 @@ include("coloring/greedy_star2_coloring.jl") include("coloring/matrix2graph.jl") include("differentiation/compute_jacobian_ad.jl") include("differentiation/jaches_products.jl") + function __init__() @require Cassette="7057c7e9-c182-5462-911a-8362d720325c" begin + using .Cassette + using .Cassette: tag, untag, Tagged, metadata, hasmetadata, istagged, canrecurse + using .Cassette: tagged_new_tuple, ContextTagged, BindingMeta, DisableHooks, nametype + + using Core: SSAValue + + export Sparsity, hsparsity, sparsity! + include("program_sparsity/program_sparsity.jl") include("program_sparsity/sparsity_tracker.jl") include("program_sparsity/path.jl") @@ -51,6 +60,18 @@ function __init__() include("program_sparsity/linearity.jl") include("program_sparsity/hessian.jl") include("program_sparsity/blas.jl") + + @require SpecialFunctions="276daf66-3868-5448-9aa4-cd146d93841b" begin + using .SpecialFunctions + + include("program_sparsity/linearity_special.jl") + end + end + + @require Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" begin + export numback_hesvec, numback_hesvec!, autoback_hesvec, autoback_hesvec! + + include("differentiation/jaches_products_zygote.jl") end end diff --git a/src/differentiation/jaches_products.jl b/src/differentiation/jaches_products.jl index 17b7776f..ea381544 100644 --- a/src/differentiation/jaches_products.jl +++ b/src/differentiation/jaches_products.jl @@ -1,287 +1,243 @@ -struct DeivVecTag end - -# J(f(x))*v -function auto_jacvec!(du, f, x, v, - cache1 = ForwardDiff.Dual{DeivVecTag}.(x, v), - cache2 = ForwardDiff.Dual{DeivVecTag}.(x, v)) - cache1 .= Dual{DeivVecTag}.(x, v) - f(cache2,cache1) - du .= partials.(cache2, 1) -end -function auto_jacvec(f, x, v) - partials.(f(Dual{DeivVecTag}.(x, v)), 1) -end - -function num_jacvec!(du,f,x,v,cache1 = similar(v), - cache2 = similar(v); - compute_f0 = true) - compute_f0 && (f(cache1,x)) - T = eltype(x) - # Should it be min? max? mean? - ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - @. 
x += ϵ*v - f(cache2,x) - @. x -= ϵ*v - @. du = (cache2 - cache1)/ϵ -end - -function num_jacvec(f,x,v,f0=nothing) - f0 === nothing ? _f0 = f(x) : _f0 = f0 - T = eltype(x) - # Should it be min? max? mean? - ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(minimum(x))) - (f(x.+ϵ.*v) .- f(x))./ϵ -end - -function num_hesvec!(du,f,x,v, - cache1 = similar(v), - cache2 = similar(v), - cache3 = similar(v)) - cache = DiffEqDiffTools.GradientCache(v[1],cache1,Val{:central}) - g = let f=f,cache=cache - (dx,x) -> DiffEqDiffTools.finite_difference_gradient!(dx,f,x,cache) - end - T = eltype(x) - # Should it be min? max? mean? - ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - @. x += ϵ*v - g(cache2,x) - @. x -= 2ϵ*v - g(cache3,x) - @. du = (cache2 - cache3)/(2ϵ) -end - -function num_hesvec(f,x,v) - g = (x) -> DiffEqDiffTools.finite_difference_gradient(f,x) - T = eltype(x) - # Should it be min? max? mean? - ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - x += ϵ*v - gxp = g(x) - x -= 2ϵ*v - gxm = g(x) - (gxp - gxm)/(2ϵ) -end - -function numauto_hesvec!(du,f,x,v, - cache = ForwardDiff.GradientConfig(f,v), - cache1 = similar(v), - cache2 = similar(v)) - g = let f=f,x=x,cache=cache - g = (dx,x) -> ForwardDiff.gradient!(dx,f,x,cache) - end - T = eltype(x) - # Should it be min? max? mean? - ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - @. x += ϵ*v - g(cache1,x) - @. x -= 2ϵ*v - g(cache2,x) - @. du = (cache1 - cache2)/(2ϵ) -end - -function numauto_hesvec(f,x,v) - g = (x) -> ForwardDiff.gradient(f,x) - T = eltype(x) - # Should it be min? max? mean? - ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - x += ϵ*v - gxp = g(x) - x -= 2ϵ*v - gxm = g(x) - (gxp - gxm)/(2ϵ) -end - -function numback_hesvec!(du,f,x,v, - cache1 = similar(v), - cache2 = similar(v)) - g = let f=f - g = (dx,x) -> dx .= first(Zygote.gradient(f,x)) - end - T = eltype(x) - # Should it be min? max? mean? - ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - @. x += ϵ*v - g(cache1,x) - @. x -= 2ϵ*v - g(cache2,x) - @. du = (cache1 - cache2)/(2ϵ) -end - -function numback_hesvec(f,x,v) - g = (x) -> first(Zygote.gradient(f,x)) - T = eltype(x) - # Should it be min? max? mean? - ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - x += ϵ*v - gxp = g(x) - x -= 2ϵ*v - gxm = g(x) - (gxp - gxm)/(2ϵ) -end - -function autonum_hesvec!(du,f,x,v, - cache1 = similar(v), - cache2 = ForwardDiff.Dual{DeivVecTag}.(x, v), - cache3 = ForwardDiff.Dual{DeivVecTag}.(x, v)) - cache = DiffEqDiffTools.GradientCache(v[1],cache1,Val{:central}) - g = (dx,x) -> DiffEqDiffTools.finite_difference_gradient!(dx,f,x,cache) - cache2 .= Dual{DeivVecTag}.(x, v) - g(cache3,cache2) - du .= partials.(cache3, 1) -end - -function autonum_hesvec(f,x,v) - g = (x) -> DiffEqDiffTools.finite_difference_gradient(f,x) - partials.(g(Dual{DeivVecTag}.(x, v)), 1) -end - -function autoback_hesvec!(du,f,x,v, - cache2 = ForwardDiff.Dual{Nothing}.(x, v), - cache3 = ForwardDiff.Dual{Nothing}.(x, v)) - g = let f=f - g = (dx,x) -> dx .= first(Zygote.gradient(f,x)) - end - cache2 .= Dual{Nothing}.(x, v) - g(cache3,cache2) - du .= partials.(cache3, 1) -end - -function autoback_hesvec(f,x,v) - g = (x) -> first(Zygote.gradient(f,x)) - ForwardDiff.partials.(g(ForwardDiff.Dual{Nothing}.(x, v)), 1) -end - -function num_hesvecgrad!(du,g,x,v, - cache2 = similar(v), - cache3 = similar(v)) - T = eltype(x) - # Should it be min? max? mean? - ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - @. x += ϵ*v - g(cache2,x) - @. x -= 2ϵ*v - g(cache3,x) - @. 
du = (cache2 - cache3)/(2ϵ) -end - -function num_hesvecgrad(g,x,v) - T = eltype(x) - # Should it be min? max? mean? - ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - x += ϵ*v - gxp = g(x) - x -= 2ϵ*v - gxm = g(x) - (gxp - gxm)/(2ϵ) -end - -function auto_hesvecgrad!(du,g,x,v, - cache2 = ForwardDiff.Dual{DeivVecTag}.(x, v), - cache3 = ForwardDiff.Dual{DeivVecTag}.(x, v)) - cache2 .= Dual{DeivVecTag}.(x, v) - g(cache3,cache2) - du .= partials.(cache3, 1) -end - -function auto_hesvecgrad(g,x,v) - partials.(g(Dual{DeivVecTag}.(x, v)), 1) -end - -### Operator Forms - -struct JacVec{F,T1,T2,uType} - f::F - cache1::T1 - cache2::T2 - u::uType - autodiff::Bool -end - -function JacVec(f,u::AbstractArray;autodiff=true) - if autodiff - cache1 = ForwardDiff.Dual{DeivVecTag}.(u, u) - cache2 = ForwardDiff.Dual{DeivVecTag}.(u, u) - else - cache1 = similar(u) - cache2 = similar(u) - end - JacVec(f,cache1,cache2,u,autodiff) -end - -Base.size(L::JacVec) = (length(L.cache1),length(L.cache1)) -Base.size(L::JacVec,i::Int) = length(L.cache1) -Base.:*(L::JacVec,x::AbstractVector) = L.autodiff ? auto_jacvec(_u->L.f(_u),L.u,x) : num_jacvec(_u->L.f(_u),L.u,x) - -function LinearAlgebra.mul!(du::AbstractVector,L::JacVec,v::AbstractVector) - if L.autodiff - auto_jacvec!(du,(_du,_u)->L.f(_du,_u),L.u,v,L.cache1,L.cache2) - else - num_jacvec!(du,(_du,_u)->L.f(_du,_u),L.u,v,L.cache1,L.cache2) - end -end - -struct HesVec{F,T1,T2,uType} - f::F - cache1::T1 - cache2::T2 - cache3::T2 - u::uType - autodiff::Bool -end - -function HesVec(f,u::AbstractArray;autodiff=true) - if autodiff - cache1 = ForwardDiff.GradientConfig(f,u) - cache2 = similar(u) - cache3 = similar(u) - else - cache1 = similar(u) - cache2 = similar(u) - cache3 = similar(u) - end - HesVec(f,cache1,cache2,cache3,u,autodiff) -end - -Base.size(L::HesVec) = (length(L.cache2),length(L.cache2)) -Base.size(L::HesVec,i::Int) = length(L.cache2) -Base.:*(L::HesVec,x::AbstractVector) = L.autodiff ? numauto_hesvec(L.f,L.u,x) : num_hesvec(L.f,L.u,x) - -function LinearAlgebra.mul!(du::AbstractVector,L::HesVec,v::AbstractVector) - if L.autodiff - numauto_hesvec!(du,L.f,L.u,v,L.cache1,L.cache2,L.cache3) - else - num_hesvec!(du,L.f,L.u,v,L.cache1,L.cache2,L.cache3) - end -end - -struct HesVecGrad{G,T1,T2,uType} - g::G - cache1::T1 - cache2::T2 - u::uType - autodiff::Bool -end - -function HesVecGrad(g,u::AbstractArray;autodiff=false) - if autodiff - cache1 = ForwardDiff.Dual{DeivVecTag}.(u, u) - cache2 = ForwardDiff.Dual{DeivVecTag}.(u, u) - else - cache1 = similar(u) - cache2 = similar(u) - end - HesVecGrad(g,cache1,cache2,u,autodiff) -end - -Base.size(L::HesVecGrad) = (length(L.cache2),length(L.cache2)) -Base.size(L::HesVecGrad,i::Int) = length(L.cache2) -Base.:*(L::HesVecGrad,x::AbstractVector) = L.autodiff ? 
auto_hesvecgrad(L.g,L.u,x) : num_hesvecgrad(L.g,L.u,x) - -function LinearAlgebra.mul!(du::AbstractVector,L::HesVecGrad,v::AbstractVector) - if L.autodiff - auto_hesvecgrad!(du,L.g,L.u,v,L.cache1,L.cache2) - else - num_hesvecgrad!(du,L.g,L.u,v,L.cache1,L.cache2) - end -end +struct DeivVecTag end + +# J(f(x))*v +function auto_jacvec!(du, f, x, v, + cache1 = ForwardDiff.Dual{DeivVecTag}.(x, v), + cache2 = ForwardDiff.Dual{DeivVecTag}.(x, v)) + cache1 .= Dual{DeivVecTag}.(x, v) + f(cache2,cache1) + du .= partials.(cache2, 1) +end +function auto_jacvec(f, x, v) + partials.(f(Dual{DeivVecTag}.(x, v)), 1) +end + +function num_jacvec!(du,f,x,v,cache1 = similar(v), + cache2 = similar(v); + compute_f0 = true) + compute_f0 && (f(cache1,x)) + T = eltype(x) + # Should it be min? max? mean? + ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) + @. x += ϵ*v + f(cache2,x) + @. x -= ϵ*v + @. du = (cache2 - cache1)/ϵ +end + +function num_jacvec(f,x,v,f0=nothing) + f0 === nothing ? _f0 = f(x) : _f0 = f0 + T = eltype(x) + # Should it be min? max? mean? + ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(minimum(x))) + (f(x.+ϵ.*v) .- f(x))./ϵ +end + +function num_hesvec!(du,f,x,v, + cache1 = similar(v), + cache2 = similar(v), + cache3 = similar(v)) + cache = DiffEqDiffTools.GradientCache(v[1],cache1,Val{:central}) + g = let f=f,cache=cache + (dx,x) -> DiffEqDiffTools.finite_difference_gradient!(dx,f,x,cache) + end + T = eltype(x) + # Should it be min? max? mean? + ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) + @. x += ϵ*v + g(cache2,x) + @. x -= 2ϵ*v + g(cache3,x) + @. du = (cache2 - cache3)/(2ϵ) +end + +function num_hesvec(f,x,v) + g = (x) -> DiffEqDiffTools.finite_difference_gradient(f,x) + T = eltype(x) + # Should it be min? max? mean? + ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) + x += ϵ*v + gxp = g(x) + x -= 2ϵ*v + gxm = g(x) + (gxp - gxm)/(2ϵ) +end + +function numauto_hesvec!(du,f,x,v, + cache = ForwardDiff.GradientConfig(f,v), + cache1 = similar(v), + cache2 = similar(v)) + g = let f=f,x=x,cache=cache + g = (dx,x) -> ForwardDiff.gradient!(dx,f,x,cache) + end + T = eltype(x) + # Should it be min? max? mean? + ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) + @. x += ϵ*v + g(cache1,x) + @. x -= 2ϵ*v + g(cache2,x) + @. du = (cache1 - cache2)/(2ϵ) +end + +function numauto_hesvec(f,x,v) + g = (x) -> ForwardDiff.gradient(f,x) + T = eltype(x) + # Should it be min? max? mean? + ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) + x += ϵ*v + gxp = g(x) + x -= 2ϵ*v + gxm = g(x) + (gxp - gxm)/(2ϵ) +end + +function autonum_hesvec!(du,f,x,v, + cache1 = similar(v), + cache2 = ForwardDiff.Dual{DeivVecTag}.(x, v), + cache3 = ForwardDiff.Dual{DeivVecTag}.(x, v)) + cache = DiffEqDiffTools.GradientCache(v[1],cache1,Val{:central}) + g = (dx,x) -> DiffEqDiffTools.finite_difference_gradient!(dx,f,x,cache) + cache2 .= Dual{DeivVecTag}.(x, v) + g(cache3,cache2) + du .= partials.(cache3, 1) +end + +function autonum_hesvec(f,x,v) + g = (x) -> DiffEqDiffTools.finite_difference_gradient(f,x) + partials.(g(Dual{DeivVecTag}.(x, v)), 1) +end + +function num_hesvecgrad!(du,g,x,v, + cache2 = similar(v), + cache3 = similar(v)) + T = eltype(x) + # Should it be min? max? mean? + ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) + @. x += ϵ*v + g(cache2,x) + @. x -= 2ϵ*v + g(cache3,x) + @. du = (cache2 - cache3)/(2ϵ) +end + +function num_hesvecgrad(g,x,v) + T = eltype(x) + # Should it be min? max? mean? 
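+    # Heuristic step size: sqrt(eps) scaled by the magnitude of x trades the
+    # truncation error of the central difference against floating point
+    # roundoff, and keeps the perturbations x ± ϵv well-posed for large and small x.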
+ ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) + x += ϵ*v + gxp = g(x) + x -= 2ϵ*v + gxm = g(x) + (gxp - gxm)/(2ϵ) +end + +function auto_hesvecgrad!(du,g,x,v, + cache2 = ForwardDiff.Dual{DeivVecTag}.(x, v), + cache3 = ForwardDiff.Dual{DeivVecTag}.(x, v)) + cache2 .= Dual{DeivVecTag}.(x, v) + g(cache3,cache2) + du .= partials.(cache3, 1) +end + +function auto_hesvecgrad(g,x,v) + partials.(g(Dual{DeivVecTag}.(x, v)), 1) +end + +### Operator Forms + +struct JacVec{F,T1,T2,uType} + f::F + cache1::T1 + cache2::T2 + u::uType + autodiff::Bool +end + +function JacVec(f,u::AbstractArray;autodiff=true) + if autodiff + cache1 = ForwardDiff.Dual{DeivVecTag}.(u, u) + cache2 = ForwardDiff.Dual{DeivVecTag}.(u, u) + else + cache1 = similar(u) + cache2 = similar(u) + end + JacVec(f,cache1,cache2,u,autodiff) +end + +Base.size(L::JacVec) = (length(L.cache1),length(L.cache1)) +Base.size(L::JacVec,i::Int) = length(L.cache1) +Base.:*(L::JacVec,x::AbstractVector) = L.autodiff ? auto_jacvec(_u->L.f(_u),L.u,x) : num_jacvec(_u->L.f(_u),L.u,x) + +function LinearAlgebra.mul!(du::AbstractVector,L::JacVec,v::AbstractVector) + if L.autodiff + auto_jacvec!(du,(_du,_u)->L.f(_du,_u),L.u,v,L.cache1,L.cache2) + else + num_jacvec!(du,(_du,_u)->L.f(_du,_u),L.u,v,L.cache1,L.cache2) + end +end + +struct HesVec{F,T1,T2,uType} + f::F + cache1::T1 + cache2::T2 + cache3::T2 + u::uType + autodiff::Bool +end + +function HesVec(f,u::AbstractArray;autodiff=true) + if autodiff + cache1 = ForwardDiff.GradientConfig(f,u) + cache2 = similar(u) + cache3 = similar(u) + else + cache1 = similar(u) + cache2 = similar(u) + cache3 = similar(u) + end + HesVec(f,cache1,cache2,cache3,u,autodiff) +end + +Base.size(L::HesVec) = (length(L.cache2),length(L.cache2)) +Base.size(L::HesVec,i::Int) = length(L.cache2) +Base.:*(L::HesVec,x::AbstractVector) = L.autodiff ? numauto_hesvec(L.f,L.u,x) : num_hesvec(L.f,L.u,x) + +function LinearAlgebra.mul!(du::AbstractVector,L::HesVec,v::AbstractVector) + if L.autodiff + numauto_hesvec!(du,L.f,L.u,v,L.cache1,L.cache2,L.cache3) + else + num_hesvec!(du,L.f,L.u,v,L.cache1,L.cache2,L.cache3) + end +end + +struct HesVecGrad{G,T1,T2,uType} + g::G + cache1::T1 + cache2::T2 + u::uType + autodiff::Bool +end + +function HesVecGrad(g,u::AbstractArray;autodiff=false) + if autodiff + cache1 = ForwardDiff.Dual{DeivVecTag}.(u, u) + cache2 = ForwardDiff.Dual{DeivVecTag}.(u, u) + else + cache1 = similar(u) + cache2 = similar(u) + end + HesVecGrad(g,cache1,cache2,u,autodiff) +end + +Base.size(L::HesVecGrad) = (length(L.cache2),length(L.cache2)) +Base.size(L::HesVecGrad,i::Int) = length(L.cache2) +Base.:*(L::HesVecGrad,x::AbstractVector) = L.autodiff ? auto_hesvecgrad(L.g,L.u,x) : num_hesvecgrad(L.g,L.u,x) + +function LinearAlgebra.mul!(du::AbstractVector,L::HesVecGrad,v::AbstractVector) + if L.autodiff + auto_hesvecgrad!(du,L.g,L.u,v,L.cache1,L.cache2) + else + num_hesvecgrad!(du,L.g,L.u,v,L.cache1,L.cache2) + end +end diff --git a/src/differentiation/jaches_products_zygote.jl b/src/differentiation/jaches_products_zygote.jl new file mode 100644 index 00000000..17eb2123 --- /dev/null +++ b/src/differentiation/jaches_products_zygote.jl @@ -0,0 +1,40 @@ +function numback_hesvec!(du, f, x, v, cache1 = similar(v), cache2 = similar(v)) + g = let f=f + (dx, x) -> dx .= first(Zygote.gradient(f,x)) + end + T = eltype(x) + # Should it be min? max? mean? + ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) + @. x += ϵ*v + g(cache1,x) + @. x -= 2ϵ*v + g(cache2,x) + @. 
du = (cache1 - cache2)/(2ϵ) +end + +function numback_hesvec(f, x, v) + g = x -> first(Zygote.gradient(f,x)) + T = eltype(x) + # Should it be min? max? mean? + ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) + x += ϵ*v + gxp = g(x) + x -= 2ϵ*v + gxm = g(x) + (gxp - gxm)/(2ϵ) +end + +function autoback_hesvec!(du, f, x, v, cache2 = ForwardDiff.Dual{Nothing}.(x, v), + cache3 = ForwardDiff.Dual{Nothing}.(x, v)) + g = let f=f + (dx, x) -> dx .= first(Zygote.gradient(f,x)) + end + cache2 .= Dual{Nothing}.(x, v) + g(cache3,cache2) + du .= partials.(cache3, 1) +end + +function autoback_hesvec(f, x, v) + g = x -> first(Zygote.gradient(f,x)) + ForwardDiff.partials.(g(ForwardDiff.Dual{Nothing}.(x, v)), 1) +end diff --git a/src/program_sparsity/blas.jl b/src/program_sparsity/blas.jl index c031fb50..35151511 100644 --- a/src/program_sparsity/blas.jl +++ b/src/program_sparsity/blas.jl @@ -1,6 +1,3 @@ -using LinearAlgebra -import LinearAlgebra.BLAS - # generic implementations macro reroute(f, g) @@ -19,7 +16,7 @@ macro reroute(f, g) end end -@reroute BLAS.dot dot(Any, Any) -@reroute BLAS.axpy! axpy!(Any, - AbstractArray, - AbstractArray) +@reroute LinearAlgebra.BLAS.dot LinearAlgebra.dot(Any, Any) +@reroute LinearAlgebra.BLAS.axpy! LinearAlgebra.axpy!(Any, + AbstractArray, + AbstractArray) diff --git a/src/program_sparsity/hessian.jl b/src/program_sparsity/hessian.jl index 8d5734b9..0bcbbdb9 100644 --- a/src/program_sparsity/hessian.jl +++ b/src/program_sparsity/hessian.jl @@ -1,8 +1,3 @@ -using Cassette -import Cassette: tag, untag, Tagged, metadata, hasmetadata, istagged, canrecurse -import Core: SSAValue -using SparseArrays - # Tags: Cassette.@context HessianSparsityContext diff --git a/src/program_sparsity/linearity.jl b/src/program_sparsity/linearity.jl index f1d9ffb3..363f65f0 100644 --- a/src/program_sparsity/linearity.jl +++ b/src/program_sparsity/linearity.jl @@ -1,19 +1,14 @@ -using SpecialFunctions -import Base.Broadcast - const constant_funcs = [] const monadic_linear = [deg2rad, +, rad2deg, transpose, -, conj] -const monadic_nonlinear = [asind, log1p, acsch, erfc, digamma, acos, asec, acosh, airybiprime, acsc, cscd, log, tand, log10, csch, asinh, airyai, abs2, gamma, lgamma, erfcx, bessely0, cosh, sin, cos, atan, cospi, cbrt, acosd, bessely1, acoth, erfcinv, erf, dawson, inv, acotd, airyaiprime, erfinv, trigamma, asecd, besselj1, exp, acot, sqrt, sind, sinpi, asech, log2, tan, invdigamma, airybi, exp10, sech, erfi, coth, asin, cotd, cosd, sinh, abs, besselj0, csc, tanh, secd, atand, sec, acscd, cot, exp2, expm1, atanh] +const monadic_nonlinear = [asind, log1p, acsch, acos, asec, acosh, acsc, cscd, log, tand, log10, csch, asinh, abs2, cosh, sin, cos, atan, cospi, cbrt, acosd, acoth, inv, acotd, asecd, exp, acot, sqrt, sind, sinpi, asech, log2, tan, exp10, sech, coth, asin, cotd, cosd, sinh, abs, csc, tanh, secd, atand, sec, acscd, cot, exp2, expm1, atanh] -diadic_of_linearity(::Val{(true, true, true)}) = [+, rem2pi, -, >, isless, <, isequal, max, min, convert] -diadic_of_linearity(::Val{(true, true, false)}) = [*] -#diadic_of_linearit(::(Val{(true, false, true)}) = [besselk, hankelh2, bessely, besselj, besseli, polygamma, hankelh1] -diadic_of_linearity(::Val{(true, false, false)}) = [/] -diadic_of_linearity(::Val{(false, true, false)}) = [\] -diadic_of_linearity(::Val{(false, false, false)}) = [hypot, atan, mod, rem, lbeta, ^, beta] -diadic_of_linearity(::Val) = [] +const diadic_linear_true_true_true = [+, rem2pi, -, >, isless, <, isequal, max, min, convert] +const 
diadic_linear_true_true_false = [*]
+const diadic_linear_true_false_false = [ / ]
+const diadic_linear_false_true_false = [ \ ]
+const diadic_linear_false_false_false = [hypot, atan, mod, rem, ^]
 
 haslinearity(f, nargs) = false
 
@@ -27,34 +22,57 @@ for f in constant_funcs
 end
 
 # linearity of a single input function is either
-# Val{true}() or Val{false}()
+# Val(true) or Val(false)
 #
 for f in monadic_linear
     @eval begin
         haslinearity(::typeof($f), ::Val{1}) = true
-        linearity(::typeof($f), ::Val{1}) = Val{true}()
+        linearity(::typeof($f), ::Val{1}) = Val(true)
     end
 end
+
+for f in monadic_nonlinear
+    @eval begin
+        haslinearity(::typeof($f), ::Val{1}) = true
+        linearity(::typeof($f), ::Val{1}) = Val(false)
+    end
+end
+
 # linearity of a 2-arg function is:
-# Val{(linear11, linear22, linear12)}()
+# Val((linear11, linear22, linear12))
 #
 # linearIJ refers to the zeroness of d^2/dxIxJ
-for f in monadic_nonlinear
+for f in diadic_linear_true_true_true
     @eval begin
-        haslinearity(::typeof($f), ::Val{1}) = true
-        linearity(::typeof($f), ::Val{1}) = Val{false}()
+        haslinearity(::typeof($f), ::Val{2}) = true
+        linearity(::typeof($f), ::Val{2}) = Val((true, true, true))
+    end
+end
+
+for f in diadic_linear_true_true_false
+    @eval begin
+        haslinearity(::typeof($f), ::Val{2}) = true
+        linearity(::typeof($f), ::Val{2}) = Val((true, true, false))
    end
 end
 
-for linearity_mask = 0:2^3-1
-    lin = Val{map(x->x!=0, (linearity_mask & 4,
-                            linearity_mask & 2,
-                            linearity_mask & 1))}()
+for f in diadic_linear_true_false_false
+    @eval begin
+        haslinearity(::typeof($f), ::Val{2}) = true
+        linearity(::typeof($f), ::Val{2}) = Val((true, false, false))
+    end
+end
 
-    for f in diadic_of_linearity(lin)
-        @eval begin
-            haslinearity(::typeof($f), ::Val{2}) = true
-            linearity(::typeof($f), ::Val{2}) = $lin
-        end
+for f in diadic_linear_false_true_false
+    @eval begin
+        haslinearity(::typeof($f), ::Val{2}) = true
+        linearity(::typeof($f), ::Val{2}) = Val((false, true, false))
+    end
+end
+
+for f in diadic_linear_false_false_false
+    @eval begin
+        haslinearity(::typeof($f), ::Val{2}) = true
+        linearity(::typeof($f), ::Val{2}) = Val((false, false, false))
    end
 end
diff --git a/src/program_sparsity/linearity_special.jl b/src/program_sparsity/linearity_special.jl
new file mode 100644
index 00000000..c823fd30
--- /dev/null
+++ b/src/program_sparsity/linearity_special.jl
@@ -0,0 +1,29 @@
+const monadic_nonlinear_special = [airyai, airyaiprime, airybi, airybiprime, besselj0, besselj1, bessely0, bessely1, dawson, digamma, erf, erfc, erfcinv, erfi, erfinv, erfcx, gamma, invdigamma, lgamma, trigamma]
+
+const diadic_linear_false_false_false_special = [lbeta, beta]
+# const diadic_linear_true_false_true_special = [besselk, hankelh2, bessely, besselj, besseli, polygamma, hankelh1]
+
+for f in monadic_nonlinear_special
+    @eval begin
+        haslinearity(::typeof($f), ::Val{1}) = true
+        linearity(::typeof($f), ::Val{1}) = Val(false)
+    end
+end
+
+# linearity of a 2-arg function is:
+# Val((linear11, linear22, linear12))
+#
+# linearIJ refers to the zeroness of d^2/dxIxJ
+for f in diadic_linear_false_false_false_special
+    @eval begin
+        haslinearity(::typeof($f), ::Val{2}) = true
+        linearity(::typeof($f), ::Val{2}) = Val((false,false,false))
+    end
+end
+
+# for f in diadic_linear_true_false_true_special
+#     @eval begin
+#         haslinearity(::typeof($f), ::Val{2}) = true
+#         linearity(::typeof($f), ::Val{2}) = Val((true,false,true))
+#     end
+# end
diff --git a/test/program_sparsity/common.jl b/test/program_sparsity/common.jl
index 61de55a0..d6652425 100644
--- 
a/test/program_sparsity/common.jl +++ b/test/program_sparsity/common.jl @@ -1,11 +1,12 @@ ### Rules -using Cassette -using SparseArrays -import Cassette: tag, untag, Tagged, metadata, hasmetadata, istagged -using SparseDiffTools -import SparseDiffTools: Path, BranchesPass, SparsityContext, Fixed, - Input, Output, ProvinanceSet, Tainted, istainted, - alldone, reset!, HessianSparsityContext +using Cassette, SparseDiffTools +using SparseArrays, Test + +using Cassette: tag, untag, Tagged, metadata, hasmetadata, istagged +using SparseDiffTools: Path, BranchesPass, SparsityContext, Fixed, + Input, Output, ProvinanceSet, Tainted, istainted, + alldone, reset!, HessianSparsityContext +using SparseDiffTools: TermCombination function tester(f, Y, X, args...; sparsity=Sparsity(length(Y), length(X))) diff --git a/test/program_sparsity/hessian.jl b/test/program_sparsity/hessian.jl index 71371d07..64f89a48 100644 --- a/test/program_sparsity/hessian.jl +++ b/test/program_sparsity/hessian.jl @@ -1,6 +1,3 @@ -import SparseDiffTools: TermCombination -using Test - Term(i...) = TermCombination(Set([Dict(j=>1 for j in i)])) @test htesttag(x->x, [1,2]) == Input() diff --git a/test/runtests.jl b/test/runtests.jl index 1aeb06dd..604c3a17 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,14 +1,11 @@ -using SparseDiffTools -using Test - - -@testset "Exact coloring via contraction" begin include("test_contraction.jl") end -@testset "Greedy distance-1 coloring" begin include("test_greedy_d1.jl") end -@testset "Greedy star coloring" begin include("test_greedy_star.jl") end -@testset "Matrix to graph conversion" begin include("test_matrix2graph.jl") end -@testset "AD using color vector" begin include("test_ad.jl") end -@testset "Integration test" begin include("test_integration.jl") end -@testset "Special matrices" begin include("test_specialmatrices.jl") end -@testset "Jac Vecs and Hes Vecs" begin include("test_jaches_products.jl") end -@testset "Program sparsity computation" begin include("program_sparsity/testall.jl") end +using SafeTestsets +@time @safetestset "Exact coloring via contraction" begin include("test_contraction.jl") end +@time @safetestset "Greedy distance-1 coloring" begin include("test_greedy_d1.jl") end +@time @safetestset "Greedy star coloring" begin include("test_greedy_star.jl") end +@time @safetestset "Matrix to graph conversion" begin include("test_matrix2graph.jl") end +@time @safetestset "AD using color vector" begin include("test_ad.jl") end +@time @safetestset "Integration test" begin include("test_integration.jl") end +@time @safetestset "Special matrices" begin include("test_specialmatrices.jl") end +@time @safetestset "Jac Vecs and Hes Vecs" begin include("test_jaches_products.jl") end +@time @safetestset "Program sparsity computation" begin include("program_sparsity/testall.jl") end diff --git a/test/test_ad.jl b/test/test_ad.jl index 113eb795..3055ce4f 100644 --- a/test/test_ad.jl +++ b/test/test_ad.jl @@ -1,5 +1,6 @@ -using SparseDiffTools, SparseArrays, Test +using SparseDiffTools using ForwardDiff: Dual, jacobian +using SparseArrays, Test fcalls = 0 function f(dx,x) diff --git a/test/test_contraction.jl b/test/test_contraction.jl index 92d1311f..c8a2c162 100644 --- a/test/test_contraction.jl +++ b/test/test_contraction.jl @@ -1,7 +1,9 @@ using SparseDiffTools using VertexSafeGraphs using LightGraphs + using Random +Random.seed!(123) test_graphs = Array{VSafeGraph, 1}(undef, 0) diff --git a/test/test_greedy_d1.jl b/test/test_greedy_d1.jl index 808ae56f..9f717477 100644 --- 
a/test/test_greedy_d1.jl +++ b/test/test_greedy_d1.jl @@ -1,7 +1,10 @@ using SparseDiffTools using VertexSafeGraphs using LightGraphs +using Test + using Random +Random.seed!(123) test_graphs = Array{VSafeGraph, 1}(undef, 0) diff --git a/test/test_greedy_star.jl b/test/test_greedy_star.jl index 807d5bea..a1e54757 100644 --- a/test/test_greedy_star.jl +++ b/test/test_greedy_star.jl @@ -1,7 +1,8 @@ using SparseDiffTools using LightGraphs -using Random +using Test +using Random Random.seed!(123) #= Test data =# diff --git a/test/test_integration.jl b/test/test_integration.jl index e258ad8b..5d694f99 100644 --- a/test/test_integration.jl +++ b/test/test_integration.jl @@ -1,7 +1,8 @@ -using SparseDiffTools, SparseArrays -using LinearAlgebra -using DiffEqDiffTools -using Test +using SparseDiffTools +using DiffEqDiffTools: finite_difference_jacobian, finite_difference_jacobian! +using Cassette + +using LinearAlgebra, SparseArrays, Test fcalls = 0 function f(dx,x) @@ -31,20 +32,20 @@ colors = matrix_colors(true_jac) #Jacobian computed without coloring vector fcalls = 0 -J = DiffEqDiffTools.finite_difference_jacobian(f, rand(30)) +J = finite_difference_jacobian(f, rand(30)) @test J ≈ second_derivative_stencil(30) @test fcalls == 31 #Jacobian computed with coloring vectors fcalls = 0 _J = similar(true_jac) -DiffEqDiffTools.finite_difference_jacobian!(_J, f, rand(30), color = colors) +finite_difference_jacobian!(_J, f, rand(30), color = colors) @test fcalls == 4 @test _J ≈ J fcalls = 0 _J = similar(true_jac) _denseJ = collect(_J) -DiffEqDiffTools.finite_difference_jacobian!(_denseJ, f, rand(30), color = colors, sparsity=_J) +finite_difference_jacobian!(_denseJ, f, rand(30), color = colors, sparsity=_J) @test fcalls == 4 @test _denseJ ≈ J diff --git a/test/test_jaches_products.jl b/test/test_jaches_products.jl index 536bddcc..cb42f660 100644 --- a/test/test_jaches_products.jl +++ b/test/test_jaches_products.jl @@ -1,115 +1,118 @@ -using ForwardDiff, SparseDiffTools, LinearAlgebra, DiffEqDiffTools, - IterativeSolvers, Test - -const A = rand(300,300) -f(du,u) = mul!(du,A,u) -f(u) = A*u -x = rand(300) -v = rand(300) -du = similar(x) -g(u) = sum(abs2,u) -function h(x) - DiffEqDiffTools.finite_difference_gradient(g,x) -end -function h(dx,x) - DiffEqDiffTools.finite_difference_gradient!(dx,g,x) -end - -cache1 = ForwardDiff.Dual{SparseDiffTools.DeivVecTag}.(x, v) -cache2 = ForwardDiff.Dual{SparseDiffTools.DeivVecTag}.(x, v) -@test num_jacvec!(du, f, x, v) ≈ ForwardDiff.jacobian(f,similar(x),x)*v rtol=1e-6 -@test num_jacvec!(du, f, x, v, similar(v), similar(v)) ≈ ForwardDiff.jacobian(f,similar(x),x)*v rtol=1e-6 -@test num_jacvec(f, x, v) ≈ ForwardDiff.jacobian(f,similar(x),x)*v rtol=1e-6 - -@test auto_jacvec!(du, f, x, v) ≈ ForwardDiff.jacobian(f,similar(x),x)*v -@test auto_jacvec!(du, f, x, v, cache1, cache2) ≈ ForwardDiff.jacobian(f,similar(x),x)*v -@test auto_jacvec(f, x, v) ≈ ForwardDiff.jacobian(f,similar(x),x)*v - -@test num_hesvec!(du, g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 -@test num_hesvec!(du, g, x, v, similar(v), similar(v), similar(v)) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 -@test num_hesvec(g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 - -@test numauto_hesvec!(du, g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 -@test numauto_hesvec!(du, g, x, v, ForwardDiff.GradientConfig(g,x), similar(v), similar(v)) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 -@test numauto_hesvec(g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 - -@test autonum_hesvec!(du, g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 
-@test autonum_hesvec!(du, g, x, v, similar(v), cache1, cache2) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 -@test autonum_hesvec(g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 - -@test numback_hesvec!(du, g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 -@test numback_hesvec!(du, g, x, v, similar(v), similar(v)) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 -@test numback_hesvec(g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 - -cache3 = ForwardDiff.Dual{Nothing}.(x, v) -cache4 = ForwardDiff.Dual{Nothing}.(x, v) -@test autoback_hesvec!(du, g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 -@test autoback_hesvec!(du, g, x, v, cache3, cache4) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 -@test autoback_hesvec(g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 - -@test num_hesvecgrad!(du, h, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 -@test num_hesvecgrad!(du, h, x, v, similar(v), similar(v)) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 -@test num_hesvecgrad(h, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 - -@test auto_hesvecgrad!(du, h, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 -@test auto_hesvecgrad!(du, h, x, v, cache1, cache2) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 -@test auto_hesvecgrad(h, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 - -L = JacVec(f,x) -@test L*x ≈ auto_jacvec(f, x, x) -@test L*v ≈ auto_jacvec(f, x, v) -@test mul!(du,L,v) ≈ auto_jacvec(f, x, v) -L.u .= v -@test mul!(du,L,v) ≈ auto_jacvec(f, v, v) - -L = JacVec(f,x,autodiff=false) -@test L*x ≈ num_jacvec(f, x, x) -@test L*v ≈ num_jacvec(f, x, v) -L.u == x -@test mul!(du,L,v) ≈ num_jacvec(f, x, v) rtol=1e-6 -L.u .= v -@test mul!(du,L,v) ≈ num_jacvec(f, v, v) rtol=1e-6 - -### Integration test with IterativeSolvers -out = similar(v) -gmres!(out, L, v) - -x = rand(300) -v = rand(300) -L = HesVec(g,x,autodiff=false) -@test L*x ≈ num_hesvec(g, x, x) -@test L*v ≈ num_hesvec(g, x, v) -@test mul!(du,L,v) ≈ num_hesvec(g, x, v) rtol=1e-2 -L.u .= v -@test mul!(du,L,v) ≈ num_hesvec(g, v, v) rtol=1e-2 - -L = HesVec(g,x) -@test L*x ≈ numauto_hesvec(g, x, x) -@test L*v ≈ numauto_hesvec(g, x, v) -@test mul!(du,L,v) ≈ numauto_hesvec(g, x, v) rtol=1e-8 -L.u .= v -@test mul!(du,L,v) ≈ numauto_hesvec(g, v, v) rtol=1e-8 - -### Integration test with IterativeSolvers -out = similar(v) -gmres!(out, L, v) - -x = rand(300) -v = rand(300) -L = HesVecGrad(h,x,autodiff=false) -@test L*x ≈ num_hesvec(g, x, x) -@test L*v ≈ num_hesvec(g, x, v) -@test mul!(du,L,v) ≈ num_hesvec(g, x, v) rtol=1e-2 -L.u .= v -@test mul!(du,L,v) ≈ num_hesvec(g, v, v) rtol=1e-2 - -L = HesVecGrad(h,x,autodiff=true) -@test L*x ≈ autonum_hesvec(g, x, x) -@test L*v ≈ numauto_hesvec(g, x, v) -@test mul!(du,L,v) ≈ numauto_hesvec(g, x, v) rtol=1e-8 -L.u .= v -@test mul!(du,L,v) ≈ numauto_hesvec(g, v, v) rtol=1e-8 - -### Integration test with IterativeSolvers -out = similar(v) -gmres!(out, L, v) +using SparseDiffTools, ForwardDiff, DiffEqDiffTools, Zygote, IterativeSolvers +using LinearAlgebra, Test + +using Random +Random.seed!(123) + +const A = rand(300,300) +f(du,u) = mul!(du,A,u) +f(u) = A*u +x = rand(300) +v = rand(300) +du = similar(x) +g(u) = sum(abs2,u) +function h(x) + DiffEqDiffTools.finite_difference_gradient(g,x) +end +function h(dx,x) + DiffEqDiffTools.finite_difference_gradient!(dx,g,x) +end + +cache1 = ForwardDiff.Dual{SparseDiffTools.DeivVecTag}.(x, v) +cache2 = ForwardDiff.Dual{SparseDiffTools.DeivVecTag}.(x, v) +@test num_jacvec!(du, f, x, v) ≈ ForwardDiff.jacobian(f,similar(x),x)*v rtol=1e-6 +@test num_jacvec!(du, f, x, v, similar(v), similar(v)) ≈ ForwardDiff.jacobian(f,similar(x),x)*v 
rtol=1e-6 +@test num_jacvec(f, x, v) ≈ ForwardDiff.jacobian(f,similar(x),x)*v rtol=1e-6 + +@test auto_jacvec!(du, f, x, v) ≈ ForwardDiff.jacobian(f,similar(x),x)*v +@test auto_jacvec!(du, f, x, v, cache1, cache2) ≈ ForwardDiff.jacobian(f,similar(x),x)*v +@test auto_jacvec(f, x, v) ≈ ForwardDiff.jacobian(f,similar(x),x)*v + +@test num_hesvec!(du, g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 +@test num_hesvec!(du, g, x, v, similar(v), similar(v), similar(v)) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 +@test num_hesvec(g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 + +@test numauto_hesvec!(du, g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 +@test numauto_hesvec!(du, g, x, v, ForwardDiff.GradientConfig(g,x), similar(v), similar(v)) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 +@test numauto_hesvec(g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 + +@test autonum_hesvec!(du, g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 +@test autonum_hesvec!(du, g, x, v, similar(v), cache1, cache2) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 +@test autonum_hesvec(g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 + +@test numback_hesvec!(du, g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 +@test numback_hesvec!(du, g, x, v, similar(v), similar(v)) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 +@test numback_hesvec(g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 + +cache3 = ForwardDiff.Dual{Nothing}.(x, v) +cache4 = ForwardDiff.Dual{Nothing}.(x, v) +@test autoback_hesvec!(du, g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 +@test autoback_hesvec!(du, g, x, v, cache3, cache4) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 +@test autoback_hesvec(g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 + +@test num_hesvecgrad!(du, h, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 +@test num_hesvecgrad!(du, h, x, v, similar(v), similar(v)) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 +@test num_hesvecgrad(h, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 + +@test auto_hesvecgrad!(du, h, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 +@test auto_hesvecgrad!(du, h, x, v, cache1, cache2) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 +@test auto_hesvecgrad(h, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 + +L = JacVec(f,x) +@test L*x ≈ auto_jacvec(f, x, x) +@test L*v ≈ auto_jacvec(f, x, v) +@test mul!(du,L,v) ≈ auto_jacvec(f, x, v) +L.u .= v +@test mul!(du,L,v) ≈ auto_jacvec(f, v, v) + +L = JacVec(f,x,autodiff=false) +@test L*x ≈ num_jacvec(f, x, x) +@test L*v ≈ num_jacvec(f, x, v) +L.u == x +@test mul!(du,L,v) ≈ num_jacvec(f, x, v) rtol=1e-6 +L.u .= v +@test mul!(du,L,v) ≈ num_jacvec(f, v, v) rtol=1e-6 + +### Integration test with IterativeSolvers +out = similar(v) +gmres!(out, L, v) + +x = rand(300) +v = rand(300) +L = HesVec(g,x,autodiff=false) +@test L*x ≈ num_hesvec(g, x, x) +@test L*v ≈ num_hesvec(g, x, v) +@test mul!(du,L,v) ≈ num_hesvec(g, x, v) rtol=1e-2 +L.u .= v +@test mul!(du,L,v) ≈ num_hesvec(g, v, v) rtol=1e-2 + +L = HesVec(g,x) +@test L*x ≈ numauto_hesvec(g, x, x) +@test L*v ≈ numauto_hesvec(g, x, v) +@test mul!(du,L,v) ≈ numauto_hesvec(g, x, v) rtol=1e-8 +L.u .= v +@test mul!(du,L,v) ≈ numauto_hesvec(g, v, v) rtol=1e-8 + +### Integration test with IterativeSolvers +out = similar(v) +gmres!(out, L, v) + +x = rand(300) +v = rand(300) +L = HesVecGrad(h,x,autodiff=false) +@test L*x ≈ num_hesvec(g, x, x) +@test L*v ≈ num_hesvec(g, x, v) +@test mul!(du,L,v) ≈ num_hesvec(g, x, v) rtol=1e-2 +L.u .= v +@test mul!(du,L,v) ≈ num_hesvec(g, v, v) rtol=1e-2 + +L = HesVecGrad(h,x,autodiff=true) +@test L*x ≈ autonum_hesvec(g, x, x) +@test L*v ≈ numauto_hesvec(g, x, v) 
+@test mul!(du,L,v) ≈ numauto_hesvec(g, x, v) rtol=1e-8 +L.u .= v +@test mul!(du,L,v) ≈ numauto_hesvec(g, v, v) rtol=1e-8 + +### Integration test with IterativeSolvers +out = similar(v) +gmres!(out, L, v)
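For reference alongside the operator tests above, here is a minimal matrix-free linear-solve sketch using the `JacVec` operator with `IterativeSolvers.gmres!`, the same pattern these tests exercise. The matrix `A`, the function `f`, and the sizes are illustrative assumptions, not part of the package:

```julia
using SparseDiffTools, IterativeSolvers, LinearAlgebra

A = rand(50, 50) + 10I     # illustrative, diagonally dominated so gmres converges quickly
f(du, u) = mul!(du, A, u)  # in-place form, used by mul!(x, J, v)
f(u) = A * u               # out-of-place form, used by J * v

u0 = rand(50)              # linearization point
b  = rand(50)
J  = JacVec(f, u0)         # lazy J(u0); the Jacobian matrix is never materialized

x = zeros(50)
gmres!(x, J, b)            # solves J(u0) * x = b matrix-free
@assert norm(A * x - b) / norm(b) < 1e-6  # for this linear f, J(u0) == A

J.u .= rand(50)            # move the linearization point in place, no reallocation
gmres!(x, J, b)
```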