diff --git a/Project.toml b/Project.toml
index 01be4b92..db7d4973 100644
--- a/Project.toml
+++ b/Project.toml
@@ -6,26 +6,24 @@ version = "0.4.1"
 [deps]
 BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
 BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0"
-Cassette = "7057c7e9-c182-5462-911a-8362d720325c"
 DiffEqDiffTools = "01453d9d-ee7c-5054-8395-0335cb756afa"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
-Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
-SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 VertexSafeGraphs = "19fa3120-7c27-5ec5-8db8-b0b0aa330d6f"
-Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [compat]
 julia = "1"
 
 [extras]
-DiffEqDiffTools = "01453d9d-ee7c-5054-8395-0335cb756afa"
+Cassette = "7057c7e9-c182-5462-911a-8362d720325c"
 IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [targets]
-test = ["Test", "DiffEqDiffTools", "IterativeSolvers", "Random"]
+test = ["Test", "Cassette", "IterativeSolvers", "Random", "SafeTestsets", "Zygote"]
diff --git a/README.md b/README.md
index 17b49c8b..c0913018 100644
--- a/README.md
+++ b/README.md
@@ -6,10 +6,12 @@ This package is for exploiting sparsity in Jacobians and Hessians
 to accelerate computations. Matrix-free Jacobian-vector product and
 Hessian-vector product operators are provided that are compatible with
 AbstractMatrix-based libraries
-like IterativeSolvers.jl for easy and efficient Newton-Krylov implementation.
-Automatic and numerical differentiation are utilized and optional. In addition,
-the ability to automatically detect the sparsity of a function, perform matrix
-coloring, and utilize coloring in Jacobian and Hessian construction is provided.
+like IterativeSolvers.jl for easy and efficient Newton-Krylov implementation. It is
+also possible to perform matrix coloring and to utilize that coloring in Jacobian
+and Hessian construction.
+
+Optionally, automatic and numerical differentiation are utilized, and the ability
+to automatically detect the sparsity of a function is provided.
 
 ## Example
 
@@ -31,11 +33,13 @@ end
 For this function, we know that the sparsity pattern of the Jacobian is a
 `Tridiagonal` matrix. However, if we didn't know the sparsity pattern for
 the Jacobian, we could use the `sparsity!` function to automatically
-detect the sparsity pattern. We declare that it outputs a length 30 vector
-and takes in a length 30 vector, and it spits out a `Sparsity` object
-which we can turn into a `SparseMatrixCSC`:
+detect the sparsity pattern. This function is only available if you
+load Cassette.jl as well. 
We declare that the function `f` outputs a +vector of length 30 and takes in a vector of length 30, and `sparsity!` spits +out a `Sparsity` object which we can turn into a `SparseMatrixCSC`: ```julia +using Cassette sparsity_pattern = sparsity!(f,output,input) jac = Float64.(sparse(sparsity_pattern)) ``` @@ -88,36 +92,6 @@ gmres!(res,J,v) ## Documentation -### Automated Sparsity Detection - -Automated sparsity detection is provided by the `sparsity!` function whose -syntax is: - -```julia -`sparsity!(f, Y, X, args...; sparsity=Sparsity(length(X), length(Y)), verbose=true)` -``` - -The arguments are: - -- `f`: the function -- `Y`: the output array -- `X`: the input array -- `args`: trailing arguments to `f`. They are considered subject to change, unless wrapped as `Fixed(arg)` -- `S`: (optional) the sparsity pattern -- `verbose`: (optional) whether to describe the paths taken by the sparsity detection. - -The function `f` is assumed to take arguments of the form `f(dx,x,args...)`. -`sparsity!` returns a `Sparsity` object which describes where the non-zeros -of the Jacobian occur. `sparse(::Sparsity)` transforms the pattern into -a sparse matrix. - -This function utilizes non-standard interpretation, which we denote -combinatoric concolic analysis, to directly realize the sparsity pattern from the program's AST. It requires that the function `f` is a Julia function. It does not -work numerically, meaning that it is not prone to floating point error or -cancelation. It allows for branching and will automatically check all of the -branches. However, a while loop of indeterminate length which is dependent -on the input argument is not allowed. - ### Matrix Coloring Matrix coloring allows you to reduce the number of times finite differencing @@ -230,29 +204,8 @@ autonum_hesvec!(du,f,x,v, cache3 = ForwardDiff.Dual{DeivVecTag}.(x, v)) autonum_hesvec(f,x,v) - - -numback_hesvec!(du,f,x,v, - cache1 = similar(v), - cache2 = similar(v)) - -numback_hesvec(f,x,v) - -# Currently errors! See https://github.com/FluxML/Zygote.jl/issues/241 -autoback_hesvec!(du,f,x,v, - cache2 = ForwardDiff.Dual{DeivVecTag}.(x, v), - cache3 = ForwardDiff.Dual{DeivVecTag}.(x, v)) - -autoback_hesvec(f,x,v) ``` -`numauto` and `autonum` both mix numerical and automatic differentiation, with -the former almost always being more efficient and is thus recommended. `numback` and -`autoback` methods are numerical/ForwardDiff over reverse mode automatic differentiation -respectively, where the reverse-mode AD is provided by Zygote.jl. Currently these methods -are not competitive against `numauto`, but as Zygote.jl gets optimized these will likely -be the fastest. - In addition, the following forms allow you to provide a gradient function `g(dx,x)` or `dx=g(x)` respectively: @@ -271,6 +224,32 @@ auto_hesvecgrad!(du,g,x,v, auto_hesvecgrad(g,x,v) ``` +The `numauto` and `autonum` methods both mix numerical and automatic differentiation, with +the former almost always being more efficient and thus being recommended. + +Optionally, if you load Zygote.jl, the following `numback` +and `autoback` methods are available and allow numerical/ForwardDiff over reverse mode +automatic differentiation respectively, where the reverse-mode AD is provided by Zygote.jl. +Currently these methods are not competitive against `numauto`, but as Zygote.jl gets +optimized these will likely be the fastest. + +```julia +using Zygote # Required + +numback_hesvec!(du,f,x,v, + cache1 = similar(v), + cache2 = similar(v)) + +numback_hesvec(f,x,v) + +# Currently errors! 
See https://github.com/FluxML/Zygote.jl/issues/241
+autoback_hesvec!(du,f,x,v,
+                 cache2 = ForwardDiff.Dual{DeivVecTag}.(x, v),
+                 cache3 = ForwardDiff.Dual{DeivVecTag}.(x, v))
+
+autoback_hesvec(f,x,v)
+```
+
 #### J*v and H*v Operators
 
 The following produce matrix-free operators which are used for calculating
@@ -287,3 +266,33 @@ These all have the same interface, where `J*v` utilizes the out-of-place
 Jacobian-vector or Hessian-vector function, whereas `mul!(res,J,v)` utilizes
 the appropriate in-place versions. To update the location of differentiation
 in the operator, simply mutate the vector `u`: `J.u .= ...`.
+
+### Automated Sparsity Detection
+
+Automated sparsity detection is provided by the `sparsity!` function. Because this
+functionality is loaded via Requires.jl, you must do `using Cassette` first. The syntax is:
+
+```julia
+sparsity!(f, Y, X, args...; sparsity=Sparsity(length(X), length(Y)), verbose=true)
+```
+
+The arguments are:
+
+- `f`: the function
+- `Y`: the output array
+- `X`: the input array
+- `args`: trailing arguments to `f`. They are considered subject to change, unless wrapped as `Fixed(arg)`
+- `sparsity`: (optional) the sparsity pattern
+- `verbose`: (optional) whether to describe the paths taken by the sparsity detection.
+
+The function `f` is assumed to take arguments of the form `f(dx,x,args...)`.
+`sparsity!` returns a `Sparsity` object which describes where the non-zeros
+of the Jacobian occur. `sparse(::Sparsity)` transforms the pattern into
+a sparse matrix.
+
+This function utilizes non-standard interpretation, which we denote
+combinatoric concolic analysis, to directly realize the sparsity pattern from the
+program's AST. It requires that the function `f` be a Julia function. It does not
+work numerically, meaning that it is not prone to floating point error or
+cancellation. It allows for branching and will automatically check all of the
+branches. However, a while loop of indeterminate length which is dependent
+on the input argument is not allowed. 
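To make the `sparsity!` documentation above concrete, here is a minimal sketch with a trailing parameter. The function `g`, the parameter value `0.5`, and the length `10` are illustrative assumptions, not part of the package; `Fixed` is accessed by name from `SparseDiffTools` since it is not exported, and Cassette.jl must be installed for this functionality to load:

```julia
using SparseDiffTools, Cassette, SparseArrays
using SparseDiffTools: Fixed  # not exported; see the argument list above

# An illustrative tridiagonal-stencil function with a trailing scalar parameter p.
function g(dy, y, p)
    for i in 2:length(y)-1
        dy[i] = p * (y[i-1] - 2y[i] + y[i+1])
    end
    dy[1] = -2p * y[1] + p * y[2]
    dy[end] = p * y[end-1] - 2p * y[end]
    nothing
end

input  = rand(10)
output = similar(input)
# Wrapping p in Fixed marks it as a constant rather than a varying input.
pattern = sparsity!(g, output, input, Fixed(0.5))
jac = Float64.(sparse(pattern))  # 10×10 tridiagonal sparsity pattern
```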
diff --git a/appveyor.yml b/appveyor.yml index a5091374..2fd7f769 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,6 +1,6 @@ environment: matrix: - - julia_version: 1 + - julia_version: 1.2 - julia_version: nightly platform: diff --git a/src/SparseDiffTools.jl b/src/SparseDiffTools.jl index b2b55c0a..05fc62a9 100644 --- a/src/SparseDiffTools.jl +++ b/src/SparseDiffTools.jl @@ -1,16 +1,19 @@ module SparseDiffTools -using SparseArrays, LinearAlgebra, BandedMatrices, BlockBandedMatrices, - LightGraphs, VertexSafeGraphs, DiffEqDiffTools, ForwardDiff, Zygote, - SparseArrays -using BlockBandedMatrices:blocksize,nblocks +using BandedMatrices +using BlockBandedMatrices +using DiffEqDiffTools +using ForwardDiff +using LightGraphs +using Requires +using VertexSafeGraphs + +using LinearAlgebra +using SparseArrays + +using BlockBandedMatrices: blocksize, nblocks using ForwardDiff: Dual, jacobian, partials, DEFAULT_CHUNK_THRESHOLD -using Requires -using Cassette -import Cassette: tag, untag, Tagged, metadata, hasmetadata, istagged, canrecurse -import Cassette: tagged_new_tuple, ContextTagged, BindingMeta, DisableHooks, nametype -import Core: SSAValue export contract_color, greedy_d1, @@ -27,10 +30,7 @@ export contract_color, autonum_hesvec,autonum_hesvec!, num_hesvecgrad,num_hesvecgrad!, auto_hesvecgrad,auto_hesvecgrad!, - numback_hesvec,numback_hesvec!, - autoback_hesvec,autoback_hesvec!, - JacVec,HesVec,HesVecGrad, - Sparsity, sparsity!, hsparsity + JacVec,HesVec,HesVecGrad include("coloring/high_level.jl") @@ -41,8 +41,17 @@ include("coloring/greedy_star2_coloring.jl") include("coloring/matrix2graph.jl") include("differentiation/compute_jacobian_ad.jl") include("differentiation/jaches_products.jl") + function __init__() @require Cassette="7057c7e9-c182-5462-911a-8362d720325c" begin + using .Cassette + using .Cassette: tag, untag, Tagged, metadata, hasmetadata, istagged, canrecurse + using .Cassette: tagged_new_tuple, ContextTagged, BindingMeta, DisableHooks, nametype + + using Core: SSAValue + + export Sparsity, hsparsity, sparsity! + include("program_sparsity/program_sparsity.jl") include("program_sparsity/sparsity_tracker.jl") include("program_sparsity/path.jl") @@ -51,6 +60,18 @@ function __init__() include("program_sparsity/linearity.jl") include("program_sparsity/hessian.jl") include("program_sparsity/blas.jl") + + @require SpecialFunctions="276daf66-3868-5448-9aa4-cd146d93841b" begin + using .SpecialFunctions + + include("program_sparsity/linearity_special.jl") + end + end + + @require Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" begin + export numback_hesvec, numback_hesvec!, autoback_hesvec, autoback_hesvec! + + include("differentiation/jaches_products_zygote.jl") end end diff --git a/src/differentiation/jaches_products.jl b/src/differentiation/jaches_products.jl index 17b7776f..ea381544 100644 --- a/src/differentiation/jaches_products.jl +++ b/src/differentiation/jaches_products.jl @@ -1,287 +1,243 @@ -struct DeivVecTag end - -# J(f(x))*v -function auto_jacvec!(du, f, x, v, - cache1 = ForwardDiff.Dual{DeivVecTag}.(x, v), - cache2 = ForwardDiff.Dual{DeivVecTag}.(x, v)) - cache1 .= Dual{DeivVecTag}.(x, v) - f(cache2,cache1) - du .= partials.(cache2, 1) -end -function auto_jacvec(f, x, v) - partials.(f(Dual{DeivVecTag}.(x, v)), 1) -end - -function num_jacvec!(du,f,x,v,cache1 = similar(v), - cache2 = similar(v); - compute_f0 = true) - compute_f0 && (f(cache1,x)) - T = eltype(x) - # Should it be min? max? mean? - ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - @. 
x += ϵ*v - f(cache2,x) - @. x -= ϵ*v - @. du = (cache2 - cache1)/ϵ -end - -function num_jacvec(f,x,v,f0=nothing) - f0 === nothing ? _f0 = f(x) : _f0 = f0 - T = eltype(x) - # Should it be min? max? mean? - ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(minimum(x))) - (f(x.+ϵ.*v) .- f(x))./ϵ -end - -function num_hesvec!(du,f,x,v, - cache1 = similar(v), - cache2 = similar(v), - cache3 = similar(v)) - cache = DiffEqDiffTools.GradientCache(v[1],cache1,Val{:central}) - g = let f=f,cache=cache - (dx,x) -> DiffEqDiffTools.finite_difference_gradient!(dx,f,x,cache) - end - T = eltype(x) - # Should it be min? max? mean? - ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - @. x += ϵ*v - g(cache2,x) - @. x -= 2ϵ*v - g(cache3,x) - @. du = (cache2 - cache3)/(2ϵ) -end - -function num_hesvec(f,x,v) - g = (x) -> DiffEqDiffTools.finite_difference_gradient(f,x) - T = eltype(x) - # Should it be min? max? mean? - ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - x += ϵ*v - gxp = g(x) - x -= 2ϵ*v - gxm = g(x) - (gxp - gxm)/(2ϵ) -end - -function numauto_hesvec!(du,f,x,v, - cache = ForwardDiff.GradientConfig(f,v), - cache1 = similar(v), - cache2 = similar(v)) - g = let f=f,x=x,cache=cache - g = (dx,x) -> ForwardDiff.gradient!(dx,f,x,cache) - end - T = eltype(x) - # Should it be min? max? mean? - ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - @. x += ϵ*v - g(cache1,x) - @. x -= 2ϵ*v - g(cache2,x) - @. du = (cache1 - cache2)/(2ϵ) -end - -function numauto_hesvec(f,x,v) - g = (x) -> ForwardDiff.gradient(f,x) - T = eltype(x) - # Should it be min? max? mean? - ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - x += ϵ*v - gxp = g(x) - x -= 2ϵ*v - gxm = g(x) - (gxp - gxm)/(2ϵ) -end - -function numback_hesvec!(du,f,x,v, - cache1 = similar(v), - cache2 = similar(v)) - g = let f=f - g = (dx,x) -> dx .= first(Zygote.gradient(f,x)) - end - T = eltype(x) - # Should it be min? max? mean? - ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - @. x += ϵ*v - g(cache1,x) - @. x -= 2ϵ*v - g(cache2,x) - @. du = (cache1 - cache2)/(2ϵ) -end - -function numback_hesvec(f,x,v) - g = (x) -> first(Zygote.gradient(f,x)) - T = eltype(x) - # Should it be min? max? mean? - ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - x += ϵ*v - gxp = g(x) - x -= 2ϵ*v - gxm = g(x) - (gxp - gxm)/(2ϵ) -end - -function autonum_hesvec!(du,f,x,v, - cache1 = similar(v), - cache2 = ForwardDiff.Dual{DeivVecTag}.(x, v), - cache3 = ForwardDiff.Dual{DeivVecTag}.(x, v)) - cache = DiffEqDiffTools.GradientCache(v[1],cache1,Val{:central}) - g = (dx,x) -> DiffEqDiffTools.finite_difference_gradient!(dx,f,x,cache) - cache2 .= Dual{DeivVecTag}.(x, v) - g(cache3,cache2) - du .= partials.(cache3, 1) -end - -function autonum_hesvec(f,x,v) - g = (x) -> DiffEqDiffTools.finite_difference_gradient(f,x) - partials.(g(Dual{DeivVecTag}.(x, v)), 1) -end - -function autoback_hesvec!(du,f,x,v, - cache2 = ForwardDiff.Dual{Nothing}.(x, v), - cache3 = ForwardDiff.Dual{Nothing}.(x, v)) - g = let f=f - g = (dx,x) -> dx .= first(Zygote.gradient(f,x)) - end - cache2 .= Dual{Nothing}.(x, v) - g(cache3,cache2) - du .= partials.(cache3, 1) -end - -function autoback_hesvec(f,x,v) - g = (x) -> first(Zygote.gradient(f,x)) - ForwardDiff.partials.(g(ForwardDiff.Dual{Nothing}.(x, v)), 1) -end - -function num_hesvecgrad!(du,g,x,v, - cache2 = similar(v), - cache3 = similar(v)) - T = eltype(x) - # Should it be min? max? mean? - ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - @. x += ϵ*v - g(cache2,x) - @. x -= 2ϵ*v - g(cache3,x) - @. 
du = (cache2 - cache3)/(2ϵ) -end - -function num_hesvecgrad(g,x,v) - T = eltype(x) - # Should it be min? max? mean? - ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) - x += ϵ*v - gxp = g(x) - x -= 2ϵ*v - gxm = g(x) - (gxp - gxm)/(2ϵ) -end - -function auto_hesvecgrad!(du,g,x,v, - cache2 = ForwardDiff.Dual{DeivVecTag}.(x, v), - cache3 = ForwardDiff.Dual{DeivVecTag}.(x, v)) - cache2 .= Dual{DeivVecTag}.(x, v) - g(cache3,cache2) - du .= partials.(cache3, 1) -end - -function auto_hesvecgrad(g,x,v) - partials.(g(Dual{DeivVecTag}.(x, v)), 1) -end - -### Operator Forms - -struct JacVec{F,T1,T2,uType} - f::F - cache1::T1 - cache2::T2 - u::uType - autodiff::Bool -end - -function JacVec(f,u::AbstractArray;autodiff=true) - if autodiff - cache1 = ForwardDiff.Dual{DeivVecTag}.(u, u) - cache2 = ForwardDiff.Dual{DeivVecTag}.(u, u) - else - cache1 = similar(u) - cache2 = similar(u) - end - JacVec(f,cache1,cache2,u,autodiff) -end - -Base.size(L::JacVec) = (length(L.cache1),length(L.cache1)) -Base.size(L::JacVec,i::Int) = length(L.cache1) -Base.:*(L::JacVec,x::AbstractVector) = L.autodiff ? auto_jacvec(_u->L.f(_u),L.u,x) : num_jacvec(_u->L.f(_u),L.u,x) - -function LinearAlgebra.mul!(du::AbstractVector,L::JacVec,v::AbstractVector) - if L.autodiff - auto_jacvec!(du,(_du,_u)->L.f(_du,_u),L.u,v,L.cache1,L.cache2) - else - num_jacvec!(du,(_du,_u)->L.f(_du,_u),L.u,v,L.cache1,L.cache2) - end -end - -struct HesVec{F,T1,T2,uType} - f::F - cache1::T1 - cache2::T2 - cache3::T2 - u::uType - autodiff::Bool -end - -function HesVec(f,u::AbstractArray;autodiff=true) - if autodiff - cache1 = ForwardDiff.GradientConfig(f,u) - cache2 = similar(u) - cache3 = similar(u) - else - cache1 = similar(u) - cache2 = similar(u) - cache3 = similar(u) - end - HesVec(f,cache1,cache2,cache3,u,autodiff) -end - -Base.size(L::HesVec) = (length(L.cache2),length(L.cache2)) -Base.size(L::HesVec,i::Int) = length(L.cache2) -Base.:*(L::HesVec,x::AbstractVector) = L.autodiff ? numauto_hesvec(L.f,L.u,x) : num_hesvec(L.f,L.u,x) - -function LinearAlgebra.mul!(du::AbstractVector,L::HesVec,v::AbstractVector) - if L.autodiff - numauto_hesvec!(du,L.f,L.u,v,L.cache1,L.cache2,L.cache3) - else - num_hesvec!(du,L.f,L.u,v,L.cache1,L.cache2,L.cache3) - end -end - -struct HesVecGrad{G,T1,T2,uType} - g::G - cache1::T1 - cache2::T2 - u::uType - autodiff::Bool -end - -function HesVecGrad(g,u::AbstractArray;autodiff=false) - if autodiff - cache1 = ForwardDiff.Dual{DeivVecTag}.(u, u) - cache2 = ForwardDiff.Dual{DeivVecTag}.(u, u) - else - cache1 = similar(u) - cache2 = similar(u) - end - HesVecGrad(g,cache1,cache2,u,autodiff) -end - -Base.size(L::HesVecGrad) = (length(L.cache2),length(L.cache2)) -Base.size(L::HesVecGrad,i::Int) = length(L.cache2) -Base.:*(L::HesVecGrad,x::AbstractVector) = L.autodiff ? 
auto_hesvecgrad(L.g,L.u,x) : num_hesvecgrad(L.g,L.u,x) - -function LinearAlgebra.mul!(du::AbstractVector,L::HesVecGrad,v::AbstractVector) - if L.autodiff - auto_hesvecgrad!(du,L.g,L.u,v,L.cache1,L.cache2) - else - num_hesvecgrad!(du,L.g,L.u,v,L.cache1,L.cache2) - end -end +struct DeivVecTag end + +# J(f(x))*v +function auto_jacvec!(du, f, x, v, + cache1 = ForwardDiff.Dual{DeivVecTag}.(x, v), + cache2 = ForwardDiff.Dual{DeivVecTag}.(x, v)) + cache1 .= Dual{DeivVecTag}.(x, v) + f(cache2,cache1) + du .= partials.(cache2, 1) +end +function auto_jacvec(f, x, v) + partials.(f(Dual{DeivVecTag}.(x, v)), 1) +end + +function num_jacvec!(du,f,x,v,cache1 = similar(v), + cache2 = similar(v); + compute_f0 = true) + compute_f0 && (f(cache1,x)) + T = eltype(x) + # Should it be min? max? mean? + ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) + @. x += ϵ*v + f(cache2,x) + @. x -= ϵ*v + @. du = (cache2 - cache1)/ϵ +end + +function num_jacvec(f,x,v,f0=nothing) + f0 === nothing ? _f0 = f(x) : _f0 = f0 + T = eltype(x) + # Should it be min? max? mean? + ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(minimum(x))) + (f(x.+ϵ.*v) .- f(x))./ϵ +end + +function num_hesvec!(du,f,x,v, + cache1 = similar(v), + cache2 = similar(v), + cache3 = similar(v)) + cache = DiffEqDiffTools.GradientCache(v[1],cache1,Val{:central}) + g = let f=f,cache=cache + (dx,x) -> DiffEqDiffTools.finite_difference_gradient!(dx,f,x,cache) + end + T = eltype(x) + # Should it be min? max? mean? + ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) + @. x += ϵ*v + g(cache2,x) + @. x -= 2ϵ*v + g(cache3,x) + @. du = (cache2 - cache3)/(2ϵ) +end + +function num_hesvec(f,x,v) + g = (x) -> DiffEqDiffTools.finite_difference_gradient(f,x) + T = eltype(x) + # Should it be min? max? mean? + ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) + x += ϵ*v + gxp = g(x) + x -= 2ϵ*v + gxm = g(x) + (gxp - gxm)/(2ϵ) +end + +function numauto_hesvec!(du,f,x,v, + cache = ForwardDiff.GradientConfig(f,v), + cache1 = similar(v), + cache2 = similar(v)) + g = let f=f,x=x,cache=cache + g = (dx,x) -> ForwardDiff.gradient!(dx,f,x,cache) + end + T = eltype(x) + # Should it be min? max? mean? + ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) + @. x += ϵ*v + g(cache1,x) + @. x -= 2ϵ*v + g(cache2,x) + @. du = (cache1 - cache2)/(2ϵ) +end + +function numauto_hesvec(f,x,v) + g = (x) -> ForwardDiff.gradient(f,x) + T = eltype(x) + # Should it be min? max? mean? + ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) + x += ϵ*v + gxp = g(x) + x -= 2ϵ*v + gxm = g(x) + (gxp - gxm)/(2ϵ) +end + +function autonum_hesvec!(du,f,x,v, + cache1 = similar(v), + cache2 = ForwardDiff.Dual{DeivVecTag}.(x, v), + cache3 = ForwardDiff.Dual{DeivVecTag}.(x, v)) + cache = DiffEqDiffTools.GradientCache(v[1],cache1,Val{:central}) + g = (dx,x) -> DiffEqDiffTools.finite_difference_gradient!(dx,f,x,cache) + cache2 .= Dual{DeivVecTag}.(x, v) + g(cache3,cache2) + du .= partials.(cache3, 1) +end + +function autonum_hesvec(f,x,v) + g = (x) -> DiffEqDiffTools.finite_difference_gradient(f,x) + partials.(g(Dual{DeivVecTag}.(x, v)), 1) +end + +function num_hesvecgrad!(du,g,x,v, + cache2 = similar(v), + cache3 = similar(v)) + T = eltype(x) + # Should it be min? max? mean? + ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) + @. x += ϵ*v + g(cache2,x) + @. x -= 2ϵ*v + g(cache3,x) + @. du = (cache2 - cache3)/(2ϵ) +end + +function num_hesvecgrad(g,x,v) + T = eltype(x) + # Should it be min? max? mean? 
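+    # Heuristic step size: sqrt(eps) scaled by the magnitude of x trades the
+    # truncation error of the central difference against floating point
+    # roundoff, and keeps the perturbations x ± ϵv well-posed for large and small x.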
+ ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) + x += ϵ*v + gxp = g(x) + x -= 2ϵ*v + gxm = g(x) + (gxp - gxm)/(2ϵ) +end + +function auto_hesvecgrad!(du,g,x,v, + cache2 = ForwardDiff.Dual{DeivVecTag}.(x, v), + cache3 = ForwardDiff.Dual{DeivVecTag}.(x, v)) + cache2 .= Dual{DeivVecTag}.(x, v) + g(cache3,cache2) + du .= partials.(cache3, 1) +end + +function auto_hesvecgrad(g,x,v) + partials.(g(Dual{DeivVecTag}.(x, v)), 1) +end + +### Operator Forms + +struct JacVec{F,T1,T2,uType} + f::F + cache1::T1 + cache2::T2 + u::uType + autodiff::Bool +end + +function JacVec(f,u::AbstractArray;autodiff=true) + if autodiff + cache1 = ForwardDiff.Dual{DeivVecTag}.(u, u) + cache2 = ForwardDiff.Dual{DeivVecTag}.(u, u) + else + cache1 = similar(u) + cache2 = similar(u) + end + JacVec(f,cache1,cache2,u,autodiff) +end + +Base.size(L::JacVec) = (length(L.cache1),length(L.cache1)) +Base.size(L::JacVec,i::Int) = length(L.cache1) +Base.:*(L::JacVec,x::AbstractVector) = L.autodiff ? auto_jacvec(_u->L.f(_u),L.u,x) : num_jacvec(_u->L.f(_u),L.u,x) + +function LinearAlgebra.mul!(du::AbstractVector,L::JacVec,v::AbstractVector) + if L.autodiff + auto_jacvec!(du,(_du,_u)->L.f(_du,_u),L.u,v,L.cache1,L.cache2) + else + num_jacvec!(du,(_du,_u)->L.f(_du,_u),L.u,v,L.cache1,L.cache2) + end +end + +struct HesVec{F,T1,T2,uType} + f::F + cache1::T1 + cache2::T2 + cache3::T2 + u::uType + autodiff::Bool +end + +function HesVec(f,u::AbstractArray;autodiff=true) + if autodiff + cache1 = ForwardDiff.GradientConfig(f,u) + cache2 = similar(u) + cache3 = similar(u) + else + cache1 = similar(u) + cache2 = similar(u) + cache3 = similar(u) + end + HesVec(f,cache1,cache2,cache3,u,autodiff) +end + +Base.size(L::HesVec) = (length(L.cache2),length(L.cache2)) +Base.size(L::HesVec,i::Int) = length(L.cache2) +Base.:*(L::HesVec,x::AbstractVector) = L.autodiff ? numauto_hesvec(L.f,L.u,x) : num_hesvec(L.f,L.u,x) + +function LinearAlgebra.mul!(du::AbstractVector,L::HesVec,v::AbstractVector) + if L.autodiff + numauto_hesvec!(du,L.f,L.u,v,L.cache1,L.cache2,L.cache3) + else + num_hesvec!(du,L.f,L.u,v,L.cache1,L.cache2,L.cache3) + end +end + +struct HesVecGrad{G,T1,T2,uType} + g::G + cache1::T1 + cache2::T2 + u::uType + autodiff::Bool +end + +function HesVecGrad(g,u::AbstractArray;autodiff=false) + if autodiff + cache1 = ForwardDiff.Dual{DeivVecTag}.(u, u) + cache2 = ForwardDiff.Dual{DeivVecTag}.(u, u) + else + cache1 = similar(u) + cache2 = similar(u) + end + HesVecGrad(g,cache1,cache2,u,autodiff) +end + +Base.size(L::HesVecGrad) = (length(L.cache2),length(L.cache2)) +Base.size(L::HesVecGrad,i::Int) = length(L.cache2) +Base.:*(L::HesVecGrad,x::AbstractVector) = L.autodiff ? auto_hesvecgrad(L.g,L.u,x) : num_hesvecgrad(L.g,L.u,x) + +function LinearAlgebra.mul!(du::AbstractVector,L::HesVecGrad,v::AbstractVector) + if L.autodiff + auto_hesvecgrad!(du,L.g,L.u,v,L.cache1,L.cache2) + else + num_hesvecgrad!(du,L.g,L.u,v,L.cache1,L.cache2) + end +end diff --git a/src/differentiation/jaches_products_zygote.jl b/src/differentiation/jaches_products_zygote.jl new file mode 100644 index 00000000..17eb2123 --- /dev/null +++ b/src/differentiation/jaches_products_zygote.jl @@ -0,0 +1,40 @@ +function numback_hesvec!(du, f, x, v, cache1 = similar(v), cache2 = similar(v)) + g = let f=f + (dx, x) -> dx .= first(Zygote.gradient(f,x)) + end + T = eltype(x) + # Should it be min? max? mean? + ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) + @. x += ϵ*v + g(cache1,x) + @. x -= 2ϵ*v + g(cache2,x) + @. 
du = (cache1 - cache2)/(2ϵ) +end + +function numback_hesvec(f, x, v) + g = x -> first(Zygote.gradient(f,x)) + T = eltype(x) + # Should it be min? max? mean? + ϵ = sqrt(eps(real(T))) * max(one(real(T)), abs(norm(x))) + x += ϵ*v + gxp = g(x) + x -= 2ϵ*v + gxm = g(x) + (gxp - gxm)/(2ϵ) +end + +function autoback_hesvec!(du, f, x, v, cache2 = ForwardDiff.Dual{Nothing}.(x, v), + cache3 = ForwardDiff.Dual{Nothing}.(x, v)) + g = let f=f + (dx, x) -> dx .= first(Zygote.gradient(f,x)) + end + cache2 .= Dual{Nothing}.(x, v) + g(cache3,cache2) + du .= partials.(cache3, 1) +end + +function autoback_hesvec(f, x, v) + g = x -> first(Zygote.gradient(f,x)) + ForwardDiff.partials.(g(ForwardDiff.Dual{Nothing}.(x, v)), 1) +end diff --git a/src/program_sparsity/blas.jl b/src/program_sparsity/blas.jl index c031fb50..35151511 100644 --- a/src/program_sparsity/blas.jl +++ b/src/program_sparsity/blas.jl @@ -1,6 +1,3 @@ -using LinearAlgebra -import LinearAlgebra.BLAS - # generic implementations macro reroute(f, g) @@ -19,7 +16,7 @@ macro reroute(f, g) end end -@reroute BLAS.dot dot(Any, Any) -@reroute BLAS.axpy! axpy!(Any, - AbstractArray, - AbstractArray) +@reroute LinearAlgebra.BLAS.dot LinearAlgebra.dot(Any, Any) +@reroute LinearAlgebra.BLAS.axpy! LinearAlgebra.axpy!(Any, + AbstractArray, + AbstractArray) diff --git a/src/program_sparsity/hessian.jl b/src/program_sparsity/hessian.jl index 8d5734b9..0bcbbdb9 100644 --- a/src/program_sparsity/hessian.jl +++ b/src/program_sparsity/hessian.jl @@ -1,8 +1,3 @@ -using Cassette -import Cassette: tag, untag, Tagged, metadata, hasmetadata, istagged, canrecurse -import Core: SSAValue -using SparseArrays - # Tags: Cassette.@context HessianSparsityContext diff --git a/src/program_sparsity/linearity.jl b/src/program_sparsity/linearity.jl index f1d9ffb3..363f65f0 100644 --- a/src/program_sparsity/linearity.jl +++ b/src/program_sparsity/linearity.jl @@ -1,19 +1,14 @@ -using SpecialFunctions -import Base.Broadcast - const constant_funcs = [] const monadic_linear = [deg2rad, +, rad2deg, transpose, -, conj] -const monadic_nonlinear = [asind, log1p, acsch, erfc, digamma, acos, asec, acosh, airybiprime, acsc, cscd, log, tand, log10, csch, asinh, airyai, abs2, gamma, lgamma, erfcx, bessely0, cosh, sin, cos, atan, cospi, cbrt, acosd, bessely1, acoth, erfcinv, erf, dawson, inv, acotd, airyaiprime, erfinv, trigamma, asecd, besselj1, exp, acot, sqrt, sind, sinpi, asech, log2, tan, invdigamma, airybi, exp10, sech, erfi, coth, asin, cotd, cosd, sinh, abs, besselj0, csc, tanh, secd, atand, sec, acscd, cot, exp2, expm1, atanh] +const monadic_nonlinear = [asind, log1p, acsch, acos, asec, acosh, acsc, cscd, log, tand, log10, csch, asinh, abs2, cosh, sin, cos, atan, cospi, cbrt, acosd, acoth, inv, acotd, asecd, exp, acot, sqrt, sind, sinpi, asech, log2, tan, exp10, sech, coth, asin, cotd, cosd, sinh, abs, csc, tanh, secd, atand, sec, acscd, cot, exp2, expm1, atanh] -diadic_of_linearity(::Val{(true, true, true)}) = [+, rem2pi, -, >, isless, <, isequal, max, min, convert] -diadic_of_linearity(::Val{(true, true, false)}) = [*] -#diadic_of_linearit(::(Val{(true, false, true)}) = [besselk, hankelh2, bessely, besselj, besseli, polygamma, hankelh1] -diadic_of_linearity(::Val{(true, false, false)}) = [/] -diadic_of_linearity(::Val{(false, true, false)}) = [\] -diadic_of_linearity(::Val{(false, false, false)}) = [hypot, atan, mod, rem, lbeta, ^, beta] -diadic_of_linearity(::Val) = [] +const diadic_linear_true_true_true = [+, rem2pi, -, >, isless, <, isequal, max, min, convert] +const 
diadic_linear_true_true_false = [*]
+const diadic_linear_true_false_false = [ / ]
+const diadic_linear_false_true_false = [ \ ]
+const diadic_linear_false_false_false = [hypot, atan, mod, rem, ^]
 
 haslinearity(f, nargs) = false
 
@@ -27,34 +22,57 @@ for f in constant_funcs
 end
 
 # linearity of a single input function is either
-# Val{true}() or Val{false}()
+# Val(true) or Val(false)
 #
 for f in monadic_linear
     @eval begin
         haslinearity(::typeof($f), ::Val{1}) = true
-        linearity(::typeof($f), ::Val{1}) = Val{true}()
+        linearity(::typeof($f), ::Val{1}) = Val(true)
     end
 end
+
+for f in monadic_nonlinear
+    @eval begin
+        haslinearity(::typeof($f), ::Val{1}) = true
+        linearity(::typeof($f), ::Val{1}) = Val(false)
+    end
+end
+
 # linearity of a 2-arg function is:
-# Val{(linear11, linear22, linear12)}()
+# Val((linear11, linear22, linear12))
 #
 # linearIJ refers to the zeroness of d^2/dxIxJ
-for f in monadic_nonlinear
+for f in diadic_linear_true_true_true
     @eval begin
-        haslinearity(::typeof($f), ::Val{1}) = true
-        linearity(::typeof($f), ::Val{1}) = Val{false}()
+        haslinearity(::typeof($f), ::Val{2}) = true
+        linearity(::typeof($f), ::Val{2}) = Val((true, true, true))
+    end
+end
+
+for f in diadic_linear_true_true_false
+    @eval begin
+        haslinearity(::typeof($f), ::Val{2}) = true
+        linearity(::typeof($f), ::Val{2}) = Val((true, true, false))
    end
 end
 
-for linearity_mask = 0:2^3-1
-    lin = Val{map(x->x!=0, (linearity_mask & 4,
-                            linearity_mask & 2,
-                            linearity_mask & 1))}()
+for f in diadic_linear_true_false_false
+    @eval begin
+        haslinearity(::typeof($f), ::Val{2}) = true
+        linearity(::typeof($f), ::Val{2}) = Val((true, false, false))
+    end
+end
 
-    for f in diadic_of_linearity(lin)
-        @eval begin
-            haslinearity(::typeof($f), ::Val{2}) = true
-            linearity(::typeof($f), ::Val{2}) = $lin
-        end
+for f in diadic_linear_false_true_false
+    @eval begin
+        haslinearity(::typeof($f), ::Val{2}) = true
+        linearity(::typeof($f), ::Val{2}) = Val((false, true, false))
+    end
+end
+
+for f in diadic_linear_false_false_false
+    @eval begin
+        haslinearity(::typeof($f), ::Val{2}) = true
+        linearity(::typeof($f), ::Val{2}) = Val((false, false, false))
    end
 end
diff --git a/src/program_sparsity/linearity_special.jl b/src/program_sparsity/linearity_special.jl
new file mode 100644
index 00000000..c823fd30
--- /dev/null
+++ b/src/program_sparsity/linearity_special.jl
@@ -0,0 +1,29 @@
+const monadic_nonlinear_special = [airyai, airyaiprime, airybi, airybiprime, besselj0, besselj1, bessely0, bessely1, dawson, digamma, erf, erfc, erfcinv, erfi, erfinv, erfcx, gamma, invdigamma, lgamma, trigamma]
+
+const diadic_linear_false_false_false_special = [lbeta, beta]
+# const diadic_linear_true_false_true_special = [besselk, hankelh2, bessely, besselj, besseli, polygamma, hankelh1]
+
+for f in monadic_nonlinear_special
+    @eval begin
+        haslinearity(::typeof($f), ::Val{1}) = true
+        linearity(::typeof($f), ::Val{1}) = Val(false)
+    end
+end
+
+# linearity of a 2-arg function is:
+# Val((linear11, linear22, linear12))
+#
+# linearIJ refers to the zeroness of d^2/dxIxJ
+for f in diadic_linear_false_false_false_special
+    @eval begin
+        haslinearity(::typeof($f), ::Val{2}) = true
+        linearity(::typeof($f), ::Val{2}) = Val((false,false,false))
+    end
+end
+
+# for f in diadic_linear_true_false_true_special
+#     @eval begin
+#         haslinearity(::typeof($f), ::Val{2}) = true
+#         linearity(::typeof($f), ::Val{2}) = Val((true,false,true))
+#     end
+# end
diff --git a/test/program_sparsity/common.jl b/test/program_sparsity/common.jl
index 61de55a0..d6652425 100644
--- 
a/test/program_sparsity/common.jl +++ b/test/program_sparsity/common.jl @@ -1,11 +1,12 @@ ### Rules -using Cassette -using SparseArrays -import Cassette: tag, untag, Tagged, metadata, hasmetadata, istagged -using SparseDiffTools -import SparseDiffTools: Path, BranchesPass, SparsityContext, Fixed, - Input, Output, ProvinanceSet, Tainted, istainted, - alldone, reset!, HessianSparsityContext +using Cassette, SparseDiffTools +using SparseArrays, Test + +using Cassette: tag, untag, Tagged, metadata, hasmetadata, istagged +using SparseDiffTools: Path, BranchesPass, SparsityContext, Fixed, + Input, Output, ProvinanceSet, Tainted, istainted, + alldone, reset!, HessianSparsityContext +using SparseDiffTools: TermCombination function tester(f, Y, X, args...; sparsity=Sparsity(length(Y), length(X))) diff --git a/test/program_sparsity/hessian.jl b/test/program_sparsity/hessian.jl index 71371d07..64f89a48 100644 --- a/test/program_sparsity/hessian.jl +++ b/test/program_sparsity/hessian.jl @@ -1,6 +1,3 @@ -import SparseDiffTools: TermCombination -using Test - Term(i...) = TermCombination(Set([Dict(j=>1 for j in i)])) @test htesttag(x->x, [1,2]) == Input() diff --git a/test/runtests.jl b/test/runtests.jl index 1aeb06dd..604c3a17 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,14 +1,11 @@ -using SparseDiffTools -using Test - - -@testset "Exact coloring via contraction" begin include("test_contraction.jl") end -@testset "Greedy distance-1 coloring" begin include("test_greedy_d1.jl") end -@testset "Greedy star coloring" begin include("test_greedy_star.jl") end -@testset "Matrix to graph conversion" begin include("test_matrix2graph.jl") end -@testset "AD using color vector" begin include("test_ad.jl") end -@testset "Integration test" begin include("test_integration.jl") end -@testset "Special matrices" begin include("test_specialmatrices.jl") end -@testset "Jac Vecs and Hes Vecs" begin include("test_jaches_products.jl") end -@testset "Program sparsity computation" begin include("program_sparsity/testall.jl") end +using SafeTestsets +@time @safetestset "Exact coloring via contraction" begin include("test_contraction.jl") end +@time @safetestset "Greedy distance-1 coloring" begin include("test_greedy_d1.jl") end +@time @safetestset "Greedy star coloring" begin include("test_greedy_star.jl") end +@time @safetestset "Matrix to graph conversion" begin include("test_matrix2graph.jl") end +@time @safetestset "AD using color vector" begin include("test_ad.jl") end +@time @safetestset "Integration test" begin include("test_integration.jl") end +@time @safetestset "Special matrices" begin include("test_specialmatrices.jl") end +@time @safetestset "Jac Vecs and Hes Vecs" begin include("test_jaches_products.jl") end +@time @safetestset "Program sparsity computation" begin include("program_sparsity/testall.jl") end diff --git a/test/test_ad.jl b/test/test_ad.jl index 113eb795..3055ce4f 100644 --- a/test/test_ad.jl +++ b/test/test_ad.jl @@ -1,5 +1,6 @@ -using SparseDiffTools, SparseArrays, Test +using SparseDiffTools using ForwardDiff: Dual, jacobian +using SparseArrays, Test fcalls = 0 function f(dx,x) diff --git a/test/test_contraction.jl b/test/test_contraction.jl index 92d1311f..c8a2c162 100644 --- a/test/test_contraction.jl +++ b/test/test_contraction.jl @@ -1,7 +1,9 @@ using SparseDiffTools using VertexSafeGraphs using LightGraphs + using Random +Random.seed!(123) test_graphs = Array{VSafeGraph, 1}(undef, 0) diff --git a/test/test_greedy_d1.jl b/test/test_greedy_d1.jl index 808ae56f..9f717477 100644 --- 
a/test/test_greedy_d1.jl +++ b/test/test_greedy_d1.jl @@ -1,7 +1,10 @@ using SparseDiffTools using VertexSafeGraphs using LightGraphs +using Test + using Random +Random.seed!(123) test_graphs = Array{VSafeGraph, 1}(undef, 0) diff --git a/test/test_greedy_star.jl b/test/test_greedy_star.jl index 807d5bea..a1e54757 100644 --- a/test/test_greedy_star.jl +++ b/test/test_greedy_star.jl @@ -1,7 +1,8 @@ using SparseDiffTools using LightGraphs -using Random +using Test +using Random Random.seed!(123) #= Test data =# diff --git a/test/test_integration.jl b/test/test_integration.jl index e258ad8b..5d694f99 100644 --- a/test/test_integration.jl +++ b/test/test_integration.jl @@ -1,7 +1,8 @@ -using SparseDiffTools, SparseArrays -using LinearAlgebra -using DiffEqDiffTools -using Test +using SparseDiffTools +using DiffEqDiffTools: finite_difference_jacobian, finite_difference_jacobian! +using Cassette + +using LinearAlgebra, SparseArrays, Test fcalls = 0 function f(dx,x) @@ -31,20 +32,20 @@ colors = matrix_colors(true_jac) #Jacobian computed without coloring vector fcalls = 0 -J = DiffEqDiffTools.finite_difference_jacobian(f, rand(30)) +J = finite_difference_jacobian(f, rand(30)) @test J ≈ second_derivative_stencil(30) @test fcalls == 31 #Jacobian computed with coloring vectors fcalls = 0 _J = similar(true_jac) -DiffEqDiffTools.finite_difference_jacobian!(_J, f, rand(30), color = colors) +finite_difference_jacobian!(_J, f, rand(30), color = colors) @test fcalls == 4 @test _J ≈ J fcalls = 0 _J = similar(true_jac) _denseJ = collect(_J) -DiffEqDiffTools.finite_difference_jacobian!(_denseJ, f, rand(30), color = colors, sparsity=_J) +finite_difference_jacobian!(_denseJ, f, rand(30), color = colors, sparsity=_J) @test fcalls == 4 @test _denseJ ≈ J diff --git a/test/test_jaches_products.jl b/test/test_jaches_products.jl index 536bddcc..cb42f660 100644 --- a/test/test_jaches_products.jl +++ b/test/test_jaches_products.jl @@ -1,115 +1,118 @@ -using ForwardDiff, SparseDiffTools, LinearAlgebra, DiffEqDiffTools, - IterativeSolvers, Test - -const A = rand(300,300) -f(du,u) = mul!(du,A,u) -f(u) = A*u -x = rand(300) -v = rand(300) -du = similar(x) -g(u) = sum(abs2,u) -function h(x) - DiffEqDiffTools.finite_difference_gradient(g,x) -end -function h(dx,x) - DiffEqDiffTools.finite_difference_gradient!(dx,g,x) -end - -cache1 = ForwardDiff.Dual{SparseDiffTools.DeivVecTag}.(x, v) -cache2 = ForwardDiff.Dual{SparseDiffTools.DeivVecTag}.(x, v) -@test num_jacvec!(du, f, x, v) ≈ ForwardDiff.jacobian(f,similar(x),x)*v rtol=1e-6 -@test num_jacvec!(du, f, x, v, similar(v), similar(v)) ≈ ForwardDiff.jacobian(f,similar(x),x)*v rtol=1e-6 -@test num_jacvec(f, x, v) ≈ ForwardDiff.jacobian(f,similar(x),x)*v rtol=1e-6 - -@test auto_jacvec!(du, f, x, v) ≈ ForwardDiff.jacobian(f,similar(x),x)*v -@test auto_jacvec!(du, f, x, v, cache1, cache2) ≈ ForwardDiff.jacobian(f,similar(x),x)*v -@test auto_jacvec(f, x, v) ≈ ForwardDiff.jacobian(f,similar(x),x)*v - -@test num_hesvec!(du, g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 -@test num_hesvec!(du, g, x, v, similar(v), similar(v), similar(v)) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 -@test num_hesvec(g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 - -@test numauto_hesvec!(du, g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 -@test numauto_hesvec!(du, g, x, v, ForwardDiff.GradientConfig(g,x), similar(v), similar(v)) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 -@test numauto_hesvec(g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 - -@test autonum_hesvec!(du, g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 
-@test autonum_hesvec!(du, g, x, v, similar(v), cache1, cache2) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 -@test autonum_hesvec(g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 - -@test numback_hesvec!(du, g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 -@test numback_hesvec!(du, g, x, v, similar(v), similar(v)) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 -@test numback_hesvec(g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 - -cache3 = ForwardDiff.Dual{Nothing}.(x, v) -cache4 = ForwardDiff.Dual{Nothing}.(x, v) -@test autoback_hesvec!(du, g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 -@test autoback_hesvec!(du, g, x, v, cache3, cache4) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 -@test autoback_hesvec(g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 - -@test num_hesvecgrad!(du, h, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 -@test num_hesvecgrad!(du, h, x, v, similar(v), similar(v)) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 -@test num_hesvecgrad(h, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 - -@test auto_hesvecgrad!(du, h, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 -@test auto_hesvecgrad!(du, h, x, v, cache1, cache2) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 -@test auto_hesvecgrad(h, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 - -L = JacVec(f,x) -@test L*x ≈ auto_jacvec(f, x, x) -@test L*v ≈ auto_jacvec(f, x, v) -@test mul!(du,L,v) ≈ auto_jacvec(f, x, v) -L.u .= v -@test mul!(du,L,v) ≈ auto_jacvec(f, v, v) - -L = JacVec(f,x,autodiff=false) -@test L*x ≈ num_jacvec(f, x, x) -@test L*v ≈ num_jacvec(f, x, v) -L.u == x -@test mul!(du,L,v) ≈ num_jacvec(f, x, v) rtol=1e-6 -L.u .= v -@test mul!(du,L,v) ≈ num_jacvec(f, v, v) rtol=1e-6 - -### Integration test with IterativeSolvers -out = similar(v) -gmres!(out, L, v) - -x = rand(300) -v = rand(300) -L = HesVec(g,x,autodiff=false) -@test L*x ≈ num_hesvec(g, x, x) -@test L*v ≈ num_hesvec(g, x, v) -@test mul!(du,L,v) ≈ num_hesvec(g, x, v) rtol=1e-2 -L.u .= v -@test mul!(du,L,v) ≈ num_hesvec(g, v, v) rtol=1e-2 - -L = HesVec(g,x) -@test L*x ≈ numauto_hesvec(g, x, x) -@test L*v ≈ numauto_hesvec(g, x, v) -@test mul!(du,L,v) ≈ numauto_hesvec(g, x, v) rtol=1e-8 -L.u .= v -@test mul!(du,L,v) ≈ numauto_hesvec(g, v, v) rtol=1e-8 - -### Integration test with IterativeSolvers -out = similar(v) -gmres!(out, L, v) - -x = rand(300) -v = rand(300) -L = HesVecGrad(h,x,autodiff=false) -@test L*x ≈ num_hesvec(g, x, x) -@test L*v ≈ num_hesvec(g, x, v) -@test mul!(du,L,v) ≈ num_hesvec(g, x, v) rtol=1e-2 -L.u .= v -@test mul!(du,L,v) ≈ num_hesvec(g, v, v) rtol=1e-2 - -L = HesVecGrad(h,x,autodiff=true) -@test L*x ≈ autonum_hesvec(g, x, x) -@test L*v ≈ numauto_hesvec(g, x, v) -@test mul!(du,L,v) ≈ numauto_hesvec(g, x, v) rtol=1e-8 -L.u .= v -@test mul!(du,L,v) ≈ numauto_hesvec(g, v, v) rtol=1e-8 - -### Integration test with IterativeSolvers -out = similar(v) -gmres!(out, L, v) +using SparseDiffTools, ForwardDiff, DiffEqDiffTools, Zygote, IterativeSolvers +using LinearAlgebra, Test + +using Random +Random.seed!(123) + +const A = rand(300,300) +f(du,u) = mul!(du,A,u) +f(u) = A*u +x = rand(300) +v = rand(300) +du = similar(x) +g(u) = sum(abs2,u) +function h(x) + DiffEqDiffTools.finite_difference_gradient(g,x) +end +function h(dx,x) + DiffEqDiffTools.finite_difference_gradient!(dx,g,x) +end + +cache1 = ForwardDiff.Dual{SparseDiffTools.DeivVecTag}.(x, v) +cache2 = ForwardDiff.Dual{SparseDiffTools.DeivVecTag}.(x, v) +@test num_jacvec!(du, f, x, v) ≈ ForwardDiff.jacobian(f,similar(x),x)*v rtol=1e-6 +@test num_jacvec!(du, f, x, v, similar(v), similar(v)) ≈ ForwardDiff.jacobian(f,similar(x),x)*v 
rtol=1e-6 +@test num_jacvec(f, x, v) ≈ ForwardDiff.jacobian(f,similar(x),x)*v rtol=1e-6 + +@test auto_jacvec!(du, f, x, v) ≈ ForwardDiff.jacobian(f,similar(x),x)*v +@test auto_jacvec!(du, f, x, v, cache1, cache2) ≈ ForwardDiff.jacobian(f,similar(x),x)*v +@test auto_jacvec(f, x, v) ≈ ForwardDiff.jacobian(f,similar(x),x)*v + +@test num_hesvec!(du, g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 +@test num_hesvec!(du, g, x, v, similar(v), similar(v), similar(v)) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 +@test num_hesvec(g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 + +@test numauto_hesvec!(du, g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 +@test numauto_hesvec!(du, g, x, v, ForwardDiff.GradientConfig(g,x), similar(v), similar(v)) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 +@test numauto_hesvec(g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 + +@test autonum_hesvec!(du, g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 +@test autonum_hesvec!(du, g, x, v, similar(v), cache1, cache2) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 +@test autonum_hesvec(g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 + +@test numback_hesvec!(du, g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 +@test numback_hesvec!(du, g, x, v, similar(v), similar(v)) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 +@test numback_hesvec(g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 + +cache3 = ForwardDiff.Dual{Nothing}.(x, v) +cache4 = ForwardDiff.Dual{Nothing}.(x, v) +@test autoback_hesvec!(du, g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 +@test autoback_hesvec!(du, g, x, v, cache3, cache4) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 +@test autoback_hesvec(g, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-8 + +@test num_hesvecgrad!(du, h, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 +@test num_hesvecgrad!(du, h, x, v, similar(v), similar(v)) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 +@test num_hesvecgrad(h, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 + +@test auto_hesvecgrad!(du, h, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 +@test auto_hesvecgrad!(du, h, x, v, cache1, cache2) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 +@test auto_hesvecgrad(h, x, v) ≈ ForwardDiff.hessian(g,x)*v rtol=1e-2 + +L = JacVec(f,x) +@test L*x ≈ auto_jacvec(f, x, x) +@test L*v ≈ auto_jacvec(f, x, v) +@test mul!(du,L,v) ≈ auto_jacvec(f, x, v) +L.u .= v +@test mul!(du,L,v) ≈ auto_jacvec(f, v, v) + +L = JacVec(f,x,autodiff=false) +@test L*x ≈ num_jacvec(f, x, x) +@test L*v ≈ num_jacvec(f, x, v) +L.u == x +@test mul!(du,L,v) ≈ num_jacvec(f, x, v) rtol=1e-6 +L.u .= v +@test mul!(du,L,v) ≈ num_jacvec(f, v, v) rtol=1e-6 + +### Integration test with IterativeSolvers +out = similar(v) +gmres!(out, L, v) + +x = rand(300) +v = rand(300) +L = HesVec(g,x,autodiff=false) +@test L*x ≈ num_hesvec(g, x, x) +@test L*v ≈ num_hesvec(g, x, v) +@test mul!(du,L,v) ≈ num_hesvec(g, x, v) rtol=1e-2 +L.u .= v +@test mul!(du,L,v) ≈ num_hesvec(g, v, v) rtol=1e-2 + +L = HesVec(g,x) +@test L*x ≈ numauto_hesvec(g, x, x) +@test L*v ≈ numauto_hesvec(g, x, v) +@test mul!(du,L,v) ≈ numauto_hesvec(g, x, v) rtol=1e-8 +L.u .= v +@test mul!(du,L,v) ≈ numauto_hesvec(g, v, v) rtol=1e-8 + +### Integration test with IterativeSolvers +out = similar(v) +gmres!(out, L, v) + +x = rand(300) +v = rand(300) +L = HesVecGrad(h,x,autodiff=false) +@test L*x ≈ num_hesvec(g, x, x) +@test L*v ≈ num_hesvec(g, x, v) +@test mul!(du,L,v) ≈ num_hesvec(g, x, v) rtol=1e-2 +L.u .= v +@test mul!(du,L,v) ≈ num_hesvec(g, v, v) rtol=1e-2 + +L = HesVecGrad(h,x,autodiff=true) +@test L*x ≈ autonum_hesvec(g, x, x) +@test L*v ≈ numauto_hesvec(g, x, v) 
+@test mul!(du,L,v) ≈ numauto_hesvec(g, x, v) rtol=1e-8 +L.u .= v +@test mul!(du,L,v) ≈ numauto_hesvec(g, v, v) rtol=1e-8 + +### Integration test with IterativeSolvers +out = similar(v) +gmres!(out, L, v)
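For reference alongside the operator tests above, here is a minimal matrix-free linear-solve sketch using the `JacVec` operator with `IterativeSolvers.gmres!`, the same pattern these tests exercise. The matrix `A`, the function `f`, and the sizes are illustrative assumptions, not part of the package:

```julia
using SparseDiffTools, IterativeSolvers, LinearAlgebra

A = rand(50, 50) + 10I     # illustrative, diagonally dominated so gmres converges quickly
f(du, u) = mul!(du, A, u)  # in-place form, used by mul!(x, J, v)
f(u) = A * u               # out-of-place form, used by J * v

u0 = rand(50)              # linearization point
b  = rand(50)
J  = JacVec(f, u0)         # lazy J(u0); the Jacobian matrix is never materialized

x = zeros(50)
gmres!(x, J, b)            # solves J(u0) * x = b matrix-free
@assert norm(A * x - b) / norm(b) < 1e-6  # for this linear f, J(u0) == A

J.u .= rand(50)            # move the linearization point in place, no reallocation
gmres!(x, J, b)
```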