From 76ec61a4fd8c8e51ee8b27d481612e12e5334ffc Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Thu, 16 Feb 2017 13:29:57 -0500
Subject: [PATCH 01/26] add method for making single seed and improve
 performance of multi-seed constructor

---
 src/api_utils.jl | 7 +------
 src/partials.jl  | 6 +++++-
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/api_utils.jl b/src/api_utils.jl
index 26839b41..5badd076 100644
--- a/src/api_utils.jl
+++ b/src/api_utils.jl
@@ -55,12 +55,7 @@ end
 ##################################
 
 @generated function construct_seeds{N,T}(::Type{Partials{N,T}})
-    ex = Expr(:tuple, [:(setindex(zero_partials, seed_unit, $i)) for i in 1:N]...)
-    return quote
-        seed_unit = one(T)
-        zero_partials = zero(Partials{$N,T})
-        return $ex
-    end
+    return Expr(:tuple, [:(single_seed(Partials{N,T}, Val{$i})) for i in 1:N]...)
 end
 
 function seed!{N,T}(duals::AbstractArray{Dual{N,T}}, x,
diff --git a/src/partials.jl b/src/partials.jl
index 03f87be2..8798ceeb 100644
--- a/src/partials.jl
+++ b/src/partials.jl
@@ -6,6 +6,11 @@ end
 # Utility/Accessor Functions #
 ##############################
 
+@generated function single_seed{N,T,i}(::Type{Partials{N,T}}, ::Type{Val{i}})
+    ex = Expr(:tuple, [ifelse(i === j, :(one(T)), :(zero(T))) for j in 1:N]...)
+    return :(Partials($(ex)))
+end
+
 @inline valtype{N,T}(::Partials{N,T}) = T
 @inline valtype{N,T}(::Type{Partials{N,T}}) = T
 
@@ -16,7 +21,6 @@ end
 @inline Base.size{N}(::Partials{N}) = (N,)
 
 @inline Base.getindex(partials::Partials, i::Int) = partials.values[i]
-setindex{N,T}(partials::Partials{N,T}, v, i) = Partials{N,T}((partials[1:i-1]..., v, partials[i+1:N]...))
 
 Base.start(partials::Partials) = start(partials.values)
 Base.next(partials::Partials, i) = next(partials.values, i)

From dbc90818ef314416c211837f0aa0065fe49c2449 Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Thu, 16 Feb 2017 15:22:18 -0500
Subject: [PATCH 02/26] implement multi-arg form of derivative

---
 src/derivative.jl | 14 +++++++++++---
 src/dual.jl       |  1 +
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/derivative.jl b/src/derivative.jl
index 3466e293..ba74770b 100644
--- a/src/derivative.jl
+++ b/src/derivative.jl
@@ -2,9 +2,14 @@
 # API methods #
 ###############
 
-derivative{F}(f::F, x) = extract_derivative(f(Dual(x, one(x))))
+derivative{F}(f::F, x::Real) = extract_derivative(f(Dual(x, one(x))))
 
-function derivative!{F}(out, f::F, x)
+@generated function derivative{F,N}(f::F, x::NTuple{N,Real})
+    args = [:(Dual(x[$i], Val{N}, Val{$i})) for i in 1:N]
+    return :(extract_derivative(f($(args...))))
+end
+
+function derivative!{F}(out, f::F, x::Real)
     y = f(Dual(x, one(x)))
     extract_derivative!(out, y)
     return out
@@ -14,7 +19,10 @@ end
 # result extraction #
 #####################
 
-@inline extract_derivative(y::Real) = partials(y, 1)
+@generated extract_derivative{N}(y::Dual{N}) = Expr(:tuple, [:(partials(y, $i)) for i in 1:N]...)
+
+@inline extract_derivative(y::Dual{1}) = partials(y, 1)
+@inline extract_derivative(y::Real) = zero(y)
 @inline extract_derivative(y::AbstractArray) = extract_derivative!(similar(y, valtype(eltype(y))), y)
 
 extract_derivative!(out::AbstractArray, y::AbstractArray) = map!(extract_derivative, out, y)
diff --git a/src/dual.jl b/src/dual.jl
index 8610bd0f..72d167a3 100644
--- a/src/dual.jl
+++ b/src/dual.jl
@@ -23,6 +23,7 @@ end
 Dual(value::Real, partials::Tuple) = Dual(value, Partials(partials))
 Dual(value::Real, partials::Tuple{}) = Dual(value, Partials{0,typeof(value)}(partials))
 Dual(value::Real, partials::Real...) = Dual(value, partials)
+Dual{T<:Real,N,i}(value::T, ::Type{Val{N}}, ::Type{Val{i}}) = Dual(value, single_seed(Partials{N,T}, Val{i}))
 
 ##############################
 # Utility/Accessor Functions #

From 2e8d016d534b9c482667fd9514fd8c00a4e89aa0 Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Fri, 17 Feb 2017 08:50:44 -0500
Subject: [PATCH 03/26] refactor Dual to include a tag type parameter

This enables us to throw the proper error(s) in case of perturbation confusion
---
 src/dual.jl | 475 +++++++++++++++++++++++++---------------------------
 1 file changed, 228 insertions(+), 247 deletions(-)

diff --git a/src/dual.jl b/src/dual.jl
index 72d167a3..098e6b45 100644
--- a/src/dual.jl
+++ b/src/dual.jl
@@ -1,200 +1,158 @@
-const ExternalReal = Union{subtypes(Real)...}
-
 ########
 # Dual #
 ########
 
-immutable Dual{N,T<:Real} <: Real
-    value::T
-    partials::Partials{N,T}
+immutable Dual{T,V<:Real,N} <: Real
+    value::V
+    partials::Partials{N,V}
+end
+
+####################
+# TagMismatchError #
+####################
+
+immutable TagMismatchError{X,Y} <: Exception
+    x::Dual{X}
+    y::Dual{Y}
+end
+
+function Base.showerror{X,Y}(io::IO, e::TagMismatchError{X,Y})
+    print(io, "potential perturbation confusion detected when computing binary operation ",
+              "on $(e.x) and $(e.y) (tag $X != tag $Y). ForwardDiff cannot safely perform ",
+              "differentiation in this context; see the following issue for details: ",
+              "https://github.com/JuliaDiff/ForwardDiff.jl/issues/83")
 end
 
 ################
 # Constructors #
 ################
 
-Dual{N,T}(value::T, partials::Partials{N,T}) = Dual{N,T}(value, partials)
+(::Type{Dual{T}}){T,N,V}(value::V, partials::Partials{N,V}) = Dual{T,V,N}(value, partials)
 
-function Dual{N,A,B}(value::A, partials::Partials{N,B})
-    T = promote_type(A, B)
-    return Dual(convert(T, value), convert(Partials{N,T}, partials))
+function (::Type{Dual{T}}){T,N,A,B}(value::A, partials::Partials{N,B})
+    C = promote_type(A, B)
+    return Dual{T}(convert(C, value), convert(Partials{N,C}, partials))
 end
 
-Dual(value::Real, partials::Tuple) = Dual(value, Partials(partials))
-Dual(value::Real, partials::Tuple{}) = Dual(value, Partials{0,typeof(value)}(partials))
-Dual(value::Real, partials::Real...) = Dual(value, partials)
-Dual{T<:Real,N,i}(value::T, ::Type{Val{N}}, ::Type{Val{i}}) = Dual(value, single_seed(Partials{N,T}, Val{i}))
+(::Type{Dual{T}}){T}(value::Real, partials::Tuple) = Dual{T}(value, Partials(partials))
+(::Type{Dual{T}}){T}(value::Real, partials::Tuple{}) = Dual{T}(value, Partials{0,typeof(value)}(partials))
+(::Type{Dual{T}}){T}(value::Real, partials::Real...) = Dual{T}(value, partials)
+(::Type{Dual{T}}){T,V<:Real,N,i}(value::V, ::Type{Val{N}}, ::Type{Val{i}}) = Dual{T}(value, single_seed(Partials{N,V}, Val{i}))
+
+Dual(args...) = Dual{Void}(args...)
 
 ##############################
 # Utility/Accessor Functions #
 ##############################
 
 @inline value(x::Real) = x
-@inline value(n::Dual) = n.value
+@inline value(d::Dual) = d.value
 
 @inline partials(x::Real) = Partials{0,typeof(x)}(tuple())
-@inline partials(n::Dual) = n.partials
+@inline partials(d::Dual) = d.partials
 @inline partials(x::Real, i...) = zero(x)
-@inline partials(n::Dual, i) = n.partials[i]
-@inline partials(n::Dual, i, j) = partials(n, i).partials[j]
-@inline partials(n::Dual, i, j, k...) = partials(partials(n, i, j), k...)
-
-@inline npartials{N}(::Dual{N}) = N
-@inline npartials{N,T}(::Type{Dual{N,T}}) = N
-
-@inline degree{T}(::T) = degree(T)
-@inline degree{T}(::Type{T}) = 0
-degree{N,T}(::Type{Dual{N,T}}) = 1 + degree(T)
+@inline partials(d::Dual, i) = d.partials[i]
+@inline partials(d::Dual, i, j) = partials(d, i).partials[j]
+@inline partials(d::Dual, i, j, k...) = partials(partials(d, i, j), k...)
 
-@inline valtype{T}(::T) = T
-@inline valtype{T}(::Type{T}) = T
-@inline valtype{N,T}(::Dual{N,T}) = T
-@inline valtype{N,T}(::Type{Dual{N,T}}) = T
+@inline valtype{V}(::V) = V
+@inline valtype{V}(::Type{V}) = V
+@inline valtype{T,V,N}(::Dual{T,V,N}) = V
+@inline valtype{T,V,N}(::Type{Dual{T,V,N}}) = V
 
 #####################
 # Generic Functions #
 #####################
 
-macro ambiguous(ex)
-    def = ex.head == :macrocall ? ex.args[2] : ex
-    sig = def.args[1]
-    body = def.args[2]
-    f = isa(sig.args[1], Expr) && sig.args[1].head == :curly ? sig.args[1].args[1] : sig.args[1]
-    a, b = sig.args[2].args[1], sig.args[3].args[1]
-    Ta, Tb = sig.args[2].args[2], sig.args[3].args[2]
-    if isa(a, Symbol) && isa(b, Symbol) && isa(Ta, Symbol) && isa(Tb, Symbol)
-        if Ta == :Real && Tb == :Dual
-            return quote
-                @inline $(f){A<:ExternalReal,B<:Dual}(a::Dual{0,A}, b::Dual{0,B}) = Dual($(f)(value(a), value(b)))
-                @inline $(f){M,A<:ExternalReal,B<:Dual}(a::Dual{0,A}, b::Dual{M,B}) = $(f)(value(a), b)
-                @inline $(f){N,A<:ExternalReal,B<:Dual}(a::Dual{N,A}, b::Dual{0,B}) = $(f)(a, value(b))
-                @inline $(f){N,A<:ExternalReal,B<:Dual}($(a)::Dual{N,A}, $(b)::Dual{N,B}) = $(body)
-                @inline $(f){N,M,A<:ExternalReal,B<:Dual}($(a)::Dual{N,A}, $(b)::Dual{M,B}) = $(body)
-                $(esc(ex))
-            end
-        elseif Ta == :Dual && Tb == :Real
-            return quote
-                @inline $(f){A<:Dual,B<:ExternalReal}(a::Dual{0,A}, b::Dual{0,B}) = Dual($(f)(value(a), value(b)))
-                @inline $(f){M,A<:Dual,B<:ExternalReal}(a::Dual{0,A}, b::Dual{M,B}) = $(f)(value(a), b)
-                @inline $(f){N,A<:Dual,B<:ExternalReal}(a::Dual{N,A}, b::Dual{0,B}) = $(f)(a, value(b))
-                @inline $(f){N,A<:Dual,B<:ExternalReal}($(a)::Dual{N,A}, $(b)::Dual{N,B}) = $(body)
-                @inline $(f){N,M,A<:Dual,B<:ExternalReal}($(a)::Dual{N,A}, $(b)::Dual{M,B}) = $(body)
-                $(esc(ex))
-            end
-        else
-            return esc(ex)
-        end
-    end
-    return quote
-        @inline $(f){N,M,A<:Real,B<:Real}(a::Dual{N,A}, b::Dual{M,B}) = error("npartials($(typeof(a))) != npartials($(typeof(b)))")
-        if !(in($f, (isequal, ==, isless, <, <=, <)))
-            @inline $(f){A<:Real,B<:Real}(a::Dual{0,A}, b::Dual{0,B}) = Dual($(f)(value(a), value(b)))
-            @inline $(f){M,A<:Real,B<:Real}(a::Dual{0,A}, b::Dual{M,B}) = $(f)(value(a), b)
-            @inline $(f){N,A<:Real,B<:Real}(a::Dual{N,A}, b::Dual{0,B}) = $(f)(a, value(b))
-        end
-        $(esc(ex))
-    end
-end
+Base.copy(d::Dual) = d
 
-Base.copy(n::Dual) = n
-
-Base.eps(n::Dual) = eps(value(n))
+Base.eps(d::Dual) = eps(value(d))
 Base.eps{D<:Dual}(::Type{D}) = eps(valtype(D))
 
-Base.rtoldefault{N, T <: Real}(::Type{Dual{N,T}}) = Base.rtoldefault(T)
+Base.rtoldefault{D<:Dual}(::Type{D}) = Base.rtoldefault(valtype(D))
 
-Base.floor{T<:Real}(::Type{T}, n::Dual) = floor(T, value(n))
-Base.floor(n::Dual) = floor(value(n))
+Base.floor{R<:Real}(::Type{R}, d::Dual) = floor(R, value(d))
+Base.floor(d::Dual) = floor(value(d))
 
-Base.ceil{T<:Real}(::Type{T}, n::Dual) = ceil(T, value(n))
-Base.ceil(n::Dual) = ceil(value(n))
+Base.ceil{R<:Real}(::Type{R}, d::Dual) = ceil(R, value(d))
+Base.ceil(d::Dual) = ceil(value(d))
 
-Base.trunc{T<:Real}(::Type{T}, n::Dual) = trunc(T, value(n))
-Base.trunc(n::Dual) = trunc(value(n))
+Base.trunc{R<:Real}(::Type{R}, d::Dual) = trunc(R, value(d))
+Base.trunc(d::Dual) = trunc(value(d))
 
-Base.round{T<:Real}(::Type{T}, n::Dual) = round(T, value(n))
-Base.round(n::Dual) = round(value(n))
+Base.round{R<:Real}(::Type{R}, d::Dual) = round(R, value(d))
+Base.round(d::Dual) = round(value(d))
 
-Base.hash(n::Dual) = hash(value(n))
-Base.hash(n::Dual, hsh::UInt64) = hash(value(n), hsh)
+Base.hash(d::Dual) = hash(value(d))
+Base.hash(d::Dual, hsh::UInt64) = hash(value(d), hsh)
 
-function Base.read{N,T}(io::IO, ::Type{Dual{N,T}})
-    value = read(io, T)
-    partials = read(io, Partials{N,T})
-    return Dual{N,T}(value, partials)
+function Base.read{T,V,N}(io::IO, ::Type{Dual{T,V,N}})
+    value = read(io, V)  # scalar value is stored first (see Base.write below)
+    partials = read(io, Partials{N,V})
+    return Dual{T,V,N}(value, partials)  # parameter order is {tag, value type, npartials}
+end
 
-function Base.write(io::IO, n::Dual)
-    write(io, value(n))
-    write(io, partials(n))
+function Base.write(io::IO, d::Dual)
+    write(io, value(d))
+    write(io, partials(d))
 end
 
-@inline Base.zero(n::Dual) = zero(typeof(n))
-@inline Base.zero{N,T}(::Type{Dual{N,T}}) = Dual(zero(T), zero(Partials{N,T}))
+@inline Base.zero(d::Dual) = zero(typeof(d))
+@inline Base.zero{T,V,N}(::Type{Dual{T,V,N}}) = Dual{T}(zero(V), zero(Partials{N,V}))
 
-@inline Base.one(n::Dual) = one(typeof(n))
-@inline Base.one{N,T}(::Type{Dual{N,T}}) = Dual(one(T), zero(Partials{N,T}))
+@inline Base.one(d::Dual) = one(typeof(d))
+@inline Base.one{T,V,N}(::Type{Dual{T,V,N}}) = Dual{T}(one(V), zero(Partials{N,V}))
 
-@inline Base.rand(n::Dual) = rand(typeof(n))
-@inline Base.rand{N,T}(::Type{Dual{N,T}}) = Dual(rand(T), zero(Partials{N,T}))
-@inline Base.rand(rng::AbstractRNG, n::Dual) = rand(rng, typeof(n))
-@inline Base.rand{N,T}(rng::AbstractRNG, ::Type{Dual{N,T}}) = Dual(rand(rng, T), zero(Partials{N,T}))
+@inline Base.rand(d::Dual) = rand(typeof(d))
+@inline Base.rand{T,V,N}(::Type{Dual{T,V,N}}) = Dual{T}(rand(V), zero(Partials{N,V}))
+@inline Base.rand(rng::AbstractRNG, d::Dual) = rand(rng, typeof(d))
+@inline Base.rand{T,V,N}(rng::AbstractRNG, ::Type{Dual{T,V,N}}) = Dual{T}(rand(rng, V), zero(Partials{N,V}))
 
 # Predicates #
 #------------#
 
-isconstant(n::Dual) = iszero(partials(n))
-
-@ambiguous Base.isequal{N}(a::Dual{N}, b::Dual{N}) = isequal(value(a), value(b))
-@ambiguous Base.:(==){N}(a::Dual{N}, b::Dual{N}) = value(a) == value(b)
-@ambiguous Base.isless{N}(a::Dual{N}, b::Dual{N}) = value(a) < value(b)
-@ambiguous Base.:<{N}(a::Dual{N}, b::Dual{N}) = isless(a, b)
-@ambiguous Base.:(<=){N}(a::Dual{N}, b::Dual{N}) = <=(value(a), value(b))
-
-for T in (AbstractFloat, Irrational, Real)
-    Base.isequal(n::Dual, x::T) = isequal(value(n), x)
-    Base.isequal(x::T, n::Dual) = isequal(n, x)
-
-    Base.:(==)(n::Dual, x::T) = (value(n) == x)
-    Base.:(==)(x::T, n::Dual) = ==(n, x)
+isconstant(d::Dual) = iszero(partials(d))
 
-    Base.isless(n::Dual, x::T) = value(n) < x
-    Base.isless(x::T, n::Dual) = x < value(n)
-
-    Base.:<(n::Dual, x::T) = isless(n, x)
-    Base.:<(x::T, n::Dual) = isless(x, n)
-
-    Base.:(<=)(n::Dual, x::T) = <=(value(n), x)
-    Base.:(<=)(x::T, n::Dual) = <=(x, value(n))
+for pred in (:isequal, :(==), :isless, :(<=), :<)
+    @eval begin
+        Base.$(pred)(x::Dual, y::Dual) = throw(TagMismatchError(x, y))
+        Base.$(pred){T}(x::Dual{T}, y::Dual{T}) = $(pred)(value(x), value(y))
+        Base.$(pred){X,Y,V,N}(x::Dual{X}, y::Dual{Y,Dual{X,V,N}}) = $(pred)(x, value(y))
+        Base.$(pred){X,Y,V,N}(x::Dual{X,Dual{Y,V,N}}, y::Dual{Y}) = $(pred)(value(x), y)
+    end
+    for R in (:AbstractFloat, :Irrational, :Real)
+        @eval begin
+            Base.$(pred)(x::Dual, y::$R) = $(pred)(value(x), y)
+            Base.$(pred)(x::$R, y::Dual) = $(pred)(x, value(y))
+        end
+    end
 end
 
-Base.isnan(n::Dual) = isnan(value(n))
-Base.isfinite(n::Dual) = isfinite(value(n))
-Base.isinf(n::Dual) = isinf(value(n))
-Base.isreal(n::Dual) = isreal(value(n))
-Base.isinteger(n::Dual) = isinteger(value(n))
-Base.iseven(n::Dual) = iseven(value(n))
-Base.isodd(n::Dual) = isodd(value(n))
+Base.isnan(d::Dual) = isnan(value(d))
+Base.isfinite(d::Dual) = isfinite(value(d))
+Base.isinf(d::Dual) = isinf(value(d))
+Base.isreal(d::Dual) = isreal(value(d))
+Base.isinteger(d::Dual) = isinteger(value(d))
+Base.iseven(d::Dual) = iseven(value(d))
+Base.isodd(d::Dual) = isodd(value(d))
 
 ########################
 # Promotion/Conversion #
 ########################
 
-Base.promote_rule{N1,N2,A<:Real,B<:Real}(D1::Type{Dual{N1,A}}, D2::Type{Dual{N2,B}}) = error("can't promote $(D1) and $(D2)")
-Base.promote_rule{N,A<:Real,B<:Real}(::Type{Dual{N,A}}, ::Type{Dual{N,B}}) = Dual{N,promote_type(A, B)}
-Base.promote_rule{N,T<:Real}(::Type{Dual{N,T}}, ::Type{BigFloat}) = Dual{N,promote_type(T, BigFloat)}
-Base.promote_rule{N,T<:Real}(::Type{BigFloat}, ::Type{Dual{N,T}}) = Dual{N,promote_type(BigFloat, T)}
-Base.promote_rule{N,T<:Real}(::Type{Dual{N,T}}, ::Type{Bool}) = Dual{N,promote_type(T, Bool)}
-Base.promote_rule{N,T<:Real}(::Type{Bool}, ::Type{Dual{N,T}}) = Dual{N,promote_type(Bool, T)}
-Base.promote_rule{N,T<:Real,s}(::Type{Dual{N,T}}, ::Type{Irrational{s}}) = Dual{N,promote_type(T, Irrational{s})}
-Base.promote_rule{N,s,T<:Real}(::Type{Irrational{s}}, ::Type{Dual{N,T}}) = Dual{N,promote_type(Irrational{s}, T)}
-Base.promote_rule{N,A<:Real,B<:Real}(::Type{Dual{N,A}}, ::Type{B}) = Dual{N,promote_type(A, B)}
-Base.promote_rule{N,A<:Real,B<:Real}(::Type{A}, ::Type{Dual{N,B}}) = Dual{N,promote_type(A, B)}
-
-Base.convert(::Type{Dual}, n::Dual) = n
-Base.convert{N,T<:Real}(::Type{Dual{N,T}}, n::Dual{N}) = Dual(convert(T, value(n)), convert(Partials{N,T}, partials(n)))
-Base.convert{D<:Dual}(::Type{D}, n::D) = n
-Base.convert{N,T<:Real}(::Type{Dual{N,T}}, x::Real) = Dual(convert(T, x), zero(Partials{N,T}))
+Base.promote_rule{T,A<:Real,B<:Real,N}(::Type{Dual{T,A,N}}, ::Type{Dual{T,B,N}}) = Dual{T,promote_type(A, B),N}
+
+for R in (:BigFloat, :Bool, :Irrational, :Real)
+    @eval begin
+        Base.promote_rule{R<:$R,T,V<:Real,N}(::Type{R}, ::Type{Dual{T,V,N}}) = Dual{T,promote_type(R, V),N}
+        Base.promote_rule{T,V<:Real,N,R<:$R}(::Type{Dual{T,V,N}}, ::Type{R}) = Dual{T,promote_type(V, R),N}
+    end
+end
+
+Base.convert(::Type{Dual}, d::Dual) = d
+Base.convert{T,V<:Real,N}(::Type{Dual{T,V,N}}, d::Dual{T}) = Dual{T}(convert(V, value(d)), convert(Partials{N,V}, partials(d)))
+Base.convert{D<:Dual}(::Type{D}, d::D) = d  # identity conversion: already of type D
+Base.convert{T,V<:Real,N}(::Type{Dual{T,V,N}}, x::Real) = Dual{T}(V(x), zero(Partials{N,V}))
 Base.convert(::Type{Dual}, x::Real) = Dual(x)
 
 Base.promote_array_type{D<:Dual, A<:AbstractFloat}(F, ::Type{D}, ::Type{A}) = promote_type(D, A)
@@ -202,7 +160,8 @@ Base.promote_array_type{D<:Dual, A<:AbstractFloat, P}(F, ::Type{D}, ::Type{A}, :
 Base.promote_array_type{A<:AbstractFloat, D<:Dual}(F, ::Type{A}, ::Type{D}) = promote_type(D, A)
 Base.promote_array_type{A<:AbstractFloat, D<:Dual, P}(F, ::Type{A}, ::Type{D}, ::Type{P}) = P
 
-Base.float{N,T}(n::Dual{N,T}) = Dual{N,promote_type(T, Float16)}(n)
+Base.float{T,V,N}(d::Dual{T,V,N}) = Dual{T,promote_type(V, Float16),N}(d)
+Base.AbstractFloat{T,V,N}(d::Dual{T,V,N}) = Dual{T,promote_type(V, Float16),N}(d)
 
 ########
 # Math #
@@ -211,76 +170,113 @@ Base.float{N,T}(n::Dual{N,T}) = Dual{N,promote_type(T, Float16)}(n)
 # Addition/Subtraction #
 #----------------------#
 
-@ambiguous @inline Base.:+{N}(n1::Dual{N}, n2::Dual{N}) = Dual(value(n1) + value(n2), partials(n1) + partials(n2))
-@ambiguous @inline Base.:+(n::Dual, x::Real) = Dual(value(n) + x, partials(n))
-@ambiguous @inline Base.:+(x::Real, n::Dual) = n + x
+@inline Base.:+(x::Dual, y::Dual) = throw(TagMismatchError(x, y))
+@inline Base.:+{T}(x::Dual{T}, y::Dual{T}) = Dual{T}(value(x) + value(y), partials(x) + partials(y))
+@inline Base.:+{T}(x::Dual{T}, y::Real) = Dual{T}(value(x) + y, partials(x))
+@inline Base.:+{X,Y,V,N}(x::Dual{X,Dual{Y,V,N}}, y::Dual{Y}) = Dual{X}(value(x) + y, partials(x))
+@inline Base.:+{T}(x::Real, y::Dual{T}) = Dual{T}(x + value(y), partials(y))
+@inline Base.:+{X,Y,V,N}(x::Dual{X}, y::Dual{Y,Dual{X,V,N}}) = Dual{Y}(x + value(y), partials(y))
 
-@ambiguous @inline Base.:-{N}(n1::Dual{N}, n2::Dual{N}) = Dual(value(n1) - value(n2), partials(n1) - partials(n2))
-@ambiguous @inline Base.:-(n::Dual, x::Real) = Dual(value(n) - x, partials(n))
-@ambiguous @inline Base.:-(x::Real, n::Dual) = Dual(x - value(n), -(partials(n)))
-@inline Base.:-(n::Dual) = Dual(-(value(n)), -(partials(n)))
+@inline Base.:-(x::Dual, y::Dual) = throw(TagMismatchError(x, y))
+@inline Base.:-{T}(x::Dual{T}, y::Dual{T}) = Dual{T}(value(x) - value(y), partials(x) - partials(y))
+@inline Base.:-{T}(x::Dual{T}, y::Real) = Dual{T}(value(x) - y, partials(x))
+@inline Base.:-{X,Y,V,N}(x::Dual{X,Dual{Y,V,N}}, y::Dual{Y}) = Dual{X}(value(x) - y, partials(x))
+@inline Base.:-{T}(x::Real, y::Dual{T}) = Dual{T}(x - value(y), -partials(y))
+@inline Base.:-{X,Y,V,N}(x::Dual{X}, y::Dual{Y,Dual{X,V,N}}) = Dual{Y}(x - value(y), -partials(y))
+
+@inline Base.:-{T}(d::Dual{T}) = Dual{T}(-value(d), -partials(d))  # keep the tag; bare Dual(...) would retag as Void
 
 # Multiplication #
 #----------------#
 
-@inline Base.:*(n::Dual, x::Bool) = x ? n : (signbit(value(n))==0 ? zero(n) : -zero(n))
-@inline Base.:*(x::Bool, n::Dual) = n * x
+@inline Base.:*(x::Dual, y::Dual) = throw(TagMismatchError(x, y))
 
-@ambiguous @inline function Base.:*{N}(n1::Dual{N}, n2::Dual{N})
-    v1, v2 = value(n1), value(n2)
-    return Dual(v1 * v2, _mul_partials(partials(n1), partials(n2), v2, v1))
+@inline function Base.:*{T}(x::Dual{T}, y::Dual{T})
+    vx, vy = value(x), value(y)
+    return Dual{T}(vx * vy, _mul_partials(partials(x), partials(y), vy, vx))
 end
 
-@ambiguous @inline Base.:*(n::Dual, x::Real) = Dual(value(n) * x, partials(n) * x)
-@ambiguous @inline Base.:*(x::Real, n::Dual) = n * x
+@inline Base.:*{T}(x::Dual{T}, y::Real) = Dual{T}(value(x) * y, partials(x) * y)
+@inline Base.:*{X,Y,V,N}(x::Dual{X,Dual{Y,V,N}}, y::Dual{Y}) = Dual{X}(value(x) * y, partials(x) * y)
+
+@inline Base.:*{T}(x::Real, y::Dual{T}) = Dual{T}(x * value(y), x * partials(y))
+@inline Base.:*{X,Y,V,N}(x::Dual{X}, y::Dual{Y,Dual{X,V,N}}) = Dual{Y}(x * value(y), x * partials(y))
+
+@inline Base.:*(d::Dual, x::Bool) = x ? d : (signbit(value(d))==0 ? zero(d) : -zero(d))
+@inline Base.:*(x::Bool, d::Dual) = d * x
 
 # Division #
 #----------#
 
-@ambiguous @inline function Base.:/{N}(n1::Dual{N}, n2::Dual{N})
-    v1, v2 = value(n1), value(n2)
-    return Dual(v1 / v2, _div_partials(partials(n1), partials(n2), v1, v2))
+@inline Base.:/(x::Dual, y::Dual) = throw(TagMismatchError(x, y))
+
+@inline function Base.:/{T}(x::Dual{T}, y::Dual{T})
+    vx, vy = value(x), value(y)
+    return Dual{T}(vx / vy, _div_partials(partials(x), partials(y), vx, vy))
 end
 
-@ambiguous @inline function Base.:/(x::Real, n::Dual)
-    v = value(n)
+@inline Base.:/{T}(x::Dual{T}, y::Real) = Dual{T}(value(x) / y, partials(x) / y)
+@inline Base.:/{X,Y,V,N}(x::Dual{X,Dual{Y,V,N}}, y::Dual{Y}) = Dual{X}(value(x) / y, partials(x) / y)
+
+@inline function Base.:/{T}(x::Real, y::Dual{T})
+    v = value(y)
     divv = x / v
-    return Dual(divv, -(divv / v) * partials(n))
+    return Dual{T}(divv, -(divv / v) * partials(y))
 end
 
-@ambiguous @inline Base.:/(n::Dual, x::Real) = Dual(value(n) / x, partials(n) / x)
+@inline function Base.:/{X,Y,V,N}(x::Dual{X}, y::Dual{Y,Dual{X,V,N}})
+    v = value(y)
+    divv = x / v
+    return Dual{Y}(divv, -(divv / v) * partials(y))
+end
 
 # Exponentiation #
 #----------------#
 
 for f in (:(Base.:^), :(NaNMath.pow))
     @eval begin
-        @ambiguous @inline function ($f){N}(n1::Dual{N}, n2::Dual{N})
-            v1, v2 = value(n1), value(n2)
-            expv = ($f)(v1, v2)
-            powval = v2 * ($f)(v1, v2 - 1)
-            logval = isconstant(n2) ? one(expv) : expv * log(v1)
-            new_partials = _mul_partials(partials(n1), partials(n2), powval, logval)
-            return Dual(expv, new_partials)
+        @inline ($f)(x::Dual, y::Dual) = throw(TagMismatchError(x, y))
+
+        @inline function ($f){T}(x::Dual{T}, y::Dual{T})
+            vx, vy = value(x), value(y)
+            expv = ($f)(vx, vy)
+            powval = vy * ($f)(vx, vy - 1)
+            logval = isconstant(y) ? one(expv) : expv * log(vx)
+            new_partials = _mul_partials(partials(x), partials(y), powval, logval)
+            return Dual{T}(expv, new_partials)
+        end
+
+        @inline function ($f){X,Y,V,N}(x::Dual{X,Dual{Y,V,N}}, y::Dual{Y})
+            v = value(x)
+            expv = ($f)(v, y)
+            deriv = y * ($f)(v, y - 1)
+            return Dual{X}(expv, deriv * partials(x))
+        end
+
+        @inline function ($f){X,Y,V,N}(x::Dual{X}, y::Dual{Y,Dual{X,V,N}})
+            v = value(y)
+            expv = ($f)(x, v)
+            deriv = expv*log(x)
+            return Dual{Y}(expv, deriv * partials(y))
         end
 
-        @inline ($f)(::Base.Irrational{:e}, n::Dual) = exp(n)
+        @inline ($f)(::Base.Irrational{:e}, d::Dual) = exp(d)
     end
 
-    for T in (:Integer, :Rational, :Real)
+    for R in (:Integer, :Rational, :Real)
         @eval begin
-            @ambiguous @inline function ($f)(n::Dual, x::$(T))
-                v = value(n)
-                expv = ($f)(v, x)
-                deriv = x * ($f)(v, x - 1)
-                return Dual(expv, deriv * partials(n))
+            @inline function ($f){T}(x::Dual{T}, y::$R)
+                v = value(x)
+                expv = ($f)(v, y)
+                deriv = y * ($f)(v, y - 1)
+                return Dual{T}(expv, deriv * partials(x))
             end
 
-            @ambiguous @inline function ($f)(x::$(T), n::Dual)
-                v = value(n)
+            @inline function ($f){T}(x::$R, y::Dual{T})
+                v = value(y)
                 expv = ($f)(x, v)
                 deriv = expv*log(x)
-                return Dual(expv, deriv * partials(n))
+                return Dual{T}(expv, deriv * partials(y))
             end
         end
     end
@@ -291,19 +287,21 @@ end
 
 function to_nanmath(x::Expr)
     if x.head == :call
-        funsym = Expr(:.,:NaNMath,Base.Meta.quot(x.args[1]))
-        return Expr(:call,funsym,[to_nanmath(z) for z in x.args[2:end]]...)
+        funsym = Expr(:., :NaNMath, Base.Meta.quot(x.args[1]))
+        return Expr(:call, funsym, [to_nanmath(z) for z in x.args[2:end]]...)
     else
-        return Expr(:call,[to_nanmath(z) for z in x.args]...)
+        return Expr(:call, [to_nanmath(z) for z in x.args]...)
     end
 end
 
 to_nanmath(x) = x
 
-@inline Base.conj(n::Dual) = n
-@inline Base.transpose(n::Dual) = n
-@inline Base.ctranspose(n::Dual) = n
-@inline Base.abs(n::Dual) = signbit(value(n)) ? -n : n
+@inline Base.conj(d::Dual) = d
+@inline Base.transpose(d::Dual) = d
+@inline Base.ctranspose(d::Dual) = d
+@inline Base.abs(d::Dual) = signbit(value(d)) ? -d : d
+
+
 
 for fsym in AUTO_DEFINED_UNARY_FUNCS
     v = :v
@@ -311,20 +309,15 @@ for fsym in AUTO_DEFINED_UNARY_FUNCS
 
     # exp and sqrt are manually defined below
     if !(in(fsym, (:exp, :sqrt)))
+        funcs = Vector{Expr}(0)
         is_special_function = in(fsym, SPECIAL_FUNCS)
-        if is_special_function
-            @eval begin
-                @inline function SpecialFunctions.$(fsym)(n::Dual)
-                    $(v) = value(n)
-                    return Dual(SpecialFunctions.$(fsym)($v), $(deriv) * partials(n))
-                end
-            end
-        end
-        if !(is_special_function) || VERSION < v"0.6.0-dev.2767"
+        is_special_function && push!(funcs, :(SpecialFunctions.$(fsym)))
+        (!(is_special_function) || VERSION < v"0.6.0-dev.2767") && push!(funcs, :(Base.$(fsym)))
+        for func in funcs
             @eval begin
-                @inline function Base.$(fsym)(n::Dual)
-                    $(v) = value(n)
-                    return Dual(Base.$(fsym)($v), $(deriv) * partials(n))
+                @inline function $(func){T}(d::Dual{T})
+                    $(v) = value(d)
+                    return Dual{T}($(func)($v), $(deriv) * partials(d))
                 end
             end
         end
@@ -334,9 +327,9 @@ for fsym in AUTO_DEFINED_UNARY_FUNCS
     if fsym in NANMATH_FUNCS
         nan_deriv = to_nanmath(deriv)
         @eval begin
-            @inline function NaNMath.$(fsym)(n::Dual)
-                v = value(n)
-                return Dual(NaNMath.$(fsym)($v), $(nan_deriv) * partials(n))
+            @inline function NaNMath.$(fsym){T}(d::Dual{T})
+                v = value(d)
+                return Dual{T}(NaNMath.$(fsym)($v), $(nan_deriv) * partials(d))
             end
         end
     end
@@ -349,67 +342,55 @@ end
 # Manually Optimized Functions #
 #------------------------------#
 
-@inline function Base.exp{N}(n::Dual{N})
-    expv = exp(value(n))
-    return Dual(expv, expv * partials(n))
+@inline function Base.exp{T}(d::Dual{T})
+    expv = exp(value(d))
+    return Dual{T}(expv, expv * partials(d))
 end
 
-@inline function Base.sqrt{N}(n::Dual{N})
-    sqrtv = sqrt(value(n))
+@inline function Base.sqrt{T}(d::Dual{T})
+    sqrtv = sqrt(value(d))
     deriv = inv(sqrtv + sqrtv)
-    return Dual(sqrtv, deriv * partials(n))
+    return Dual{T}(sqrtv, deriv * partials(d))
 end
 
-@inline function calc_hypot(x, y)
-    vx = value(x)
-    vy = value(y)
-    h = hypot(vx, vy)
-    return Dual(h, (vx/h) * partials(x) + (vy/h) * partials(y))
-end
+# Other Functions #
+#-----------------#
 
-@inline function calc_hypot(x, y, z)
+@inline function calc_hypot{T}(x, y, ::Type{T})
     vx = value(x)
     vy = value(y)
-    vz = value(z)
-    h = hypot(vx, vy, vz)
-    return Dual(h, (vx/h) * partials(x) + (vy/h) * partials(y) + (vz/h) * partials(z))
+    h = hypot(vx, vy)
+    return Dual{T}(h, (vx/h) * partials(x) + (vy/h) * partials(y))
 end
 
-@ambiguous @inline Base.hypot{N}(x::Dual{N}, y::Dual{N}) = calc_hypot(x, y)
-@ambiguous @inline Base.hypot(x::Dual, y::Real) = calc_hypot(x, y)
-@ambiguous @inline Base.hypot(x::Real, y::Dual) = calc_hypot(x, y)
-
-@inline Base.hypot(x::Dual, y::Dual, z::Dual) = calc_hypot(x, y, z)
-
-@inline Base.hypot(x::Real, y::Dual, z::Dual) = calc_hypot(x, y, z)
-@inline Base.hypot(x::Dual, y::Real, z::Dual) = calc_hypot(x, y, z)
-@inline Base.hypot(x::Dual, y::Dual, z::Real) = calc_hypot(x, y, z)
+@inline Base.hypot(x::Dual, y::Dual) = throw(TagMismatchError(x, y))
+@inline Base.hypot{T}(x::Dual{T}, y::Dual{T}) = calc_hypot(x, y, T)
+@inline Base.hypot{T}(x::Dual{T}, y::Real) = calc_hypot(x, y, T)
+@inline Base.hypot{X,Y,V,N}(x::Dual{X,Dual{Y,V,N}}, y::Dual{Y}) = calc_hypot(x, y, X)
+@inline Base.hypot{T}(x::Real, y::Dual{T}) = calc_hypot(x, y, T)
+@inline Base.hypot{X,Y,V,N}(x::Dual{X}, y::Dual{Y,Dual{X,V,N}}) = calc_hypot(x, y, Y)
 
-@inline Base.hypot(x::Dual, y::Real, z::Real) = calc_hypot(x, y, z)
-@inline Base.hypot(x::Real, y::Dual, z::Real) = calc_hypot(x, y, z)
-@inline Base.hypot(x::Real, y::Real, z::Dual) = calc_hypot(x, y, z)
+@inline sincos(x) = (sin(x), cos(x))
 
-@inline sincos(n) = (sin(n), cos(n))
-
-@inline function sincos(n::Dual)
-    sn, cn = sincos(value(n))
-    return (Dual(sn, cn * partials(n)), Dual(cn, -sn * partials(n)))
+@inline function sincos{T}(d::Dual{T})
+    sd, cd = sincos(value(d))
+    return (Dual{T}(sd, cd * partials(d)), Dual{T}(cd, -sd * partials(d)))
 end
 
-# Other Functions #
-#-----------------#
-
-@inline function calc_atan2(y, x)
+@inline function calc_atan2{T}(y, x, ::Type{T})
     z = y / x
     v = value(z)
     atan2v = atan2(value(y), value(x))
     deriv = inv(one(v) + v*v)
-    return Dual(atan2v, deriv * partials(z))
+    return Dual{T}(atan2v, deriv * partials(z))
 end
 
-@ambiguous @inline Base.atan2{N}(y::Dual{N}, x::Dual{N}) = calc_atan2(y, x)
-@ambiguous @inline Base.atan2(y::Real, x::Dual) = calc_atan2(y, x)
-@ambiguous @inline Base.atan2(y::Dual, x::Real) = calc_atan2(y, x)
+@inline Base.atan2(y::Dual, x::Dual) = throw(TagMismatchError(y, x))
+@inline Base.atan2{T}(y::Dual{T}, x::Dual{T}) = calc_atan2(y, x, T)
+@inline Base.atan2{T}(y::Dual{T}, x::Real) = calc_atan2(y, x, T)
+@inline Base.atan2{X,Y,V,N}(y::Dual{X,Dual{Y,V,N}}, x::Dual{Y}) = calc_atan2(y, x, X)
+@inline Base.atan2{T}(y::Real, x::Dual{T}) = calc_atan2(y, x, T)
+@inline Base.atan2{X,Y,V,N}(y::Dual{X}, x::Dual{Y,Dual{X,V,N}}) = calc_atan2(y, x, Y)
 
 @generated function Base.fma{N}(x::Dual{N}, y::Dual{N}, z::Dual{N})
     ex = Expr(:tuple, [:(fma(value(x), partials(y)[$i], fma(value(y), partials(x)[$i], partials(z)[$i]))) for i in 1:N]...)
@@ -452,10 +433,10 @@ end
 # Pretty Printing #
 ###################
 
-function Base.show{N}(io::IO, n::Dual{N})
-    print(io, "Dual(", value(n))
+function Base.show{T,V,N}(io::IO, d::Dual{T,V,N})
+    print(io, "Dual{$T}(", value(d))
     for i in 1:N
-        print(io, ",", partials(n, i))
+        print(io, ",", partials(d, i))
     end
     print(io, ")")
 end

From 672d9aad9eacfa262d065a95e09a1d43034a7176 Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Fri, 24 Feb 2017 11:51:02 -0500
Subject: [PATCH 04/26] unroll config type definitions for readability

---
 src/config.jl | 87 ++++++++++++++++++++++++++++++---------------------
 src/dual.jl   |  2 --
 2 files changed, 51 insertions(+), 38 deletions(-)

diff --git a/src/config.jl b/src/config.jl
index fc4be998..9911e574 100644
--- a/src/config.jl
+++ b/src/config.jl
@@ -1,40 +1,49 @@
 @compat abstract type AbstractConfig end
 
-###########
-# Config #
-###########
-
 @inline chunksize(::Tuple{}) = error("empty tuple passed to `chunksize`")
 
-# Define a few different AbstractConfig types. All these types share the same structure,
-# but feature different constructors and dispatch restrictions in downstream code.
-for Config in (:GradientConfig, :JacobianConfig)
-    @eval begin
-        @compat immutable $Config{N,T,D} <: AbstractConfig
-            seeds::NTuple{N,Partials{N,T}}
-            duals::D
-            # disable default outer constructor
-            (::Type{$Config{N,T,D}}){N,T,D}(seeds, duals) = new{N,T,D}(seeds, duals)
-        end
-
-        # This is type-unstable, which is why our docs advise users to manually enter a chunk size
-        # when possible. The type instability here doesn't really hurt performance, since most of
-        # the heavy lifting happens behind a function barrier, but it can cause inference to give up
-        # when predicting the final output type of API functions.
-        $Config(x::AbstractArray) = $Config{pickchunksize(length(x))}(x)
-
-        function (::Type{$Config{N}}){N,T}(x::AbstractArray{T})
-            seeds = construct_seeds(Partials{N,T})
-            duals = similar(x, Dual{N,T})
-            return $Config{N,T,typeof(duals)}(seeds, duals)
-        end
-
-        Base.copy{N,T,D}(cfg::$Config{N,T,D}) = $Config{N,T,D}(cfg.seeds, copy(cfg.duals))
-        Base.copy{N,T,D<:Tuple}(cfg::$Config{N,T,D}) = $Config{N,T,D}(cfg.seeds, map(copy, cfg.duals))
-
-        @inline chunksize{N}(::$Config{N}) = N
-        @inline chunksize{N}(::Tuple{Vararg{$Config{N}}}) = N
-    end
+##################
+# GradientConfig #
+##################
+
+@compat immutable GradientConfig{N,V,D} <: AbstractConfig
+    seeds::NTuple{N,Partials{N,V}}
+    duals::D
+    # disable default outer constructor
+    (::Type{GradientConfig{N,V,D}}){N,V,D}(seeds, duals) = new{N,V,D}(seeds, duals)
+end
+
+GradientConfig(x::AbstractArray) = GradientConfig{pickchunksize(length(x))}(x)
+
+function (::Type{GradientConfig{N}}){N,V}(x::AbstractArray{V})
+    seeds = construct_seeds(Partials{N,V})
+    duals = similar(x, Dual{N,V})
+    return GradientConfig{N,V,typeof(duals)}(seeds, duals)
+end
+
+Base.copy{N,V,D}(cfg::GradientConfig{N,V,D}) = GradientConfig{N,V,D}(cfg.seeds, copy(cfg.duals))
+Base.copy{N,V,D<:Tuple}(cfg::GradientConfig{N,V,D}) = GradientConfig{N,V,D}(cfg.seeds, map(copy, cfg.duals))
+
+@inline chunksize{N}(::GradientConfig{N}) = N
+@inline chunksize{N}(::Tuple{Vararg{GradientConfig{N}}}) = N
+
+##################
+# JacobianConfig #
+##################
+
+@compat immutable JacobianConfig{N,V,D} <: AbstractConfig
+    seeds::NTuple{N,Partials{N,V}}
+    duals::D
+    # disable default outer constructor
+    (::Type{JacobianConfig{N,V,D}}){N,V,D}(seeds, duals) = new{N,V,D}(seeds, duals)
+end
+
+JacobianConfig(x::AbstractArray) = JacobianConfig{pickchunksize(length(x))}(x)
+
+function (::Type{JacobianConfig{N}}){N,V}(x::AbstractArray{V})
+    seeds = construct_seeds(Partials{N,V})
+    duals = similar(x, Dual{N,V})
+    return JacobianConfig{N,V,typeof(duals)}(seeds, duals)
 end
 
 JacobianConfig(y::AbstractArray, x::AbstractArray) = JacobianConfig{pickchunksize(length(x))}(y, x)
@@ -47,9 +56,15 @@ function (::Type{JacobianConfig{N}}){N,Y,X}(y::AbstractArray{Y}, x::AbstractArra
     return JacobianConfig{N,X,typeof(duals)}(seeds, duals)
 end
 
-##################
+Base.copy{N,T,D}(cfg::JacobianConfig{N,T,D}) = JacobianConfig{N,T,D}(cfg.seeds, copy(cfg.duals))
+Base.copy{N,T,D<:Tuple}(cfg::JacobianConfig{N,T,D}) = JacobianConfig{N,T,D}(cfg.seeds, map(copy, cfg.duals))
+
+@inline chunksize{N}(::JacobianConfig{N}) = N
+@inline chunksize{N}(::Tuple{Vararg{JacobianConfig{N}}}) = N
+
+#################
 # HessianConfig #
-##################
+#################
 
 immutable HessianConfig{N,J,JD,G,GD} <: AbstractConfig
     gradient_config::GradientConfig{N,G,GD}
@@ -73,7 +88,7 @@ function (::Type{HessianConfig{N}}){N}(out::DiffResult, x::AbstractArray)
 end
 
 Base.copy(cfg::HessianConfig) = HessianConfig(copy(cfg.gradient_config),
-                                                 copy(cfg.jacobian_config))
+                                              copy(cfg.jacobian_config))
 
 @inline chunksize{N}(::HessianConfig{N}) = N
 @inline chunksize{N}(::Tuple{Vararg{HessianConfig{N}}}) = N
diff --git a/src/dual.jl b/src/dual.jl
index 098e6b45..3818e203 100644
--- a/src/dual.jl
+++ b/src/dual.jl
@@ -301,8 +301,6 @@ to_nanmath(x) = x
 @inline Base.ctranspose(d::Dual) = d
 @inline Base.abs(d::Dual) = signbit(value(d)) ? -d : d
 
-
-
 for fsym in AUTO_DEFINED_UNARY_FUNCS
     v = :v
     deriv = Calculus.differentiate(:($(fsym)($v)), v)

From acd73c15b668d035cd20ec5fbe1145785ea4d7d1 Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Fri, 24 Feb 2017 15:11:21 -0500
Subject: [PATCH 05/26] fix binary Dual ambiguities

---
 src/ForwardDiff.jl |  14 ++--
 src/dual.jl        | 205 ++++++++++++++++++++-------------------------
 2 files changed, 100 insertions(+), 119 deletions(-)

diff --git a/src/ForwardDiff.jl b/src/ForwardDiff.jl
index 3b65d67a..85f6f3bd 100644
--- a/src/ForwardDiff.jl
+++ b/src/ForwardDiff.jl
@@ -58,13 +58,13 @@ const CHUNK_THRESHOLD = 10
 
 include("partials.jl")
 include("dual.jl")
-include("config.jl")
-include("api_utils.jl")
-include("derivative.jl")
-include("gradient.jl")
-include("jacobian.jl")
-include("hessian.jl")
-include("deprecated.jl")
+# include("config.jl")
+# include("api_utils.jl")
+# include("derivative.jl")
+# include("gradient.jl")
+# include("jacobian.jl")
+# include("hessian.jl")
+# include("deprecated.jl")
 
 export DiffBase
 
diff --git a/src/dual.jl b/src/dual.jl
index 3818e203..68f71cee 100644
--- a/src/dual.jl
+++ b/src/dual.jl
@@ -27,19 +27,19 @@ end
 # Constructors #
 ################
 
-(::Type{Dual{T}}){T,N,V}(value::V, partials::Partials{N,V}) = Dual{T,V,N}(value, partials)
+@inline (::Type{Dual{T}}){T,N,V}(value::V, partials::Partials{N,V}) = Dual{T,V,N}(value, partials)
 
-function (::Type{Dual{T}}){T,N,A,B}(value::A, partials::Partials{N,B})
+@inline function (::Type{Dual{T}}){T,N,A,B}(value::A, partials::Partials{N,B})
     C = promote_type(A, B)
     return Dual{T}(convert(C, value), convert(Partials{N,C}, partials))
 end
 
-(::Type{Dual{T}}){T}(value::Real, partials::Tuple) = Dual{T}(value, Partials(partials))
-(::Type{Dual{T}}){T}(value::Real, partials::Tuple{}) = Dual{T}(value, Partials{0,typeof(value)}(partials))
-(::Type{Dual{T}}){T}(value::Real, partials::Real...) = Dual{T}(value, partials)
-(::Type{Dual{T}}){T,V<:Real,N,i}(value::V, ::Type{Val{N}}, ::Type{Val{i}}) = Dual{T}(value, single_seed(Partials{N,V}, Val{i}))
+@inline (::Type{Dual{T}}){T}(value::Real, partials::Tuple) = Dual{T}(value, Partials(partials))
+@inline (::Type{Dual{T}}){T}(value::Real, partials::Tuple{}) = Dual{T}(value, Partials{0,typeof(value)}(partials))
+@inline (::Type{Dual{T}}){T}(value::Real, partials::Real...) = Dual{T}(value, partials)
+@inline (::Type{Dual{T}}){T,V<:Real,N,i}(value::V, ::Type{Val{N}}, ::Type{Val{i}}) = Dual{T}(value, single_seed(Partials{N,V}, Val{i}))
 
-Dual(args...) = Dual{Void}(args...)
+@inline Dual(args...) = Dual{Void}(args...)
 
 ##############################
 # Utility/Accessor Functions #
@@ -64,6 +64,25 @@ Dual(args...) = Dual{Void}(args...)
 # Generic Functions #
 #####################
 
+macro define_binary_dual_op(f, both_body, left_body, right_body)
+    return esc(quote
+        @inline $(f)(x::Dual, y::Dual) = throw(TagMismatchError(x, y))
+        @inline $(f){T}(x::Dual{T}, y::Dual{T}) = $both_body
+
+        # define on all these types to avoid various ambiguities
+        for R in (:AbstractFloat, :Irrational, :Integer, :Rational, :Real)
+            @eval begin
+                @inline $(f){T}(x::Dual{T}, y::$(Expr(:$, :R))) = $left_body
+                @inline $(f){T}(x::$(Expr(:$, :R)), y::Dual{T}) = $right_body
+            end
+        end
+
+        @inline $(f){T,S,X,Y,N}(x::Dual{T,Dual{S,X,N}}, y::Dual{T,Dual{S,Y,N}}) = $both_body
+        @inline $(f){T,S,V,N}(x::Dual{S,Dual{T,V,N}}, y::Dual{T}) = $left_body
+        @inline $(f){T,S,V,N}(x::Dual{T}, y::Dual{S,Dual{T,V,N}}) = $right_body
+    end)
+end
+
 Base.copy(d::Dual) = d
 
 Base.eps(d::Dual) = eps(value(d))
@@ -115,16 +134,12 @@ isconstant(d::Dual) = iszero(partials(d))
 
 for pred in (:isequal, :(==), :isless, :(<=), :<)
     @eval begin
-        Base.$(pred)(x::Dual, y::Dual) = throw(TagMismatchError(x, y))
-        Base.$(pred){T}(x::Dual{T}, y::Dual{T}) = $(pred)(value(x), value(y))
-        Base.$(pred){X,Y,V,N}(x::Dual{X}, y::Dual{Y,Dual{X,V,N}}) = $(pred)(x, value(y))
-        Base.$(pred){X,Y,V,N}(x::Dual{X,Dual{Y,V,N}}, y::Dual{Y}) = $(pred)(value(x), y)
-    end
-    for R in (:AbstractFloat, :Irrational, :Real)
-        @eval begin
-            Base.$(pred)(x::Dual, y::$R) = $(pred)(value(x), y)
-            Base.$(pred)(x::$R, y::Dual) = $(pred)(x, value(y))
-        end
+        @define_binary_dual_op(
+            Base.$(pred),
+            $(pred)(value(x), value(y)),
+            $(pred)(x, value(y)),
+            $(pred)(value(x), y)
+        )
     end
 end
 
@@ -149,11 +164,9 @@ for R in (:BigFloat, :Bool, :Irrational, :Real)
     end
 end
 
-Base.convert(::Type{Dual}, d::Dual) = d
 Base.convert{T,V<:Real,N}(::Type{Dual{T,V,N}}, d::Dual{T}) = Dual{T}(convert(V, value(d)), convert(Partials{N,V}, partials(d)))
-Base.convert{D<:Dual}(::Type{D}, n::D) = d
+Base.convert{D<:Dual}(::Type{D}, d::D) = d
 Base.convert{T,V<:Real,N}(::Type{Dual{T,V,N}}, x::Real) = Dual{T}(V(x), zero(Partials{N,V}))
-Base.convert(::Type{Dual}, x::Real) = Dual(x)
 
 Base.promote_array_type{D<:Dual, A<:AbstractFloat}(F, ::Type{D}, ::Type{A}) = promote_type(D, A)
 Base.promote_array_type{D<:Dual, A<:AbstractFloat, P}(F, ::Type{D}, ::Type{A}, ::Type{P}) = P
@@ -170,37 +183,34 @@ Base.AbstractFloat{T,V,N}(d::Dual{T,V,N}) = Dual{T,promote_type(V, Float16),N}(d
 # Addition/Subtraction #
 #----------------------#
 
-@inline Base.:+(x::Dual, y::Dual) = throw(TagMismatchError(x, y))
-@inline Base.:+{T}(x::Dual{T}, y::Dual{T}) = Dual{T}(value(x) + value(y), partials(x) + partials(y))
-@inline Base.:+{T}(x::Dual{T}, y::Real) = Dual{T}(value(x) + y, partials(x))
-@inline Base.:+{X,Y,V,N}(x::Dual{X,Dual{Y,V,N}}, y::Dual{Y}) = Dual{X}(value(x) + y, partials(x))
-@inline Base.:+{T}(x::Real, y::Dual{T}) = Dual{T}(x + value(y), partials(y))
-@inline Base.:+{X,Y,V,N}(x::Dual{X}, y::Dual{Y,Dual{X,V,N}}) = Dual{Y}(x + value(y), partials(y))
+@define_binary_dual_op(
+    Base.:+,
+    Dual{T}(value(x) + value(y), partials(x) + partials(y)),
+    Dual{T}(value(x) + y, partials(x)),
+    Dual{T}(x + value(y), partials(y))
+)
 
-@inline Base.:-(x::Dual, y::Dual) = throw(TagMismatchError(x, y))
-@inline Base.:-{T}(x::Dual{T}, y::Dual{T}) = Dual{T}(value(x) - value(y), partials(x) - partials(y))
-@inline Base.:-{T}(x::Dual{T}, y::Real) = Dual{T}(value(x) - y, partials(x))
-@inline Base.:-{X,Y,V,N}(x::Dual{X,Dual{Y,V,N}}, y::Dual{Y}) = Dual{X}(value(x) - y, partials(x))
-@inline Base.:-{T}(x::Real, y::Dual{T}) = Dual{T}(x - value(y), -partials(y))
-@inline Base.:-{X,Y,V,N}(x::Dual{X}, y::Dual{Y,Dual{X,V,N}}) = Dual{Y}(x - value(y), -partials(y))
+@define_binary_dual_op(
+    Base.:-,
+    Dual{T}(value(x) - value(y), partials(x) - partials(y)),
+    Dual{T}(value(x) - y, partials(x)),
+    Dual{T}(x - value(y), -partials(y))
+)
 
 @inline Base.:-(d::Dual) = Dual(-value(d), -partials(d))
 
 # Multiplication #
 #----------------#
 
-@inline Base.:*(x::Dual, y::Dual) = throw(TagMismatchError(x, y))
-
-@inline function Base.:*{T}(x::Dual{T}, y::Dual{T})
-    vx, vy = value(x), value(y)
-    return Dual{T}(vx * vy, _mul_partials(partials(x), partials(y), vy, vx))
-end
-
-@inline Base.:*{T}(x::Dual{T}, y::Real) = Dual{T}(value(x) * y, partials(x) * y)
-@inline Base.:*{X,Y,V,N}(x::Dual{X,Dual{Y,V,N}}, y::Dual{Y}) = Dual{X}(value(x) * y, partials(x) * y)
-
-@inline Base.:*{T}(x::Real, y::Dual{T}) = Dual{T}(x * value(y), x * partials(y))
-@inline Base.:*{X,Y,V,N}(x::Dual{X}, y::Dual{Y,Dual{X,V,N}}) = Dual{Y}(x * value(y), x * partials(y))
+@define_binary_dual_op(
+    Base.:*,
+    begin
+        vx, vy = value(x), value(y)
+        Dual{T}(vx * vy, _mul_partials(partials(x), partials(y), vy, vx))
+    end,
+    Dual{T}(value(x) * y, partials(x) * y),
+    Dual{T}(x * value(y), x * partials(y))
+)
 
 @inline Base.:*(d::Dual, x::Bool) = x ? d : (signbit(value(d))==0 ? zero(d) : -zero(d))
 @inline Base.:*(x::Bool, d::Dual) = d * x
@@ -208,77 +218,48 @@ end
 # Division #
 #----------#
 
-@inline Base.:/(x::Dual, y::Dual) = throw(TagMismatchError(x, y))
-
-@inline function Base.:/{T}(x::Dual{T}, y::Dual{T})
-    vx, vy = value(x), value(y)
-    return Dual{T}(vx / vy, _div_partials(partials(x), partials(y), vx, vy))
-end
-
-@inline Base.:/{T}(x::Dual{T}, y::Real) = Dual{T}(value(x) / y, partials(x) / y)
-@inline Base.:/{X,Y,V,N}(x::Dual{X,Dual{Y,V,N}}, y::Dual{Y}) = Dual{X}(value(x) / y, partials(x) / y)
-
-@inline function Base.:/{T}(x::Real, y::Dual{T})
-    v = value(y)
-    divv = x / v
-    return Dual{T}(divv, -(divv / v) * partials(y))
-end
-
-@inline function Base.:/{X,Y,V,N}(x::Dual{X}, y::Dual{Y,Dual{X,V,N}})
-    v = value(y)
-    divv = x / v
-    return Dual{Y}(divv, -(divv / v) * partials(y))
-end
+@define_binary_dual_op(
+    Base.:/,
+    begin
+        vx, vy = value(x), value(y)
+        Dual{T}(vx / vy, _div_partials(partials(x), partials(y), vx, vy))
+    end,
+    Dual{T}(value(x) / y, partials(x) / y),
+    begin
+        v = value(y)
+        divv = x / v
+        Dual{T}(divv, -(divv / v) * partials(y))
+    end
+)
 
 # Exponentiation #
 #----------------#
 
 for f in (:(Base.:^), :(NaNMath.pow))
     @eval begin
-        @inline ($f)(x::Dual, y::Dual) = throw(TagMismatchError(x, y))
-
-        @inline function ($f){T}(x::Dual{T}, y::Dual{T})
-            vx, vy = value(x), value(y)
-            expv = ($f)(vx, vy)
-            powval = vy * ($f)(vx, vy - 1)
-            logval = isconstant(y) ? one(expv) : expv * log(vx)
-            new_partials = _mul_partials(partials(x), partials(y), powval, logval)
-            return Dual{T}(expv, new_partials)
-        end
-
-        @inline function ($f){X,Y,V,N}(x::Dual{X,Dual{Y,V,N}}, y::Dual{Y})
-            v = value(x)
-            expv = ($f)(v, y)
-            deriv = y * ($f)(v, y - 1)
-            return Dual{X}(expv, deriv * partials(x))
-        end
-
-        @inline function ($f){X,Y,V,N}(x::Dual{X}, y::Dual{Y,Dual{X,V,N}})
-            v = value(y)
-            expv = ($f)(x, v)
-            deriv = expv*log(x)
-            return Dual{Y}(expv, deriv * partials(y))
-        end
-
-        @inline ($f)(::Base.Irrational{:e}, d::Dual) = exp(d)
-    end
-
-    for R in (:Integer, :Rational, :Real)
-        @eval begin
-            @inline function ($f){T}(x::Dual{T}, y::$R)
+        @define_binary_dual_op(
+            $f,
+            begin
+                vx, vy = value(x), value(y)
+                expv = ($f)(vx, vy)
+                powval = vy * ($f)(vx, vy - 1)
+                logval = isconstant(y) ? one(expv) : expv * log(vx)
+                new_partials = _mul_partials(partials(x), partials(y), powval, logval)
+                return Dual{T}(expv, new_partials)
+            end,
+            begin
                 v = value(x)
                 expv = ($f)(v, y)
                 deriv = y * ($f)(v, y - 1)
                 return Dual{T}(expv, deriv * partials(x))
-            end
-
-            @inline function ($f){T}(x::$R, y::Dual{T})
+            end,
+            begin
                 v = value(y)
                 expv = ($f)(x, v)
                 deriv = expv*log(x)
                 return Dual{T}(expv, deriv * partials(y))
             end
-        end
+        )
     end
 end
 
@@ -361,12 +342,12 @@ end
     return Dual{T}(h, (vx/h) * partials(x) + (vy/h) * partials(y))
 end
 
-@inline Base.hypot(x::Dual, y::Dual) = throw(TagMismatchError(x, y))
-@inline Base.hypot{T}(x::Dual{T}, y::Dual{T}) = calc_hypot(x, y, T)
-@inline Base.hypot{T}(x::Dual{T}, y::Real) = calc_hypot(x, y, T)
-@inline Base.hypot{X,Y,V,N}(x::Dual{X,Dual{Y,V,N}}, y::Dual{Y}) = calc_hypot(x, y, X)
-@inline Base.hypot{T}(x::Real, y::Dual{T}) = calc_hypot(x, y, T)
-@inline Base.hypot{X,Y,V,N}(x::Dual{X}, y::Dual{Y,Dual{X,V,N}}) = calc_hypot(x, y, Y)
+@define_binary_dual_op(
+    Base.hypot,
+    calc_hypot(x, y, T),
+    calc_hypot(x, y, T),
+    calc_hypot(x, y, T)
+)
 
 @inline sincos(x) = (sin(x), cos(x))
 
@@ -383,12 +364,12 @@ end
     return Dual{T}(atan2v, deriv * partials(z))
 end
 
-@inline Base.atan2(x::Dual, y::Dual) = throw(TagMismatchError(x, y))
-@inline Base.atan2{T}(x::Dual{T}, y::Dual{T}) = calc_atan2(x, y, T)
-@inline Base.atan2{T}(x::Dual{T}, y::Real) = calc_atan2(x, y, T)
-@inline Base.atan2{X,Y,V,N}(x::Dual{X,Dual{Y,V,N}}, y::Dual{Y}) = calc_atan2(x, y, X)
-@inline Base.atan2{T}(x::Real, y::Dual{T}) = calc_atan2(x, y, T)
-@inline Base.atan2{X,Y,V,N}(x::Dual{X}, y::Dual{Y,Dual{X,V,N}}) = calc_atan2(x, y, Y)
+@define_binary_dual_op(
+    Base.atan2,
+    calc_atan2(x, y, T),
+    calc_atan2(x, y, T),
+    calc_atan2(x, y, T)
+)
 
 @generated function Base.fma{N}(x::Dual{N}, y::Dual{N}, z::Dual{N})
     ex = Expr(:tuple, [:(fma(value(x), partials(y)[$i], fma(value(y), partials(x)[$i], partials(z)[$i]))) for i in 1:N]...)

From f486b71e47ad94dbd6134d8017da135cd1210f82 Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Fri, 24 Feb 2017 15:51:18 -0500
Subject: [PATCH 06/26] fix predicate definition order

---
 src/dual.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/dual.jl b/src/dual.jl
index 68f71cee..4c247b0d 100644
--- a/src/dual.jl
+++ b/src/dual.jl
@@ -137,8 +137,8 @@ for pred in (:isequal, :(==), :isless, :(<=), :<)
         @define_binary_dual_op(
             Base.$(pred),
             $(pred)(value(x), value(y)),
-            $(pred)(x, value(y)),
-            $(pred)(value(x), y)
+            $(pred)(value(x), y),
+            $(pred)(x, value(y))
         )
     end
 end

From 6265392459796ae9c8d810f2285c1cc94ae0acc9 Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Fri, 24 Mar 2017 13:31:30 -0400
Subject: [PATCH 07/26] fix type parameter switchup in Dual implementation

---
 src/dual.jl | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/dual.jl b/src/dual.jl
index 4c247b0d..652ffa7d 100644
--- a/src/dual.jl
+++ b/src/dual.jl
@@ -2,7 +2,7 @@
 # Dual #
 ########
 
-immutable Dual{T,V<:Real,N} <: Real
+@compat immutable Dual{T,V<:Real,N} <: Real
     value::V
     partials::Partials{N,V}
 end
@@ -11,7 +11,7 @@ end
 # TagMismatchError #
 ####################
 
-immutable TagMismatchError{X,Y} <: Exception
+@compat immutable TagMismatchError{X,Y} <: Exception
     x::Dual{X}
     y::Dual{Y}
 end
@@ -55,6 +55,9 @@ end
 @inline partials(d::Dual, i, j) = partials(d, i).partials[j]
 @inline partials(d::Dual, i, j, k...) = partials(partials(d, i, j), k...)
 
+@inline npartials{T,V,N}(::Dual{T,V,N}) = N
+@inline npartials{T,V,N}(::Type{Dual{T,V,N}}) = N
+
 @inline valtype{V}(::V) = V
 @inline valtype{V}(::Type{V}) = V
 @inline valtype{T,V,N}(::Dual{T,V,N}) = V
@@ -78,8 +81,8 @@ macro define_binary_dual_op(f, both_body, left_body, right_body)
         end
 
         @inline $(f){T,S,X,Y,N}(x::Dual{T,Dual{S,X,N}}, y::Dual{T,Dual{S,Y,N}}) = $both_body
-        @inline $(f){T,S,V,N}(x::Dual{S,Dual{T,V,N}}, y::Dual{T}) = $left_body
-        @inline $(f){T,S,V,N}(x::Dual{T}, y::Dual{S,Dual{T,V,N}}) = $right_body
+        @inline $(f){T,S,V,N}(x::Dual{T,Dual{S,V,N}}, y::Dual{S}) = $left_body
+        @inline $(f){T,S,V,N}(x::Dual{S}, y::Dual{T,Dual{S,V,N}}) = $right_body
     end)
 end
 
@@ -108,7 +111,7 @@ Base.hash(d::Dual, hsh::UInt64) = hash(value(d), hsh)
 function Base.read{T,V,N}(io::IO, ::Type{Dual{T,V,N}})
     value = read(io, V)
     partials = read(io, Partials{N,V})
-    return Dual{T,N,V}(value, partials)
+    return Dual{T,V,N}(value, partials)
 end
 
 function Base.write(io::IO, d::Dual)

From 1502c5df8a8b223b5a8adfad694d97d4da840d9a Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Fri, 24 Mar 2017 13:34:08 -0400
Subject: [PATCH 08/26] update configuration API with new tagging
 infrastructure and begin removing experimental multithreading functionality

If you have a use case where the input is large enough for multithreading to be useful,
then you should probably be using ReverseDiff instead of ForwardDiff. The exception to
this is Jacobian calculation, but ForwardDiff.jacobian never supported multithreading
anyway. Once Julia's multithreading model matures, we can consider implementing
multithreading for ForwardDiff.jacobian, but we probably should never again support
it for gradients.
---
 src/ForwardDiff.jl             |  30 ++----
 src/config.jl                  | 183 ++++++++++++++++++---------------
 src/{api_utils.jl => utils.jl} |  23 +----
 3 files changed, 109 insertions(+), 127 deletions(-)
 rename src/{api_utils.jl => utils.jl} (71%)

diff --git a/src/ForwardDiff.jl b/src/ForwardDiff.jl
index 85f6f3bd..cc2320cd 100644
--- a/src/ForwardDiff.jl
+++ b/src/ForwardDiff.jl
@@ -9,6 +9,7 @@ using DiffBase: DiffResult
 import Calculus
 import NaNMath
 import SpecialFunctions
+import Base.Threads
 
 #############################
 # types/functions/constants #
@@ -19,19 +20,6 @@ import SpecialFunctions
 
 const NANSAFE_MODE_ENABLED = false
 
-# multithreading #
-#----------------#
-
-const IS_MULTITHREADED_JULIA = VERSION >= v"0.5.0-dev+923"
-
-if IS_MULTITHREADED_JULIA
-    const NTHREADS = Base.Threads.nthreads()
-    @inline compat_threadid() = Base.Threads.threadid()
-else
-    const NTHREADS = 1
-    @inline compat_threadid() = 1
-end
-
 # function generation #
 #---------------------#
 
@@ -50,7 +38,7 @@ const SPECIAL_FUNCS = (:erf, :erfc, :erfinv, :erfcinv, :erfi, :erfcx,
 # chunk settings #
 #----------------#
 
-const CHUNK_THRESHOLD = 10
+const DEFAULT_CHUNK_THRESHOLD = 10
 
 ############
 # includes #
@@ -58,13 +46,13 @@ const CHUNK_THRESHOLD = 10
 
 include("partials.jl")
 include("dual.jl")
-# include("config.jl")
-# include("api_utils.jl")
-# include("derivative.jl")
-# include("gradient.jl")
-# include("jacobian.jl")
-# include("hessian.jl")
-# include("deprecated.jl")
+include("config.jl")
+include("utils.jl")
+include("derivative.jl")
+include("gradient.jl")
+include("jacobian.jl")
+include("hessian.jl")
+include("deprecated.jl")
 
 export DiffBase
 
diff --git a/src/config.jl b/src/config.jl
index 9911e574..662576bb 100644
--- a/src/config.jl
+++ b/src/config.jl
@@ -1,122 +1,135 @@
-@compat abstract type AbstractConfig end
+#########
+# Chunk #
+#########
 
-@inline chunksize(::Tuple{}) = error("empty tuple passed to `chunksize`")
+@compat immutable Chunk{N} end
+
+function Chunk(input_length::Integer, threshold::Integer = DEFAULT_CHUNK_THRESHOLD)
+    N = pickchunksize(input_length, threshold)
+    return Chunk{N}()
+end
+
+function Chunk(x::AbstractArray, threshold::Integer = DEFAULT_CHUNK_THRESHOLD)
+    return Chunk(length(x), threshold)
+end
+
+# Constrained to `N <= threshold`, minimize (in order of priority):
+#   1. the number of chunks that need to be computed
+#   2. the number of "left over" perturbations in the final chunk
+function pickchunksize(input_length, threshold = DEFAULT_CHUNK_THRESHOLD)
+    if input_length <= threshold
+        return input_length
+    else
+        nchunks = round(Int, input_length / DEFAULT_CHUNK_THRESHOLD, RoundUp)
+        return round(Int, input_length / nchunks, RoundUp)
+    end
+end
+
+#######
+# Tag #
+#######
+
+@compat immutable Tag{F,M} end
+
+Base.@pure order{V}(::Type{V}) = 0
+Base.@pure order{T,V,N}(::Type{Dual{T,V,N}}) = 1 + order(V)
+
+##################
+# AbstractConfig #
+##################
+
+@compat abstract type AbstractConfig{T<:Tag,N} end
+
+@compat immutable ConfigMismatchError{F,G,M} <: Exception
+    f::F
+    cfg::AbstractConfig{Tag{G,M}}
+end
+
+function Base.showerror{F,G}(io::IO, e::ConfigMismatchError{F,G})
+    print(io, "The provided configuration (of type $(typeof(e.cfg))) was constructed for a",
+              " function ($G), not the current target function ($F). ForwardDiff cannot safely",
+              " perform differentiation in this context; see the following issue for details:",
+              " https://github.com/JuliaDiff/ForwardDiff.jl/issues/83. You can resolve this",
+              " problem by constructing and using a configuration with the appropriate target",
+              " function, e.g. `ForwardDiff.GradientConfig($f, x)`")
+end
+
+Base.copy(cfg::AbstractConfig) = deepcopy(cfg)
+
+@inline chunksize(::AbstractConfig{T,N}) = N
 
 ##################
 # GradientConfig #
 ##################
 
-@compat immutable GradientConfig{N,V,D} <: AbstractConfig
+@compat immutable GradientConfig{T,V,N,D} <: AbstractConfig{T,N}
     seeds::NTuple{N,Partials{N,V}}
     duals::D
-    # disable default outer constructor
-    (::Type{GradientConfig{N,V,D}}){N,V,D}(seeds, duals) = new{N,V,D}(seeds, duals)
 end
 
-GradientConfig(x::AbstractArray) = GradientConfig{pickchunksize(length(x))}(x)
-
-function (::Type{GradientConfig{N}}){N,V}(x::AbstractArray{V})
+function GradientConfig{V,N,F,T}(::F,
+                                 x::AbstractArray{V},
+                                 ::Chunk{N} = Chunk(x),
+                                 ::T = Tag{F,order(V)}())
     seeds = construct_seeds(Partials{N,V})
-    duals = similar(x, Dual{N,V})
-    return GradientConfig{N,V,typeof(duals)}(seeds, duals)
+    duals = similar(x, Dual{T,V,N})
+    return GradientConfig{T,V,N,typeof(duals)}(seeds, duals)
 end
 
-Base.copy{N,V,D}(cfg::GradientConfig{N,V,D}) = GradientConfig{N,V,D}(cfg.seeds, copy(cfg.duals))
-Base.copy{N,V,D<:Tuple}(cfg::GradientConfig{N,V,D}) = GradientConfig{N,V,D}(cfg.seeds, map(copy, cfg.duals))
-
-@inline chunksize{N}(::GradientConfig{N}) = N
-@inline chunksize{N}(::Tuple{Vararg{GradientConfig{N}}}) = N
-
 ##################
 # JacobianConfig #
 ##################
 
-@compat immutable JacobianConfig{N,V,D} <: AbstractConfig
+@compat immutable JacobianConfig{T,V,N,D} <: AbstractConfig{T,N}
     seeds::NTuple{N,Partials{N,V}}
     duals::D
-    # disable default outer constructor
-    (::Type{JacobianConfig{N,V,D}}){N,V,D}(seeds, duals) = new{N,V,D}(seeds, duals)
 end
 
-JacobianConfig(x::AbstractArray) = JacobianConfig{pickchunksize(length(x))}(x)
-
-function (::Type{JacobianConfig{N}}){N,V}(x::AbstractArray{V})
+function JacobianConfig{V,N,F,T}(::F,
+                                 x::AbstractArray{V},
+                                 ::Chunk{N} = Chunk(x),
+                                 ::T = Tag{F,order(V)}())
     seeds = construct_seeds(Partials{N,V})
-    duals = similar(x, Dual{N,V})
-    return JacobianConfig{N,V,typeof(duals)}(seeds, duals)
+    duals = similar(x, Dual{T,V,N})
+    return JacobianConfig{T,V,N,typeof(duals)}(seeds, duals)
 end
 
-JacobianConfig(y::AbstractArray, x::AbstractArray) = JacobianConfig{pickchunksize(length(x))}(y, x)
-
-function (::Type{JacobianConfig{N}}){N,Y,X}(y::AbstractArray{Y}, x::AbstractArray{X})
+function JacobianConfig{Y,X,N,F,T}(::F,
+                                   y::AbstractArray{Y},
+                                   x::AbstractArray{X},
+                                   ::Chunk{N} = Chunk(x),
+                                   ::T = Tag{F,order(X)}())
     seeds = construct_seeds(Partials{N,X})
-    yduals = similar(y, Dual{N,Y})
-    xduals = similar(x, Dual{N,X})
+    yduals = similar(y, Dual{T,Y,N})
+    xduals = similar(x, Dual{T,X,N})
     duals = (yduals, xduals)
-    return JacobianConfig{N,X,typeof(duals)}(seeds, duals)
+    return JacobianConfig{T,X,N,typeof(duals)}(seeds, duals)
 end
 
-Base.copy{N,T,D}(cfg::JacobianConfig{N,T,D}) = JacobianConfig{N,T,D}(cfg.seeds, copy(cfg.duals))
-Base.copy{N,T,D<:Tuple}(cfg::JacobianConfig{N,T,D}) = JacobianConfig{N,T,D}(cfg.seeds, map(copy, cfg.duals))
-
-@inline chunksize{N}(::JacobianConfig{N}) = N
-@inline chunksize{N}(::Tuple{Vararg{JacobianConfig{N}}}) = N
-
 #################
 # HessianConfig #
 #################
 
-immutable HessianConfig{N,J,JD,G,GD} <: AbstractConfig
-    gradient_config::GradientConfig{N,G,GD}
-    jacobian_config::JacobianConfig{N,J,JD}
+@compat immutable HessianConfig{T,V,N,D,TJ,DJ} <: AbstractConfig{T,N}
+    jacobian_config::JacobianConfig{TJ,V,N,DJ}
+    gradient_config::GradientConfig{T,Dual{T,V,N},D}
 end
 
-HessianConfig(x::AbstractArray) = HessianConfig{pickchunksize(length(x))}(x)
-HessianConfig(out, x::AbstractArray) = HessianConfig{pickchunksize(length(x))}(out, x)
-
-function (::Type{HessianConfig{N}}){N}(x::AbstractArray)
-    jacobian_config = JacobianConfig{N}(x)
-    gradient_config = GradientConfig{N}(jacobian_config.duals)
-    return HessianConfig(gradient_config, jacobian_config)
+function HessianConfig{F,V}(f::F,
+                            x::AbstractArray{V},
+                            chunk::Chunk = Chunk(x),
+                            tag::Tag = Tag{F,order(V)}())
+    jacobian_config = JacobianConfig(f, x, chunk, tag)
+    gradient_config = GradientConfig(f, jacobian_config.duals, chunk)
+    return HessianConfig(jacobian_config, gradient_config)
 end
 
-function (::Type{HessianConfig{N}}){N}(out::DiffResult, x::AbstractArray)
-    jacobian_config = JacobianConfig{N}(DiffBase.gradient(out), x)
-    yduals, xduals = jacobian_config.duals
-    gradient_config = GradientConfig{N}(xduals)
-    return HessianConfig(gradient_config, jacobian_config)
+function HessianConfig{F,V}(result::DiffResult,
+                            f::F,
+                            x::AbstractArray{V},
+                            chunk::Chunk = Chunk(x),
+                            tag::Tag = Tag{F,order(V)}())
+    jacobian_config = JacobianConfig(f, DiffBase.gradient(result), x, chunk, tag)
+    gradient_config = GradientConfig(f, jacobian_config.duals[2], chunk)
+    return HessianConfig(jacobian_config, gradient_config)
 end
-
-Base.copy(cfg::HessianConfig) = HessianConfig(copy(cfg.gradient_config),
-                                              copy(cfg.jacobian_config))
-
-@inline chunksize{N}(::HessianConfig{N}) = N
-@inline chunksize{N}(::Tuple{Vararg{HessianConfig{N}}}) = N
-
-gradient_config(cfg::HessianConfig) = cfg.gradient_config
-jacobian_config(cfg::HessianConfig) = cfg.jacobian_config
-
-#####################
-# MultithreadConfig #
-#####################
-
-immutable MultithreadConfig{A,B} <: AbstractConfig
-    config1::A
-    config2::B
-end
-
-@eval function MultithreadConfig(cfg::Union{GradientConfig,JacobianConfig})
-    config1 = ntuple(n -> copy(cfg), Val{$NTHREADS})
-    return MultithreadConfig(config1, nothing)
-end
-
-function MultithreadConfig(cfg::HessianConfig)
-    config1 = MultithreadConfig(gradient_config(cfg))
-    config2 = copy(jacobian_config(cfg))
-    return MultithreadConfig(config1, config2)
-end
-
-gradient_config(cfg::MultithreadConfig) = cfg.config1
-jacobian_config(cfg::MultithreadConfig) = cfg.config2
-
-@inline chunksize(cfg::MultithreadConfig) = chunksize(gradient_config(cfg))
diff --git a/src/api_utils.jl b/src/utils.jl
similarity index 71%
rename from src/api_utils.jl
rename to src/utils.jl
index 5badd076..147890bd 100644
--- a/src/api_utils.jl
+++ b/src/utils.jl
@@ -1,19 +1,3 @@
-##########################
-# picking the chunk size #
-##########################
-
-# Constrained to chunk <= CHUNK_THRESHOLD, minimize (in order of priority):
-#   1. the number of chunks that need to be computed
-#   2. the number of "left over" perturbations in the final chunk
-function pickchunksize(k)
-    if k <= CHUNK_THRESHOLD
-        return k
-    else
-        nchunks = round(Int, k / CHUNK_THRESHOLD, RoundUp)
-        return round(Int, k / nchunks, RoundUp)
-    end
-end
-
 ####################
 # value extraction #
 ####################
@@ -33,16 +17,13 @@ end
 # vector mode function evaluation #
 ###################################
 
-vector_mode_dual_eval{F}(f::F, x, cfg::MultithreadConfig) = vector_mode_dual_eval(f, x, gradient_config(cfg))
-vector_mode_dual_eval{F}(f::F, x, cfg::Tuple) = vector_mode_dual_eval(f, x, first(cfg))
-
-function vector_mode_dual_eval{F}(f::F, x, cfg)
+function vector_mode_dual_eval{F}(f::F, x, cfg::Union{JacobianConfig,GradientConfig})  # seeds cfg.duals from x, then evaluates f on them
     xdual = cfg.duals
     seed!(xdual, x, cfg.seeds)
     return f(xdual)
 end
 
-function vector_mode_dual_eval{F}(f!::F, y, x, cfg)
+function vector_mode_dual_eval{F}(f!::F, y, x, cfg::JacobianConfig)
     ydual, xdual = cfg.duals
     seed!(xdual, x, cfg.seeds)
     seed!(ydual, y)

From 188052c3775c3065e82fa70e99cb8a620c2feeb6 Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Fri, 24 Mar 2017 16:20:14 -0400
Subject: [PATCH 09/26] get all tests passing except for deprecation layer +
 SIMD tests

---
 src/ForwardDiff.jl     |   2 +-
 src/config.jl          |  28 ++++----
 src/derivative.jl      |  36 +++++++---
 src/dual.jl            |  26 ++++----
 src/gradient.jl        |  86 +++---------------------
 src/hessian.jl         |  24 ++++---
 src/jacobian.jl        |  31 +++++----
 src/utils.jl           |  28 ++++----
 test/DeprecatedTest.jl |  74 +++++++++------------
 test/DualTest.jl       | 147 ++++++++++++++++++++---------------------
 test/GradientTest.jl   |  52 ++-------------
 test/HessianTest.jl    |  59 +++--------------
 test/JacobianTest.jl   |  23 +++----
 test/runtests.jl       |  32 ++++-----
 test/utils.jl          |   8 +--
 15 files changed, 264 insertions(+), 392 deletions(-)

diff --git a/src/ForwardDiff.jl b/src/ForwardDiff.jl
index cc2320cd..7e4450e2 100644
--- a/src/ForwardDiff.jl
+++ b/src/ForwardDiff.jl
@@ -52,7 +52,7 @@ include("derivative.jl")
 include("gradient.jl")
 include("jacobian.jl")
 include("hessian.jl")
-include("deprecated.jl")
+# include("deprecated.jl")
 
 export DiffBase
 
diff --git a/src/config.jl b/src/config.jl
index 662576bb..9a17a9c4 100644
--- a/src/config.jl
+++ b/src/config.jl
@@ -31,8 +31,8 @@ end
 
 @compat immutable Tag{F,M} end
 
-Base.@pure order{V}(::Type{V}) = 0
-Base.@pure order{T,V,N}(::Type{Dual{T,V,N}}) = 1 + order(V)
+@inline order{V}(::Type{V}) = 0  # fallback: a type not matched by the Dual method has order 0
+@inline order{T,V,N}(::Type{Dual{T,V,N}}) = 1 + order(V)  # one more than the order of the value type V
 
 ##################
 # AbstractConfig #
@@ -47,16 +47,16 @@ end
 
 function Base.showerror{F,G}(io::IO, e::ConfigMismatchError{F,G})
     print(io, "The provided configuration (of type $(typeof(e.cfg))) was constructed for a",
-              " function ($G), not the current target function ($F). ForwardDiff cannot safely",
+              " function other than the current target function. ForwardDiff cannot safely",
               " perform differentiation in this context; see the following issue for details:",
               " https://github.com/JuliaDiff/ForwardDiff.jl/issues/83. You can resolve this",
               " problem by constructing and using a configuration with the appropriate target",
-              " function, e.g. `ForwardDiff.GradientConfig($f, x)`")
+              " function, e.g. `ForwardDiff.GradientConfig($(e.f), x)`")  # interpolates the error's `f` field (presumably the target function; confirm against the struct)
 end
 
 Base.copy(cfg::AbstractConfig) = deepcopy(cfg)
 
-@inline chunksize(::AbstractConfig{T,N}) = N
+@inline chunksize{T,N}(::AbstractConfig{T,N}) = N  # T and N are declared as method type parameters so that N is bound in the body
 
 ##################
 # GradientConfig #
@@ -110,17 +110,17 @@ end
 # HessianConfig #
 #################
 
-@compat immutable HessianConfig{T,V,N,D,TJ,DJ} <: AbstractConfig{T,N}
-    jacobian_config::JacobianConfig{TJ,V,N,DJ}
-    gradient_config::GradientConfig{T,Dual{T,V,N},D}
+@compat immutable HessianConfig{T,V,N,D,MJ,DJ} <: AbstractConfig{T,N}  # pairs a Jacobian config with a gradient config
+    jacobian_config::JacobianConfig{Tag{Void,MJ},V,N,DJ}  # Jacobian config tag is fixed to Tag{Void,MJ}
+    gradient_config::GradientConfig{T,Dual{Tag{Void,MJ},V,N},D}  # its value type is the Dual type carrying the Jacobian's tag
 end
 
 function HessianConfig{F,V}(f::F,
                             x::AbstractArray{V},
                             chunk::Chunk = Chunk(x),
-                            tag::Tag = Tag{F,order(V)}())
-    jacobian_config = JacobianConfig(f, x, chunk, tag)
-    gradient_config = GradientConfig(f, jacobian_config.duals, chunk)
+                            tag::Tag = Tag{F,order(Dual{Void,V,0})}())  # default tag order is that of a Dual whose value type is V
+    jacobian_config = JacobianConfig(nothing, x, chunk)  # built with `nothing` in place of f and without an explicit tag
+    gradient_config = GradientConfig(f, jacobian_config.duals, chunk, tag)  # built over the Jacobian config's duals; receives the tag
     return HessianConfig(jacobian_config, gradient_config)
 end
 
@@ -128,8 +128,8 @@ function HessianConfig{F,V}(result::DiffResult,
                             f::F,
                             x::AbstractArray{V},
                             chunk::Chunk = Chunk(x),
-                            tag::Tag = Tag{F,order(V)}())
-    jacobian_config = JacobianConfig(f, DiffBase.gradient(result), x, chunk, tag)
-    gradient_config = GradientConfig(f, jacobian_config.duals[2], chunk)
+                            tag::Tag = Tag{F,order(Dual{Void,V,0})}())
+    jacobian_config = JacobianConfig(nothing, DiffBase.gradient(result), x, chunk)
+    gradient_config = GradientConfig(f, jacobian_config.duals[2], chunk, tag)
     return HessianConfig(jacobian_config, gradient_config)
 end
diff --git a/src/derivative.jl b/src/derivative.jl
index ba74770b..1cb83797 100644
--- a/src/derivative.jl
+++ b/src/derivative.jl
@@ -2,26 +2,44 @@
 # API methods #
 ###############
 
-derivative{F}(f::F, x::Real) = extract_derivative(f(Dual(x, one(x))))
+@generated function derivative{F,R<:Real}(f::F, x::R)  # derivative of f at the scalar x
+    T = Tag{F,order(R)}  # tag type computed in the generator from the function type F and order(R)
+    return quote
+        $(Expr(:meta, :inline))  # inline hint spliced into the generated body
+        return extract_derivative(f(Dual{$T}(x, one(x))))  # evaluate f on a tagged Dual seeded with one(x)
+    end
+end
 
 @generated function derivative{F,N}(f::F, x::NTuple{N,Real})
-    args = [:(Dual(x[$i], Val{N}, Val{$i})) for i in 1:N]
-    return :(extract_derivative(f($(args...))))
+    T = Tag{F,maximum(order(R) for R in x.parameters)}  # tag order is the largest order among the tuple's element types
+    args = [:(Dual{$T}(x[$i], Val{N}, Val{$i})) for i in 1:N]  # one tagged Dual expression per tuple element
+    return quote
+        $(Expr(:meta, :inline))  # inline hint spliced into the generated body
+        extract_derivative(f($(args...)))  # f is called with the N Duals splatted as separate arguments
+    end
 end
 
-function derivative!{F}(out, f::F, x::Real)
-    y = f(Dual(x, one(x)))
-    extract_derivative!(out, y)
-    return out
+@generated function derivative!{F,R<:Real}(out, f::F, x::R)  # in-place variant: result is written into `out`
+    T = Tag{F,order(R)}  # tag type computed in the generator from the function type F and order(R)
+    return quote
+        $(Expr(:meta, :inline))  # inline hint spliced into the generated body
+        extract_derivative!(out, f(Dual{$T}(x, one(x))))  # evaluate f on a tagged Dual seeded with one(x)
+        return out
+    end
 end
 
 #####################
 # result extraction #
 #####################
 
-@generated extract_derivative{N}(y::Dual{N}) = Expr(:tuple, [:(partials(y, $i)) for i in 1:N]...)
+@generated function extract_derivative{T,V,N}(y::Dual{T,V,N})  # collects the partials of y into a tuple
+    return quote
+        $(Expr(:meta, :inline))  # inline hint spliced into the generated body
+        $(Expr(:tuple, [:(partials(y, $i)) for i in 1:N]...))  # expands to (partials(y, 1), ..., partials(y, N))
+    end
+end
 
-@inline extract_derivative(y::Dual{1}) = partials(y, 1)
+@inline extract_derivative{T,V}(y::Dual{T,V,1}) = partials(y, 1)  # with exactly one partial, return it directly
 @inline extract_derivative(y::Real) = zero(y)
 @inline extract_derivative(y::AbstractArray) = extract_derivative!(similar(y, valtype(eltype(y))), y)
 
diff --git a/src/dual.jl b/src/dual.jl
index 652ffa7d..119f7d29 100644
--- a/src/dual.jl
+++ b/src/dual.jl
@@ -18,9 +18,9 @@ end
 
 function Base.showerror{X,Y}(io::IO, e::TagMismatchError{X,Y})
     print(io, "potential perturbation confusion detected when computing binary operation ",
-              "on $(e.x) and $(e.y) (tag $X != tag $Y). ForwardDiff cannot safely perform ",
-              "differentiation in this context; see the following issue for details: ",
-              "https://github.com/JuliaDiff/ForwardDiff.jl/issues/83")
+              "on $(e.x) and $(e.y) (tag mismatch: $X != $Y). ForwardDiff cannot safely ",
+              "perform differentiation in this context; see the following issue for ",
+              "details: https://github.com/JuliaDiff/ForwardDiff.jl/issues/83")
 end
 
 ################
@@ -200,7 +200,7 @@ Base.AbstractFloat{T,V,N}(d::Dual{T,V,N}) = Dual{T,promote_type(V, Float16),N}(d
     Dual{T}(x - value(y), -partials(y))
 )
 
-@inline Base.:-(d::Dual) = Dual(-value(d), -partials(d))
+@inline Base.:-{T}(d::Dual{T}) = Dual{T}(-value(d), -partials(d))  # negates value and partials; the result keeps the operand's tag T
 
 # Multiplication #
 #----------------#
@@ -338,6 +338,8 @@ end
 # Other Functions #
 #-----------------#
 
+# hypot
+
 @inline function calc_hypot{T}(x, y, ::Type{T})
     vx = value(x)
     vy = value(y)
@@ -352,12 +354,7 @@ end
     calc_hypot(x, y, T)
 )
 
-@inline sincos(x) = (sin(x), cos(x))
-
-@inline function sincos{T}(d::Dual{T})
-    sd, cd = sincos(value(d))
-    return (Dual{T}(sd, cd * partials(d)), Dual{T}(cd, -sd * partials(d)))
-end
+# atan2
 
 @inline function calc_atan2{T}(y, x, ::Type{T})
     z = y / x
@@ -407,8 +404,13 @@ end
 
 @inline Base.fma(x::Real, y::Dual, z::Real) = fma(y, x, z)
 
-@inline function Base.fma(x::Real, y::Real, z::Dual)
-    Dual(fma(x, y, value(z)), partials(z))
+# sincos
+
+@inline sincos(x) = (sin(x), cos(x))  # generic fallback: returns the (sin, cos) pair
+
+@inline function sincos{T}(d::Dual{T})  # Dual method: returns (sin(d), cos(d)) as tagged Duals
+    sd, cd = sincos(value(d))  # sin and cos of the underlying value
+    return (Dual{T}(sd, cd * partials(d)), Dual{T}(cd, -sd * partials(d)))  # partials scaled by cos for sin, by -sin for cos
 end
 
 ###################
diff --git a/src/gradient.jl b/src/gradient.jl
index fd0f9e29..f9cfc325 100644
--- a/src/gradient.jl
+++ b/src/gradient.jl
@@ -2,7 +2,12 @@
 # API methods #
 ###############
 
-function gradient{F}(f::F, x, cfg::AbstractConfig = GradientConfig(x))
+@compat const AllowedGradientConfig{F,M} = Union{GradientConfig{Tag{F,M}}, GradientConfig{Tag{Void,M}}}  # configs tagged with F itself or with Void
+
+gradient(f, x, cfg::GradientConfig) = throw(ConfigMismatchError(f, cfg))  # generic GradientConfig method: always throws
+gradient!(out, f, x, cfg::GradientConfig) = throw(ConfigMismatchError(f, cfg))  # in-place counterpart: always throws
+
+function gradient{F,M}(f::F, x, cfg::AllowedGradientConfig{F,M} = GradientConfig(f, x))
     if chunksize(cfg) == length(x)
         return vector_mode_gradient(f, x, cfg)
     else
@@ -10,7 +15,7 @@ function gradient{F}(f::F, x, cfg::AbstractConfig = GradientConfig(x))
     end
 end
 
-function gradient!{F}(out, f::F, x, cfg::AbstractConfig = GradientConfig(x))
+function gradient!{F,M}(out, f::F, x, cfg::AllowedGradientConfig{F,M} = GradientConfig(f, x))
     if chunksize(cfg) == length(x)
         vector_mode_gradient!(out, f, x, cfg)
     else
@@ -72,9 +77,6 @@ end
 # chunk mode #
 ##############
 
-# single threaded #
-#-----------------#
-
 function chunk_mode_gradient_expr(out_definition::Expr)
     return quote
         @assert length(x) >= N "chunk size cannot be greater than length(x) ($(N) > $(length(x)))"
@@ -119,80 +121,10 @@ function chunk_mode_gradient_expr(out_definition::Expr)
     end
 end
 
-@eval function chunk_mode_gradient{F,N}(f::F, x, cfg::GradientConfig{N})
+@eval function chunk_mode_gradient{F,T,V,N}(f::F, x, cfg::GradientConfig{T,V,N})  # N is bound from the config's third type parameter
     $(chunk_mode_gradient_expr(:(out = similar(x, valtype(ydual)))))
 end
 
-@eval function chunk_mode_gradient!{F,N}(out, f::F, x, cfg::GradientConfig{N})
+@eval function chunk_mode_gradient!{F,T,V,N}(out, f::F, x, cfg::GradientConfig{T,V,N})  # N is bound from the config's third type parameter
     $(chunk_mode_gradient_expr(:()))
 end
-
-# multithreaded #
-#---------------#
-
-if IS_MULTITHREADED_JULIA
-    function multithread_chunk_mode_expr(out_definition::Expr)
-        return quote
-            cfg = gradient_config(multi_cfg)
-            N = chunksize(cfg)
-            @assert length(x) >= N "chunk size cannot be greater than length(x) ($(N) > $(length(x)))"
-
-            # precalculate loop bounds
-            xlen = length(x)
-            remainder = xlen % N
-            lastchunksize = ifelse(remainder == 0, N, remainder)
-            lastchunkindex = xlen - lastchunksize + 1
-            middlechunks = 2:div(xlen - lastchunksize, N)
-
-            # fetch and seed work vectors
-            current_cfg = cfg[compat_threadid()]
-            current_xdual = current_cfg.duals
-            current_seeds = current_cfg.seeds
-
-            Base.Threads.@threads for t in 1:length(cfg)
-                seed!(cfg[t].duals, x)
-            end
-
-            # do first chunk manually to calculate output type
-            seed!(current_xdual, x, 1, current_seeds)
-            current_ydual = f(current_xdual)
-            $(out_definition)
-            extract_gradient_chunk!(out, current_ydual, 1, N)
-            seed!(current_xdual, x, 1)
-
-            # do middle chunks
-            Base.Threads.@threads for c in middlechunks
-                # see https://github.com/JuliaLang/julia/issues/14948
-                local chunk_cfg = cfg[compat_threadid()]
-                local chunk_xdual = chunk_cfg.duals
-                local chunk_seeds = chunk_cfg.seeds
-                local chunk_index = ((c - 1) * N + 1)
-                seed!(chunk_xdual, x, chunk_index, chunk_seeds)
-                local chunk_dual = f(chunk_xdual)
-                extract_gradient_chunk!(out, chunk_dual, chunk_index, N)
-                seed!(chunk_xdual, x, chunk_index)
-            end
-
-            # do final chunk
-            seed!(current_xdual, x, lastchunkindex, current_seeds, lastchunksize)
-            current_ydual = f(current_xdual)
-            extract_gradient_chunk!(out, current_ydual, lastchunkindex, lastchunksize)
-
-            # load value, this is a no-op unless `out` is a DiffResult
-            extract_value!(out, current_ydual)
-
-            return out
-        end
-    end
-
-    @eval function chunk_mode_gradient{F}(f::F, x, multi_cfg::MultithreadConfig)
-        $(multithread_chunk_mode_expr(:(out = similar(x, valtype(current_ydual)))))
-    end
-
-    @eval function chunk_mode_gradient!{F}(out, f::F, x, multi_cfg::MultithreadConfig)
-        $(multithread_chunk_mode_expr(:()))
-    end
-else
-    chunk_mode_gradient(f, x, cfg::Tuple) = error("Multithreading is not enabled for this Julia installation.")
-    chunk_mode_gradient!(out, f, x, cfg::Tuple) = chunk_mode_gradient!(f, x, cfg)
-end
diff --git a/src/hessian.jl b/src/hessian.jl
index ae59b092..3b3b6410 100644
--- a/src/hessian.jl
+++ b/src/hessian.jl
@@ -2,24 +2,30 @@
 # API methods #
 ###############
 
-function hessian{F}(f::F, x, cfg::AbstractConfig = HessianConfig(x))
-    ∇f = y -> gradient(f, y, gradient_config(cfg))
-    return jacobian(∇f, x, jacobian_config(cfg))
+@compat const AllowedHessianConfig{F,M} = Union{HessianConfig{Tag{F,M}}, HessianConfig{Tag{Void,M}}}  # configs tagged with F itself or with Void
+
+hessian(f, x, cfg::HessianConfig) = throw(ConfigMismatchError(f, cfg))  # generic HessianConfig method: always throws
+hessian!(out, f, x, cfg::HessianConfig) = throw(ConfigMismatchError(f, cfg))  # in-place counterpart: always throws
+hessian!(out::DiffResult, f, x, cfg::HessianConfig) = throw(ConfigMismatchError(f, cfg))  # DiffResult counterpart: always throws
+
+function hessian{F,M}(f::F, x, cfg::AllowedHessianConfig{F,M} = HessianConfig(f, x))  # default config is built from f and x
+    ∇f = y -> gradient(f, y, cfg.gradient_config)  # closure: gradient of f at y, using the config's gradient_config
+    return jacobian(∇f, x, cfg.jacobian_config)  # result is the Jacobian of that gradient closure at x
 end
 
-function hessian!{F}(out, f::F, x, cfg::AbstractConfig = HessianConfig(x))
-    ∇f = y -> gradient(f, y, gradient_config(cfg))
-    jacobian!(out, ∇f, x, jacobian_config(cfg))
+function hessian!{F,M}(out, f::F, x, cfg::AllowedHessianConfig{F,M} = HessianConfig(f, x))  # default config is built from f and x
+    ∇f = y -> gradient(f, y, cfg.gradient_config)  # closure: gradient of f at y, using the config's gradient_config
+    jacobian!(out, ∇f, x, cfg.jacobian_config)  # Jacobian of the gradient closure is written into `out`
     return out
 end
 
-function hessian!{F}(out::DiffResult, f::F, x, cfg::AbstractConfig = HessianConfig(out, x))
+function hessian!{F,M}(out::DiffResult, f::F, x, cfg::AllowedHessianConfig{F,M} = HessianConfig(out, f, x))  # default config is built from out, f and x
     ∇f! = (y, z) -> begin
         result = DiffResult(zero(eltype(y)), y)
-        gradient!(result, f, z, gradient_config(cfg))
+        gradient!(result, f, z, cfg.gradient_config)  # gradient config is read directly from the field
         DiffBase.value!(out, value(DiffBase.value(result)))
         return y
     end
-    jacobian!(DiffBase.hessian(out), ∇f!, DiffBase.gradient(out), x, jacobian_config(cfg))
+    jacobian!(DiffBase.hessian(out), ∇f!, DiffBase.gradient(out), x, cfg.jacobian_config)  # Hessian storage is the output; gradient storage is passed as the `y` argument
     return out
 end
diff --git a/src/jacobian.jl b/src/jacobian.jl
index 83103045..2df9bdc2 100644
--- a/src/jacobian.jl
+++ b/src/jacobian.jl
@@ -2,7 +2,14 @@
 # API methods #
 ###############
 
-function jacobian{F}(f::F, x, cfg::JacobianConfig = JacobianConfig(x))
+@compat const AllowedJacobianConfig{F,M} = Union{JacobianConfig{Tag{F,M}}, JacobianConfig{Tag{Void,M}}}  # configs tagged with F itself or with Void
+
+jacobian(f, x, cfg::JacobianConfig) = throw(ConfigMismatchError(f, cfg))  # generic JacobianConfig method: always throws
+jacobian(f!, y, x, cfg::JacobianConfig) = throw(ConfigMismatchError(f!, cfg))  # same, for the f!(y, x) form
+jacobian!(out, f, x, cfg::JacobianConfig) = throw(ConfigMismatchError(f, cfg))  # in-place counterpart: always throws
+jacobian!(out, f!, y, x, cfg::JacobianConfig) = throw(ConfigMismatchError(f!, cfg))  # in-place counterpart for the f!(y, x) form
+
+function jacobian{F,M}(f::F, x, cfg::AllowedJacobianConfig{F,M} = JacobianConfig(f, x))
     if chunksize(cfg) == length(x)
         return vector_mode_jacobian(f, x, cfg)
     else
@@ -10,7 +17,7 @@ function jacobian{F}(f::F, x, cfg::JacobianConfig = JacobianConfig(x))
     end
 end
 
-function jacobian{F}(f!::F, y, x, cfg::JacobianConfig = JacobianConfig(y, x))
+function jacobian{F,M}(f!::F, y, x, cfg::AllowedJacobianConfig{F,M} = JacobianConfig(f!, y, x))
     if chunksize(cfg) == length(x)
         return vector_mode_jacobian(f!, y, x, cfg)
     else
@@ -18,7 +25,7 @@ function jacobian{F}(f!::F, y, x, cfg::JacobianConfig = JacobianConfig(y, x))
     end
 end
 
-function jacobian!{F}(out, f::F, x, cfg::JacobianConfig = JacobianConfig(x))
+function jacobian!{F,M}(out, f::F, x, cfg::AllowedJacobianConfig{F,M} = JacobianConfig(f, x))
     if chunksize(cfg) == length(x)
         vector_mode_jacobian!(out, f, x, cfg)
     else
@@ -27,7 +34,7 @@ function jacobian!{F}(out, f::F, x, cfg::JacobianConfig = JacobianConfig(x))
     return out
 end
 
-function jacobian!{F}(out, f!::F, y, x, cfg::JacobianConfig = JacobianConfig(y, x))
+function jacobian!{F,M}(out, f!::F, y, x, cfg::AllowedJacobianConfig{F,M} = JacobianConfig(f!, y, x))
     if chunksize(cfg) == length(x)
         vector_mode_jacobian!(out, f!, y, x, cfg)
     else
@@ -71,7 +78,7 @@ reshape_jacobian(out::DiffResult, ydual, xdual) = reshape_jacobian(DiffBase.jaco
 # vector mode #
 ###############
 
-function vector_mode_jacobian{F,N}(f::F, x, cfg::JacobianConfig{N})
+function vector_mode_jacobian{F,T,V,N}(f::F, x, cfg::JacobianConfig{T,V,N})
     ydual = vector_mode_dual_eval(f, x, cfg)
     out = similar(ydual, valtype(eltype(ydual)), length(ydual), N)
     extract_jacobian!(out, ydual, N)
@@ -79,7 +86,7 @@ function vector_mode_jacobian{F,N}(f::F, x, cfg::JacobianConfig{N})
     return out
 end
 
-function vector_mode_jacobian{F,N}(f!::F, y, x, cfg::JacobianConfig{N})
+function vector_mode_jacobian{F,T,V,N}(f!::F, y, x, cfg::JacobianConfig{T,V,N})
     ydual = vector_mode_dual_eval(f!, y, x, cfg)
     map!(value, y, ydual)
     out = similar(y, length(y), N)
@@ -88,14 +95,14 @@ function vector_mode_jacobian{F,N}(f!::F, y, x, cfg::JacobianConfig{N})
     return out
 end
 
-function vector_mode_jacobian!{F,N}(out, f::F, x, cfg::JacobianConfig{N})
+function vector_mode_jacobian!{F,T,V,N}(out, f::F, x, cfg::JacobianConfig{T,V,N})
     ydual = vector_mode_dual_eval(f, x, cfg)
     extract_jacobian!(out, ydual, N)
     extract_value!(out, ydual)
     return out
 end
 
-function vector_mode_jacobian!{F,N}(out, f!::F, y, x, cfg::JacobianConfig{N})
+function vector_mode_jacobian!{F,T,V,N}(out, f!::F, y, x, cfg::JacobianConfig{T,V,N})
     ydual = vector_mode_dual_eval(f!, y, x, cfg)
     map!(value, y, ydual)
     extract_jacobian!(out, ydual, N)
@@ -150,7 +157,7 @@ function jacobian_chunk_mode_expr(work_array_definition::Expr, compute_ydual::Ex
     end
 end
 
-@eval function chunk_mode_jacobian{F,N}(f::F, x, cfg::JacobianConfig{N})
+@eval function chunk_mode_jacobian{F,T,V,N}(f::F, x, cfg::JacobianConfig{T,V,N})
     $(jacobian_chunk_mode_expr(quote
                                    xdual = cfg.duals
                                    seed!(xdual, x)
@@ -160,7 +167,7 @@ end
                                :()))
 end
 
-@eval function chunk_mode_jacobian{F,N}(f!::F, y, x, cfg::JacobianConfig{N})
+@eval function chunk_mode_jacobian{F,T,V,N}(f!::F, y, x, cfg::JacobianConfig{T,V,N})
     $(jacobian_chunk_mode_expr(quote
                                    ydual, xdual = cfg.duals
                                    seed!(xdual, x)
@@ -170,7 +177,7 @@ end
                                :(map!(value, y, ydual))))
 end
 
-@eval function chunk_mode_jacobian!{F,N}(out, f::F, x, cfg::JacobianConfig{N})
+@eval function chunk_mode_jacobian!{F,T,V,N}(out, f::F, x, cfg::JacobianConfig{T,V,N})
     $(jacobian_chunk_mode_expr(quote
                                    xdual = cfg.duals
                                    seed!(xdual, x)
@@ -180,7 +187,7 @@ end
                                :(extract_value!(out, ydual))))
 end
 
-@eval function chunk_mode_jacobian!{F,N}(out, f!::F, y, x, cfg::JacobianConfig{N})
+@eval function chunk_mode_jacobian!{F,T,V,N}(out, f!::F, y, x, cfg::JacobianConfig{T,V,N})
     $(jacobian_chunk_mode_expr(quote
                                    ydual, xdual = cfg.duals
                                    seed!(xdual, x)
diff --git a/src/utils.jl b/src/utils.jl
index 147890bd..0b9b33bb 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -35,42 +35,42 @@ end
 # seed construction/manipulation #
 ##################################
 
-@generated function construct_seeds{N,T}(::Type{Partials{N,T}})
-    return Expr(:tuple, [:(single_seed(Partials{N,T}, Val{$i})) for i in 1:N]...)
+@generated function construct_seeds{N,V}(::Type{Partials{N,V}})  # type parameters are now named {N,V}
+    return Expr(:tuple, [:(single_seed(Partials{N,V}, Val{$i})) for i in 1:N]...)  # expands to an N-tuple of single_seed calls, one per index i
 end
 
-function seed!{N,T}(duals::AbstractArray{Dual{N,T}}, x,
-                    seed::Partials{N,T} = zero(Partials{N,T}))
+function seed!{T,V,N}(duals::AbstractArray{Dual{T,V,N}}, x,  # fills every element of duals from x with one shared seed
+                      seed::Partials{N,V} = zero(Partials{N,V}))  # default seed is the zero Partials
     for i in eachindex(duals)
-        duals[i] = Dual{N,T}(x[i], seed)
+        duals[i] = Dual{T,V,N}(x[i], seed)  # value x[i] paired with the shared seed
     end
     return duals
 end
 
-function seed!{N,T}(duals::AbstractArray{Dual{N,T}}, x,
-                    seeds::NTuple{N,Partials{N,T}})
+function seed!{T,V,N}(duals::AbstractArray{Dual{T,V,N}}, x,  # fills the first N elements of duals, each with its own seed
+                      seeds::NTuple{N,Partials{N,V}})  # exactly N seeds, one per filled element
     for i in 1:N
-        duals[i] = Dual{N,T}(x[i], seeds[i])
+        duals[i] = Dual{T,V,N}(x[i], seeds[i])  # value x[i] paired with the i-th seed
     end
     return duals
 end
 
-function seed!{N,T}(duals::AbstractArray{Dual{N,T}}, x, index,
-                    seed::Partials{N,T} = zero(Partials{N,T}))
+function seed!{T,V,N}(duals::AbstractArray{Dual{T,V,N}}, x, index,  # fills the N elements starting at `index` with one shared seed
+                      seed::Partials{N,V} = zero(Partials{N,V}))  # default seed is the zero Partials
     offset = index - 1
     for i in 1:N
         j = i + offset
-        duals[j] = Dual{N,T}(x[j], seed)
+        duals[j] = Dual{T,V,N}(x[j], seed)  # value x[j] paired with the shared seed
     end
     return duals
 end
 
-function seed!{N,T}(duals::AbstractArray{Dual{N,T}}, x, index,
-                    seeds::NTuple{N,Partials{N,T}}, chunksize = N)
+function seed!{T,V,N}(duals::AbstractArray{Dual{T,V,N}}, x, index,  # fills `chunksize` elements starting at `index`, each with its own seed
+                      seeds::NTuple{N,Partials{N,V}}, chunksize = N)  # chunksize defaults to N
     offset = index - 1
     for i in 1:chunksize
         j = i + offset
-        duals[j] = Dual{N,T}(x[j], seeds[i])
+        duals[j] = Dual{T,V,N}(x[j], seeds[i])  # value x[j] paired with the i-th seed
     end
     return duals
 end
diff --git a/test/DeprecatedTest.jl b/test/DeprecatedTest.jl
index 9cadc9e0..38b3581f 100644
--- a/test/DeprecatedTest.jl
+++ b/test/DeprecatedTest.jl
@@ -47,18 +47,16 @@ ForwardDiff.gradient!(out, f, x, Chunk{1}(); multithread = false)
 @test DiffBase.value(out) == v
 @test DiffBase.gradient(out) == g
 
-if ForwardDiff.IS_MULTITHREADED_JULIA
-    @test ForwardDiff.gradient(f, x, Chunk{1}(); multithread = true) == g
+@test ForwardDiff.gradient(f, x, Chunk{1}(); multithread = true) == g
 
-    out = similar(x)
-    ForwardDiff.gradient!(out, f, x, Chunk{1}(); multithread = true)
-    @test out == g
+out = similar(x)
+ForwardDiff.gradient!(out, f, x, Chunk{1}(); multithread = true)
+@test out == g
 
-    out = DiffBase.GradientResult(x)
-    ForwardDiff.gradient!(out, f, x, Chunk{1}(); multithread = true)
-    @test DiffBase.value(out) == v
-    @test DiffBase.gradient(out) == g
-end
+out = DiffBase.GradientResult(x)
+ForwardDiff.gradient!(out, f, x, Chunk{1}(); multithread = true)
+@test DiffBase.value(out) == v
+@test DiffBase.gradient(out) == g
 
 ######################
 # jacobian/jacobian! #
@@ -83,18 +81,16 @@ ForwardDiff.jacobian!(out, f, x, Chunk{1}(); multithread = false)
 @test DiffBase.value(out) == y
 @test DiffBase.jacobian(out) == j
 
-if ForwardDiff.IS_MULTITHREADED_JULIA
-    @test ForwardDiff.jacobian(f, x, Chunk{1}(); multithread = true) == j
+@test ForwardDiff.jacobian(f, x, Chunk{1}(); multithread = true) == j
 
-    out = similar(x, length(y), length(x))
-    ForwardDiff.jacobian!(out, f, x, Chunk{1}(); multithread = true)
-    @test out == j
+out = similar(x, length(y), length(x))
+ForwardDiff.jacobian!(out, f, x, Chunk{1}(); multithread = true)
+@test out == j
 
-    out = DiffBase.JacobianResult(x)
-    ForwardDiff.jacobian!(out, f, x, Chunk{1}(); multithread = true)
-    @test DiffBase.value(out) == y
-    @test DiffBase.jacobian(out) == j
-end
+out = DiffBase.JacobianResult(x)
+ForwardDiff.jacobian!(out, f, x, Chunk{1}(); multithread = true)
+@test DiffBase.value(out) == y
+@test DiffBase.jacobian(out) == j
 
 # f!(y, x) #
 #----------#
@@ -115,18 +111,16 @@ ForwardDiff.jacobian!(out, f!, y, x, Chunk{1}(); multithread = false)
 @test DiffBase.value(out) == y
 @test DiffBase.jacobian(out) == j
 
-if ForwardDiff.IS_MULTITHREADED_JULIA
-    @test ForwardDiff.jacobian(f!, y, x, Chunk{1}(); multithread = true) == j
+@test ForwardDiff.jacobian(f!, y, x, Chunk{1}(); multithread = true) == j
 
-    out = similar(x, length(y), length(x))
-    ForwardDiff.jacobian!(out, f!, y, x, Chunk{1}(); multithread = true)
-    @test out == j
+out = similar(x, length(y), length(x))
+ForwardDiff.jacobian!(out, f!, y, x, Chunk{1}(); multithread = true)
+@test out == j
 
-    out = DiffBase.JacobianResult(y, x)
-    ForwardDiff.jacobian!(out, f!, y, x, Chunk{1}(); multithread = true)
-    @test DiffBase.value(out) == y
-    @test DiffBase.jacobian(out) == j
-end
+out = DiffBase.JacobianResult(y, x)
+ForwardDiff.jacobian!(out, f!, y, x, Chunk{1}(); multithread = true)
+@test DiffBase.value(out) == y
+@test DiffBase.jacobian(out) == j
 
 ####################
 # hessian/hessian! #
@@ -150,19 +144,17 @@ ForwardDiff.hessian!(out, f, x, Chunk{1}(); multithread = false)
 @test DiffBase.gradient(out) == g
 @test DiffBase.hessian(out) == h
 
-if ForwardDiff.IS_MULTITHREADED_JULIA
-    @test ForwardDiff.hessian(f, x, Chunk{1}(); multithread = true) == h
+@test ForwardDiff.hessian(f, x, Chunk{1}(); multithread = true) == h
 
-    out = similar(x, length(x), length(x))
-    ForwardDiff.hessian!(out, f, x, Chunk{1}(); multithread = true)
-    @test out == h
+out = similar(x, length(x), length(x))
+ForwardDiff.hessian!(out, f, x, Chunk{1}(); multithread = true)
+@test out == h
 
-    out = DiffBase.HessianResult(x)
-    ForwardDiff.hessian!(out, f, x, Chunk{1}(); multithread = true)
-    @test DiffBase.value(out) == v
-    @test DiffBase.gradient(out) == g
-    @test DiffBase.hessian(out) == h
-end
+out = DiffBase.HessianResult(x)
+ForwardDiff.hessian!(out, f, x, Chunk{1}(); multithread = true)
+@test DiffBase.value(out) == v
+@test DiffBase.gradient(out) == g
+@test DiffBase.hessian(out) == h
 
 info("Deprecation testing is now complete, so any further deprecation warnings are real.")
 
diff --git a/test/DualTest.jl b/test/DualTest.jl
index 53dc610e..83b1c96e 100644
--- a/test/DualTest.jl
+++ b/test/DualTest.jl
@@ -13,7 +13,7 @@ samerng() = MersenneTwister(1)
 # By lower-bounding the Int range at 2, we avoid cases where differentiating an
 # exponentiation of an Int value would cause a DomainError due to reducing the
 # exponent by one
-intrand(T) = T == Int ? rand(2:10) : rand(T)
+intrand(V) = V == Int ? rand(2:10) : rand(V)
 
 # fix testing issue with Base.hypot(::Int...) undefined in 0.4
 if v"0.4" <= VERSION < v"0.5"
@@ -30,34 +30,34 @@ else
     test_approx_diffnums(a::Real, b::Real) = @test isapprox(a, b)
 end
 
-function test_approx_diffnums{N}(a::Dual{N}, b::Dual{N})
+function test_approx_diffnums{T,A,B,N}(a::Dual{T,A,N}, b::Dual{T,B,N})  # a and b must share tag T and partial count N; value types A and B may differ
     test_approx_diffnums(value(a), value(b))
     for i in 1:N
         test_approx_diffnums(partials(a)[i], partials(b)[i])
     end
 end
 
-for N in (0,3), M in (0,4), T in (Int, Float32)
-    println("  ...testing Dual{$N,$T} and Dual{$N,Dual{$M,$T}}")
+for N in (0,3), M in (0,4), V in (Int, Float32)
+    println("  ...testing Dual{Void,$V,$N} and Dual{Void,Dual{Void,$V,$M},$N}")
 
-    PARTIALS = Partials{N,T}(ntuple(n -> intrand(T), Val{N}))
-    PRIMAL = intrand(T)
+    PARTIALS = Partials{N,V}(ntuple(n -> intrand(V), Val{N}))
+    PRIMAL = intrand(V)
     FDNUM = Dual(PRIMAL, PARTIALS)
 
-    PARTIALS2 = Partials{N,T}(ntuple(n -> intrand(T), Val{N}))
-    PRIMAL2 = intrand(T)
+    PARTIALS2 = Partials{N,V}(ntuple(n -> intrand(V), Val{N}))
+    PRIMAL2 = intrand(V)
     FDNUM2 = Dual(PRIMAL2, PARTIALS2)
 
-    PARTIALS3 = Partials{N,T}(ntuple(n -> intrand(T), Val{N}))
-    PRIMAL3 = intrand(T)
+    PARTIALS3 = Partials{N,V}(ntuple(n -> intrand(V), Val{N}))
+    PRIMAL3 = intrand(V)
     FDNUM3 = Dual(PRIMAL3, PARTIALS3)
 
-    M_PARTIALS = Partials{M,T}(ntuple(m -> intrand(T), Val{M}))
-    NESTED_PARTIALS = convert(Partials{N,Dual{M,T}}, PARTIALS)
+    M_PARTIALS = Partials{M,V}(ntuple(m -> intrand(V), Val{M}))
+    NESTED_PARTIALS = convert(Partials{N,Dual{Void,V,M}}, PARTIALS)
     NESTED_FDNUM = Dual(Dual(PRIMAL, M_PARTIALS), NESTED_PARTIALS)
 
-    M_PARTIALS2 = Partials{M,T}(ntuple(m -> intrand(T), Val{M}))
-    NESTED_PARTIALS2 = convert(Partials{N,Dual{M,T}}, PARTIALS2)
+    M_PARTIALS2 = Partials{M,V}(ntuple(m -> intrand(V), Val{M}))
+    NESTED_PARTIALS2 = convert(Partials{N,Dual{Void,V,M}}, PARTIALS2)
     NESTED_FDNUM2 = Dual(Dual(PRIMAL2, M_PARTIALS2), NESTED_PARTIALS2)
 
     ################
@@ -65,10 +65,10 @@ for N in (0,3), M in (0,4), T in (Int, Float32)
     ################
 
     @test Dual(PRIMAL, PARTIALS...) === FDNUM
-    @test typeof(Dual(widen(T)(PRIMAL), PARTIALS)) === Dual{N,widen(T)}
-    @test typeof(Dual(widen(T)(PRIMAL), PARTIALS.values)) === Dual{N,widen(T)}
-    @test typeof(Dual(widen(T)(PRIMAL), PARTIALS...)) === Dual{N,widen(T)}
-    @test typeof(NESTED_FDNUM) == Dual{N,Dual{M,T}}
+    @test typeof(Dual(widen(V)(PRIMAL), PARTIALS)) === Dual{Void,widen(V),N}
+    @test typeof(Dual(widen(V)(PRIMAL), PARTIALS.values)) === Dual{Void,widen(V),N}
+    @test typeof(Dual(widen(V)(PRIMAL), PARTIALS...)) === Dual{Void,widen(V),N}
+    @test typeof(NESTED_FDNUM) == Dual{Void,Dual{Void,V,M},N}
 
     #############
     # Accessors #
@@ -78,7 +78,7 @@ for N in (0,3), M in (0,4), T in (Int, Float32)
     @test value(FDNUM) == PRIMAL
     @test value(NESTED_FDNUM) === Dual(PRIMAL, M_PARTIALS)
 
-    @test partials(PRIMAL) == Partials{0,T}(tuple())
+    @test partials(PRIMAL) == Partials{0,V}(tuple())
     @test partials(FDNUM) == PARTIALS
     @test partials(NESTED_FDNUM) === NESTED_PARTIALS
 
@@ -94,10 +94,10 @@ for N in (0,3), M in (0,4), T in (Int, Float32)
     @test ForwardDiff.npartials(NESTED_FDNUM) == N
     @test ForwardDiff.npartials(typeof(NESTED_FDNUM)) == N
 
-    @test ForwardDiff.valtype(FDNUM) == T
-    @test ForwardDiff.valtype(typeof(FDNUM)) == T
-    @test ForwardDiff.valtype(NESTED_FDNUM) == Dual{M,T}
-    @test ForwardDiff.valtype(typeof(NESTED_FDNUM)) == Dual{M,T}
+    @test ForwardDiff.valtype(FDNUM) == V
+    @test ForwardDiff.valtype(typeof(FDNUM)) == V
+    @test ForwardDiff.valtype(NESTED_FDNUM) == Dual{Void,V,M}
+    @test ForwardDiff.valtype(typeof(NESTED_FDNUM)) == Dual{Void,V,M}
 
     #####################
     # Generic Functions #
@@ -106,11 +106,11 @@ for N in (0,3), M in (0,4), T in (Int, Float32)
     @test FDNUM === copy(FDNUM)
     @test NESTED_FDNUM === copy(NESTED_FDNUM)
 
-    if T != Int
+    if V != Int
         @test eps(FDNUM) === eps(PRIMAL)
-        @test eps(typeof(FDNUM)) === eps(T)
+        @test eps(typeof(FDNUM)) === eps(V)
         @test eps(NESTED_FDNUM) === eps(PRIMAL)
-        @test eps(typeof(NESTED_FDNUM)) === eps(T)
+        @test eps(typeof(NESTED_FDNUM)) === eps(V)
 
         @test floor(Int, FDNUM) === floor(Int, PRIMAL)
         @test floor(Int, FDNUM2) === floor(Int, PRIMAL2)
@@ -145,7 +145,7 @@ for N in (0,3), M in (0,4), T in (Int, Float32)
         @test round(NESTED_FDNUM) === round(PRIMAL)
 
         @test Base.rtoldefault(typeof(FDNUM)) ≡ Base.rtoldefault(typeof(PRIMAL))
-        @test Dual(PRIMAL-eps(T), PARTIALS) ≈ FDNUM
+        @test Dual(PRIMAL-eps(V), PARTIALS) ≈ FDNUM
         @test Base.rtoldefault(typeof(NESTED_FDNUM)) ≡ Base.rtoldefault(typeof(PRIMAL))
     end
 
@@ -169,19 +169,19 @@ for N in (0,3), M in (0,4), T in (Int, Float32)
     close(TMPIO)
 
     @test zero(FDNUM) === Dual(zero(PRIMAL), zero(PARTIALS))
-    @test zero(typeof(FDNUM)) === Dual(zero(T), zero(Partials{N,T}))
+    @test zero(typeof(FDNUM)) === Dual(zero(V), zero(Partials{N,V}))
     @test zero(NESTED_FDNUM) === Dual(Dual(zero(PRIMAL), zero(M_PARTIALS)), zero(NESTED_PARTIALS))
-    @test zero(typeof(NESTED_FDNUM)) === Dual(Dual(zero(T), zero(Partials{M,T})), zero(Partials{N,Dual{M,T}}))
+    @test zero(typeof(NESTED_FDNUM)) === Dual(Dual(zero(V), zero(Partials{M,V})), zero(Partials{N,Dual{Void,V,M}}))
 
     @test one(FDNUM) === Dual(one(PRIMAL), zero(PARTIALS))
-    @test one(typeof(FDNUM)) === Dual(one(T), zero(Partials{N,T}))
+    @test one(typeof(FDNUM)) === Dual(one(V), zero(Partials{N,V}))
     @test one(NESTED_FDNUM) === Dual(Dual(one(PRIMAL), zero(M_PARTIALS)), zero(NESTED_PARTIALS))
-    @test one(typeof(NESTED_FDNUM)) === Dual(Dual(one(T), zero(Partials{M,T})), zero(Partials{N,Dual{M,T}}))
+    @test one(typeof(NESTED_FDNUM)) === Dual(Dual(one(V), zero(Partials{M,V})), zero(Partials{N,Dual{Void,V,M}}))
 
-    @test rand(samerng(), FDNUM) === Dual(rand(samerng(), T), zero(PARTIALS))
-    @test rand(samerng(), typeof(FDNUM)) === Dual(rand(samerng(), T), zero(Partials{N,T}))
-    @test rand(samerng(), NESTED_FDNUM) === Dual(Dual(rand(samerng(), T), zero(M_PARTIALS)), zero(NESTED_PARTIALS))
-    @test rand(samerng(), typeof(NESTED_FDNUM)) === Dual(Dual(rand(samerng(), T), zero(Partials{M,T})), zero(Partials{N,Dual{M,T}}))
+    @test rand(samerng(), FDNUM) === Dual(rand(samerng(), V), zero(PARTIALS))
+    @test rand(samerng(), typeof(FDNUM)) === Dual(rand(samerng(), V), zero(Partials{N,V}))
+    @test rand(samerng(), NESTED_FDNUM) === Dual(Dual(rand(samerng(), V), zero(M_PARTIALS)), zero(NESTED_PARTIALS))
+    @test rand(samerng(), typeof(NESTED_FDNUM)) === Dual(Dual(rand(samerng(), V), zero(Partials{M,V})), zero(Partials{N,Dual{Void,V,M}}))
 
     # Predicates #
     #------------#
@@ -268,10 +268,10 @@ for N in (0,3), M in (0,4), T in (Int, Float32)
     @test isreal(NESTED_FDNUM)
 
     @test isinteger(Dual(1.0, PARTIALS))
-    @test isinteger(FDNUM) == (T == Int)
+    @test isinteger(FDNUM) == (V == Int)
 
     @test isinteger(Dual(Dual(1.0, M_PARTIALS), NESTED_PARTIALS))
-    @test isinteger(NESTED_FDNUM) == (T == Int)
+    @test isinteger(NESTED_FDNUM) == (V == Int)
 
     @test iseven(Dual(2))
     @test !(iseven(Dual(1)))
@@ -289,42 +289,42 @@ for N in (0,3), M in (0,4), T in (Int, Float32)
     # Promotion/Conversion #
     ########################
 
-    const WIDE_T = widen(T)
+    const WIDE_T = widen(V)
 
-    @test promote_type(Dual{N,T}, T) == Dual{N,T}
-    @test promote_type(Dual{N,T}, WIDE_T) == Dual{N,WIDE_T}
-    @test promote_type(Dual{N,WIDE_T}, T) == Dual{N,WIDE_T}
-    @test promote_type(Dual{N,T}, Dual{N,T}) == Dual{N,T}
-    @test promote_type(Dual{N,T}, Dual{N,WIDE_T}) == Dual{N,WIDE_T}
-    @test promote_type(Dual{N,WIDE_T}, Dual{N,Dual{M,T}}) == Dual{N,Dual{M,WIDE_T}}
+    @test promote_type(Dual{Void,V,N}, V) == Dual{Void,V,N}
+    @test promote_type(Dual{Void,V,N}, WIDE_T) == Dual{Void,WIDE_T,N}
+    @test promote_type(Dual{Void,WIDE_T,N}, V) == Dual{Void,WIDE_T,N}
+    @test promote_type(Dual{Void,V,N}, Dual{Void,V,N}) == Dual{Void,V,N}
+    @test promote_type(Dual{Void,V,N}, Dual{Void,WIDE_T,N}) == Dual{Void,WIDE_T,N}
+    @test promote_type(Dual{Void,WIDE_T,N}, Dual{Void,Dual{Void,V,M},N}) == Dual{Void,Dual{Void,WIDE_T,M},N}
 
-    const WIDE_FDNUM = convert(Dual{N,WIDE_T}, FDNUM)
-    const WIDE_NESTED_FDNUM = convert(Dual{N,Dual{M,WIDE_T}}, NESTED_FDNUM)
+    const WIDE_FDNUM = convert(Dual{Void,WIDE_T,N}, FDNUM)
+    const WIDE_NESTED_FDNUM = convert(Dual{Void,Dual{Void,WIDE_T,M},N}, NESTED_FDNUM)
 
-    @test typeof(WIDE_FDNUM) === Dual{N,WIDE_T}
-    @test typeof(WIDE_NESTED_FDNUM) === Dual{N,Dual{M,WIDE_T}}
+    @test typeof(WIDE_FDNUM) === Dual{Void,WIDE_T,N}
+    @test typeof(WIDE_NESTED_FDNUM) === Dual{Void,Dual{Void,WIDE_T,M},N}
 
     @test value(WIDE_FDNUM) == PRIMAL
     @test value(WIDE_NESTED_FDNUM) == PRIMAL
 
     @test convert(Dual, FDNUM) === FDNUM
     @test convert(Dual, NESTED_FDNUM) === NESTED_FDNUM
-    @test convert(Dual{N,T}, FDNUM) === FDNUM
-    @test convert(Dual{N,Dual{M,T}}, NESTED_FDNUM) === NESTED_FDNUM
-    @test convert(Dual{N,WIDE_T}, PRIMAL) === Dual(WIDE_T(PRIMAL), zero(Partials{N,WIDE_T}))
-    @test convert(Dual{N,Dual{M,WIDE_T}}, PRIMAL) === Dual(Dual(WIDE_T(PRIMAL), zero(Partials{M,WIDE_T})), zero(Partials{N,Dual{M,T}}))
-    @test convert(Dual{N,Dual{M,T}}, FDNUM) === Dual(Dual{M,T}(PRIMAL), convert(Partials{N,Dual{M,T}}, PARTIALS))
-    @test convert(Dual{N,Dual{M,WIDE_T}}, FDNUM) === Dual(Dual{M,WIDE_T}(PRIMAL), convert(Partials{N,Dual{M,WIDE_T}}, PARTIALS))
-
-    if T != Int
-        @test Base.promote_array_type(+, Dual{N,T}, T, Base.promote_op(+, Dual{N,T}, T)) == Dual{N,T}
-        @test Base.promote_array_type(+, Dual{N,Int}, T, Base.promote_op(+, Dual{N,Int}, T)) == Dual{N,T}
-        @test Base.promote_array_type(+, T, Dual{N,T}, Base.promote_op(+, T, Dual{N,T})) == Dual{N,T}
-        @test Base.promote_array_type(+, T, Dual{N,Int}, Base.promote_op(+, T, Dual{N,Int})) == Dual{N,T}
-        @test Base.promote_array_type(+, Dual{N,T}, T) == Dual{N,T}
-        @test Base.promote_array_type(+, Dual{N,Int}, T) == Dual{N,T}
-        @test Base.promote_array_type(+, T, Dual{N,T}) == Dual{N,T}
-        @test Base.promote_array_type(+, T, Dual{N,Int}) == Dual{N,T}
+    @test convert(Dual{Void,V,N}, FDNUM) === FDNUM
+    @test convert(Dual{Void,Dual{Void,V,M},N}, NESTED_FDNUM) === NESTED_FDNUM
+    @test convert(Dual{Void,WIDE_T,N}, PRIMAL) === Dual(WIDE_T(PRIMAL), zero(Partials{N,WIDE_T}))
+    @test convert(Dual{Void,Dual{Void,WIDE_T,M},N}, PRIMAL) === Dual(Dual(WIDE_T(PRIMAL), zero(Partials{M,WIDE_T})), zero(Partials{N,Dual{Void,V,M}}))
+    @test convert(Dual{Void,Dual{Void,V,M},N}, FDNUM) === Dual(Dual{Void,V,M}(PRIMAL), convert(Partials{N,Dual{Void,V,M}}, PARTIALS))
+    @test convert(Dual{Void,Dual{Void,WIDE_T,M},N}, FDNUM) === Dual(Dual{Void,WIDE_T,M}(PRIMAL), convert(Partials{N,Dual{Void,WIDE_T,M}}, PARTIALS))
+
+    if V != Int
+        @test Base.promote_array_type(+, Dual{Void,V,N}, V, Base.promote_op(+, Dual{Void,V,N}, V)) == Dual{Void,V,N}
+        @test Base.promote_array_type(+, Dual{Void,Int,N}, V, Base.promote_op(+, Dual{Void,Int,N}, V)) == Dual{Void,V,N}
+        @test Base.promote_array_type(+, V, Dual{Void,V,N}, Base.promote_op(+, V, Dual{Void,V,N})) == Dual{Void,V,N}
+        @test Base.promote_array_type(+, V, Dual{Void,Int,N}, Base.promote_op(+, V, Dual{Void,Int,N})) == Dual{Void,V,N}
+        @test Base.promote_array_type(+, Dual{Void,V,N}, V) == Dual{Void,V,N}
+        @test Base.promote_array_type(+, Dual{Void,Int,N}, V) == Dual{Void,V,N}
+        @test Base.promote_array_type(+, V, Dual{Void,V,N}) == Dual{Void,V,N}
+        @test Base.promote_array_type(+, V, Dual{Void,Int,N}) == Dual{Void,V,N}
     end
 
     ########
@@ -361,12 +361,11 @@ for N in (0,3), M in (0,4), T in (Int, Float32)
     @test PRIMAL * NESTED_FDNUM === Dual(value(NESTED_FDNUM) * PRIMAL, partials(NESTED_FDNUM) * PRIMAL)
 
     if M > 0 && N > 0
-        @test Dual(FDNUM) / Dual(PRIMAL) === Dual(FDNUM / PRIMAL)
-        @test Dual(PRIMAL) / Dual(FDNUM) === Dual(PRIMAL / FDNUM)
-        @test Dual(FDNUM) / FDNUM2 === FDNUM / FDNUM2
-        @test FDNUM / Dual(FDNUM2) === FDNUM / FDNUM2
-        @test Dual(FDNUM, FDNUM2) / Dual(PRIMAL) === Dual(FDNUM, FDNUM2) / PRIMAL
-        @test Dual(PRIMAL) / Dual(FDNUM, FDNUM2) === PRIMAL / Dual(FDNUM, FDNUM2)
+        @test Dual{1}(FDNUM) / Dual{1}(PRIMAL) === Dual{1}(FDNUM / PRIMAL)
+        @test Dual{1}(PRIMAL) / Dual{1}(FDNUM) === Dual{1}(PRIMAL / FDNUM)
+        @test Dual{1}(FDNUM) / FDNUM2 === Dual{1}(FDNUM / FDNUM2)
+        @test FDNUM / Dual{1}(FDNUM2) === Dual{1}(FDNUM / FDNUM2)
+        @test Dual{1}(FDNUM / PRIMAL, FDNUM2 / PRIMAL) === Dual{1}(FDNUM, FDNUM2) / PRIMAL
     end
 
     test_approx_diffnums(FDNUM / FDNUM2, Dual(value(FDNUM) / value(FDNUM2), ForwardDiff._div_partials(partials(FDNUM), partials(FDNUM2), value(FDNUM), value(FDNUM2))))
@@ -417,7 +416,7 @@ for N in (0,3), M in (0,4), T in (Int, Float32)
     @test abs(-NESTED_FDNUM) === NESTED_FDNUM
     @test abs(NESTED_FDNUM) === NESTED_FDNUM
 
-    if T != Int
+    if V != Int
         UNSUPPORTED_NESTED_FUNCS = (:trigamma, :airyprime, :besselj1, :bessely1)
         DOMAIN_ERR_FUNCS = (:asec, :acsc, :asecd, :acscd, :acoth, :acosh)
 
@@ -452,14 +451,14 @@ for N in (0,3), M in (0,4), T in (Int, Float32)
         end
     end
 
-    # Manually Optimized Functions #
-    #------------------------------#
+    # Special Cases #
+    #---------------#
 
     test_approx_diffnums(hypot(FDNUM, FDNUM2), sqrt(FDNUM^2 + FDNUM2^2))
     test_approx_diffnums(hypot(FDNUM, FDNUM2, FDNUM), sqrt(2*(FDNUM^2) + FDNUM2^2))
     map(test_approx_diffnums, ForwardDiff.sincos(FDNUM), (sin(FDNUM), cos(FDNUM)))
 
-    if T === Float32
+    if V === Float32
         @test typeof(sqrt(FDNUM)) === typeof(FDNUM)
         @test typeof(sqrt(NESTED_FDNUM)) === typeof(NESTED_FDNUM)
     end
diff --git a/test/GradientTest.jl b/test/GradientTest.jl
index 6b468b43..487ad6b2 100644
--- a/test/GradientTest.jl
+++ b/test/GradientTest.jl
@@ -16,12 +16,10 @@ x = [0.1, 0.2, 0.3]
 v = f(x)
 g = [-9.4, 15.6, 52.0]
 
-for c in (1, 2, 3)
-    println("  ...running hardcoded test with chunk size = $c")
-    cfg = ForwardDiff.GradientConfig{c}(x)
+for c in (1, 2, 3), tag in (nothing, f)
+    println("  ...running hardcoded test with chunk size = $c and tag = $tag")
+    cfg = ForwardDiff.GradientConfig(tag, x, ForwardDiff.Chunk{c}())
 
-    # single-threaded #
-    #-----------------#
     @test isapprox(g, ForwardDiff.gradient(f, x, cfg))
     @test isapprox(g, ForwardDiff.gradient(f, x))
 
@@ -41,24 +39,6 @@ for c in (1, 2, 3)
     out = DiffBase.GradientResult(x)
     ForwardDiff.gradient!(out, f, x)
     @test isapprox(DiffBase.value(out), v)
-    @test isapprox(DiffBase.gradient(out), g)
-
-    # multithreaded #
-    #---------------#
-    if ForwardDiff.IS_MULTITHREADED_JULIA
-        multi_cfg = ForwardDiff.MultithreadConfig(cfg)
-
-        @test isapprox(g, ForwardDiff.gradient(f, x, multi_cfg))
-
-        out = similar(x)
-        ForwardDiff.gradient!(out, f, x, multi_cfg)
-        @test isapprox(out, g)
-
-        out = DiffBase.GradientResult(x)
-        ForwardDiff.gradient!(out, f, x, multi_cfg)
-        @test isapprox(DiffBase.value(out), v)
-        @test isapprox(DiffBase.gradient(out), g)
-    end
 end
 
 ########################
@@ -69,12 +49,10 @@ for f in DiffBase.VECTOR_TO_NUMBER_FUNCS
     v = f(X)
     g = ForwardDiff.gradient(f, X)
     @test isapprox(g, Calculus.gradient(f, X), atol=FINITEDIFF_ERROR)
-    for c in CHUNK_SIZES
-        println("  ...testing $f with chunk size = $c")
-        cfg = ForwardDiff.GradientConfig{c}(X)
+    for c in CHUNK_SIZES, tag in (nothing, f)
+        println("  ...testing $f with chunk size = $c and tag = $tag")
+        cfg = ForwardDiff.GradientConfig(tag, X, ForwardDiff.Chunk{c}())
 
-        # single-threaded #
-        #-----------------#
         out = ForwardDiff.gradient(f, X, cfg)
         @test isapprox(out, g)
 
@@ -86,24 +64,6 @@ for f in DiffBase.VECTOR_TO_NUMBER_FUNCS
         ForwardDiff.gradient!(out, f, X, cfg)
         @test isapprox(DiffBase.value(out), v)
         @test isapprox(DiffBase.gradient(out), g)
-
-        # multithreaded #
-        #---------------#
-        if ForwardDiff.IS_MULTITHREADED_JULIA
-            multi_cfg = ForwardDiff.MultithreadConfig(cfg)
-
-            out = ForwardDiff.gradient(f, X, multi_cfg)
-            @test isapprox(out, g)
-
-            out = similar(X)
-            ForwardDiff.gradient!(out, f, X, multi_cfg)
-            @test isapprox(out, g)
-
-            out = DiffBase.GradientResult(X)
-            ForwardDiff.gradient!(out, f, X, multi_cfg)
-            @test isapprox(DiffBase.value(out), v)
-            @test isapprox(DiffBase.gradient(out), g)
-        end
     end
 end
 
diff --git a/test/HessianTest.jl b/test/HessianTest.jl
index 2b89e50e..cb16271d 100644
--- a/test/HessianTest.jl
+++ b/test/HessianTest.jl
@@ -19,13 +19,11 @@ h = [-66.0  -40.0    0.0;
      -40.0  130.0  -80.0;
        0.0  -80.0  200.0]
 
-for c in (1, 2, 3)
-    println("  ...running hardcoded test with chunk size = $c")
-    cfg = ForwardDiff.HessianConfig{c}(x)
-    resultcfg = ForwardDiff.HessianConfig{c}(DiffBase.HessianResult(x), x)
+for c in (1, 2, 3), tag in (nothing, f)
+    println("  ...running hardcoded test with chunk size = $c and tag = $tag")
+    cfg = ForwardDiff.HessianConfig(tag, x, ForwardDiff.Chunk{c}())
+    resultcfg = ForwardDiff.HessianConfig(DiffBase.HessianResult(x), tag, x, ForwardDiff.Chunk{c}())
 
-    # single-threaded #
-    #-----------------#
     @test isapprox(h, ForwardDiff.hessian(f, x))
     @test isapprox(h, ForwardDiff.hessian(f, x, cfg))
 
@@ -48,25 +46,6 @@ for c in (1, 2, 3)
     @test isapprox(DiffBase.value(out), v)
     @test isapprox(DiffBase.gradient(out), g)
     @test isapprox(DiffBase.hessian(out), h)
-
-    # multithreaded #
-    #---------------#
-    if ForwardDiff.IS_MULTITHREADED_JULIA
-        multi_cfg = ForwardDiff.MultithreadConfig(cfg)
-        multi_resultcfg = ForwardDiff.MultithreadConfig(resultcfg)
-
-        @test isapprox(h, ForwardDiff.hessian(f, x, multi_cfg))
-
-        out = similar(x, 3, 3)
-        ForwardDiff.hessian!(out, f, x, multi_cfg)
-        @test isapprox(out, h)
-
-        out = DiffBase.HessianResult(x)
-        ForwardDiff.hessian!(out, f, x, multi_resultcfg)
-        @test isapprox(DiffBase.value(out), v)
-        @test isapprox(DiffBase.gradient(out), g)
-        @test isapprox(DiffBase.hessian(out), h)
-    end
 end
 
 ########################
@@ -79,13 +58,11 @@ for f in DiffBase.VECTOR_TO_NUMBER_FUNCS
     h = ForwardDiff.hessian(f, X)
     # finite difference approximation error is really bad for Hessians...
     @test isapprox(h, Calculus.hessian(f, X), atol=0.02)
-    for c in CHUNK_SIZES
-        println("  ...testing $f with chunk size = $c")
-        cfg = ForwardDiff.HessianConfig{c}(X)
-        resultcfg = ForwardDiff.HessianConfig{c}(DiffBase.HessianResult(X), X)
+    for c in CHUNK_SIZES, tag in (nothing, f)
+        println("  ...testing $f with chunk size = $c and tag = $tag")
+        cfg = ForwardDiff.HessianConfig(tag, X, ForwardDiff.Chunk{c}())
+        resultcfg = ForwardDiff.HessianConfig(DiffBase.HessianResult(X), tag, X, ForwardDiff.Chunk{c}())
 
-        # single-threaded #
-        #-----------------#
         out = ForwardDiff.hessian(f, X, cfg)
         @test isapprox(out, h)
 
@@ -98,26 +75,6 @@ for f in DiffBase.VECTOR_TO_NUMBER_FUNCS
         @test isapprox(DiffBase.value(out), v)
         @test isapprox(DiffBase.gradient(out), g)
         @test isapprox(DiffBase.hessian(out), h)
-
-        # multithreaded #
-        #---------------#
-        if ForwardDiff.IS_MULTITHREADED_JULIA
-            multi_cfg = ForwardDiff.MultithreadConfig(cfg)
-            multi_resultcfg = ForwardDiff.MultithreadConfig(resultcfg)
-
-            out = ForwardDiff.hessian(f, X, multi_cfg)
-            @test isapprox(out, h)
-
-            out = similar(X, length(X), length(X))
-            ForwardDiff.hessian!(out, f, X, multi_cfg)
-            @test isapprox(out, h)
-
-            out = DiffBase.HessianResult(X)
-            ForwardDiff.hessian!(out, f, X, multi_resultcfg)
-            @test isapprox(DiffBase.value(out), v)
-            @test isapprox(DiffBase.gradient(out), g)
-            @test isapprox(DiffBase.hessian(out), h)
-        end
     end
 end
 
diff --git a/test/JacobianTest.jl b/test/JacobianTest.jl
index 4e7ae9ac..83ee0a47 100644
--- a/test/JacobianTest.jl
+++ b/test/JacobianTest.jl
@@ -28,10 +28,10 @@ j = [0.8242369704835132  0.4121184852417566  -10.933563142616123
      0.169076696546684   0.084538348273342   -2.299173530851733
      0.0                 0.0                 1.0]
 
-for c in (1, 2, 3)
-    println("  ...running hardcoded tests with chunk size $c")
-    cfg = JacobianConfig{c}(x)
-    ycfg = JacobianConfig{c}(zeros(4), x)
+for c in (1, 2, 3), tags in ((nothing, nothing), (f, f!))
+    println("  ...running hardcoded test with chunk size = $c and tag = $tags")
+    cfg = JacobianConfig(tags[1], x, ForwardDiff.Chunk{c}())
+    ycfg = JacobianConfig(tags[2], zeros(4), x, ForwardDiff.Chunk{c}())
 
     # testing f(x)
     @test isapprox(j, ForwardDiff.jacobian(f, x, cfg))
@@ -46,7 +46,7 @@ for c in (1, 2, 3)
     @test isapprox(out, j)
 
     out = DiffBase.JacobianResult(zeros(4), zeros(3))
-    ForwardDiff.jacobian!(out, f, x, JacobianConfig(x))
+    ForwardDiff.jacobian!(out, f, x, JacobianConfig(tags[1], x))
     @test isapprox(DiffBase.value(out), v)
     @test isapprox(DiffBase.jacobian(out), j)
 
@@ -92,10 +92,10 @@ for f in DiffBase.ARRAY_TO_ARRAY_FUNCS
     v = f(X)
     j = ForwardDiff.jacobian(f, X)
     @test isapprox(j, Calculus.jacobian(x -> vec(f(x)), X, :forward), atol=FINITEDIFF_ERROR)
-    for c in CHUNK_SIZES
-        cfg = JacobianConfig{c}(X)
+    for c in CHUNK_SIZES, tag in (nothing, f)
+        println("  ...testing $f with chunk size = $c and tag = $tag")
+        cfg = JacobianConfig(tag, X, ForwardDiff.Chunk{c}())
 
-        println("  ...testing $f with chunk size = $c")
         out = ForwardDiff.jacobian(f, X, cfg)
         @test isapprox(out, j)
 
@@ -115,11 +115,10 @@ for f! in DiffBase.INPLACE_ARRAY_TO_ARRAY_FUNCS
     f!(v, X)
     j = ForwardDiff.jacobian(f!, zeros(Y), X)
     @test isapprox(j, Calculus.jacobian(x -> (y = zeros(Y); f!(y, x); vec(y)), X, :forward), atol=FINITEDIFF_ERROR)
-    for c in CHUNK_SIZES
-        cfg = JacobianConfig{c}(X)
-        ycfg = JacobianConfig{c}(zeros(Y), X)
+    for c in CHUNK_SIZES, tag in (nothing, f!)
+        println("  ...testing $(f!) with chunk size = $c and tag = $tag")
+        ycfg = JacobianConfig(tag, zeros(Y), X, ForwardDiff.Chunk{c}())
 
-        println("  ...testing $(f!) with chunk size = $c")
         y = zeros(Y)
         out = ForwardDiff.jacobian(f!, y, X, ycfg)
         @test isapprox(y, v)
diff --git a/test/runtests.jl b/test/runtests.jl
index f41cabe6..9be0309c 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -30,19 +30,19 @@ tic()
 include("HessianTest.jl")
 println("done (took $(toq()) seconds).")
 
-println("Testing miscellaneous functionality...")
-tic()
-include("MiscTest.jl")
-println("done (took $(toq()) seconds).")
-
-if Base.JLOptions().opt_level >= 3 && VERSION >= v"0.5"
-    println("Testing SIMD vectorization...")
-    tic()
-    include("SIMDTest.jl")
-    println("done (took $(toq()) seconds).")
-end
-
-println("Testing deprecations...")
-tic()
-include("DeprecatedTest.jl")
-println("done (took $(toq()) seconds).")
+# println("Testing miscellaneous functionality...")
+# tic()
+# include("MiscTest.jl")
+# println("done (took $(toq()) seconds).")
+#
+# if Base.JLOptions().opt_level >= 3 && VERSION >= v"0.5"
+#     println("Testing SIMD vectorization...")
+#     tic()
+#     include("SIMDTest.jl")
+#     println("done (took $(toq()) seconds).")
+# end
+#
+# println("Testing deprecations...")
+# tic()
+# include("DeprecatedTest.jl")
+# println("done (took $(toq()) seconds).")
diff --git a/test/utils.jl b/test/utils.jl
index f1bac85e..fd5941a8 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -1,13 +1,13 @@
 import ForwardDiff
-using ForwardDiff.CHUNK_THRESHOLD
+using ForwardDiff.DEFAULT_CHUNK_THRESHOLD
 using Base.Test
 
 # seed RNG, thus making result inaccuracies deterministic
 # so we don't have to retune EPS for arbitrary inputs
 srand(1)
 
-const XLEN = CHUNK_THRESHOLD + 1
-const YLEN = div(CHUNK_THRESHOLD, 2) + 1
+const XLEN = DEFAULT_CHUNK_THRESHOLD + 1
+const YLEN = div(DEFAULT_CHUNK_THRESHOLD, 2) + 1
 const X, Y = rand(XLEN), rand(YLEN)
-const CHUNK_SIZES = (1, div(CHUNK_THRESHOLD, 3), div(CHUNK_THRESHOLD, 2), CHUNK_THRESHOLD, CHUNK_THRESHOLD + 1)
+const CHUNK_SIZES = (1, div(DEFAULT_CHUNK_THRESHOLD, 3), div(DEFAULT_CHUNK_THRESHOLD, 2), DEFAULT_CHUNK_THRESHOLD, DEFAULT_CHUNK_THRESHOLD + 1)
 const FINITEDIFF_ERROR = 3e-5

From 1fd6981bf9a568a43741f79476e7cecf48c2f90c Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Wed, 29 Mar 2017 14:31:17 -0400
Subject: [PATCH 10/26] attempt to fix method ambiguity on Julia v0.5

---
 src/ForwardDiff.jl |   2 +
 src/config.jl      |  15 ++--
 src/dual.jl        | 190 ++++++++++++++++++++++++++++++++++++---------
 test/DualTest.jl   |  50 ++++++------
 4 files changed, 188 insertions(+), 69 deletions(-)

diff --git a/src/ForwardDiff.jl b/src/ForwardDiff.jl
index 7e4450e2..12c0d774 100644
--- a/src/ForwardDiff.jl
+++ b/src/ForwardDiff.jl
@@ -35,6 +35,8 @@ const SPECIAL_FUNCS = (:erf, :erfc, :erfinv, :erfcinv, :erfi, :erfcx,
                        :besselyx, :besselh, :hankelh1, :hankelh1x, :hankelh2,
                        :hankelh2x, :besseli, :besselix, :besselk, :besselkx)
 
+const REAL_TYPES = (AbstractFloat, Irrational, Integer, Rational, Real)
+
 # chunk settings #
 #----------------#
 
diff --git a/src/config.jl b/src/config.jl
index 9a17a9c4..ace02fd5 100644
--- a/src/config.jl
+++ b/src/config.jl
@@ -1,3 +1,9 @@
+#######
+# Tag #
+#######
+
+@compat immutable Tag{F,M} end
+
 #########
 # Chunk #
 #########
@@ -25,15 +31,6 @@ function pickchunksize(input_length, threshold = DEFAULT_CHUNK_THRESHOLD)
     end
 end
 
-#######
-# Tag #
-#######
-
-@compat immutable Tag{F,M} end
-
-@inline order{V}(::Type{V}) = 0
-@inline order{T,V,N}(::Type{Dual{T,V,N}}) = 1 + order(V)
-
 ##################
 # AbstractConfig #
 ##################
diff --git a/src/dual.jl b/src/dual.jl
index 119f7d29..23eaf903 100644
--- a/src/dual.jl
+++ b/src/dual.jl
@@ -7,22 +7,6 @@
     partials::Partials{N,V}
 end
 
-####################
-# TagMismatchError #
-####################
-
-@compat immutable TagMismatchError{X,Y} <: Exception
-    x::Dual{X}
-    y::Dual{Y}
-end
-
-function Base.showerror{X,Y}(io::IO, e::TagMismatchError{X,Y})
-    print(io, "potential perturbation confusion detected when computing binary operation ",
-              "on $(e.x) and $(e.y) (tag mismatch: $X != $Y). ForwardDiff cannot safely ",
-              "perform differentiation in this context; see the following issue for ",
-              "details: https://github.com/JuliaDiff/ForwardDiff.jl/issues/83")
-end
-
 ################
 # Constructors #
 ################
@@ -41,6 +25,34 @@ end
 
 @inline Dual(args...) = Dual{Void}(args...)
 
+####################
+# TagMismatchError #
+####################
+
+@compat immutable TagMismatchError{X,Y} <: Exception
+    x::Dual{X}
+    y::Dual{Y}
+end
+
+function TagMismatchError(x, y, z)
+    if isa(x, Dual) && isa(y, Dual) && tagtype(x) !== tagtype(y)
+        return TagMismatchError(x, y)
+    elseif isa(x, Dual) && isa(z, Dual) && tagtype(x) !== tagtype(z)
+        return TagMismatchError(x, z)
+    elseif isa(y, Dual) && isa(z, Dual) && tagtype(y) !== tagtype(z)
+        return TagMismatchError(y, z)
+    else
+        error("the provided arguments have matching tags, or are not Duals")
+    end
+end
+
+function Base.showerror{X,Y}(io::IO, e::TagMismatchError{X,Y})
+    print(io, "potential perturbation confusion detected when computing binary operation ",
+              "on $(e.x) and $(e.y) (tag mismatch: $X != $Y). ForwardDiff cannot safely ",
+              "perform differentiation in this context; see the following issue for ",
+              "details: https://github.com/JuliaDiff/ForwardDiff.jl/issues/83")
+end
+
 ##############################
 # Utility/Accessor Functions #
 ##############################
@@ -58,34 +70,122 @@ end
 @inline npartials{T,V,N}(::Dual{T,V,N}) = N
 @inline npartials{T,V,N}(::Type{Dual{T,V,N}}) = N
 
+@inline order{V}(::Type{V}) = 0
+@inline order{T,V,N}(::Type{Dual{T,V,N}}) = 1 + order(V)
+
 @inline valtype{V}(::V) = V
 @inline valtype{V}(::Type{V}) = V
 @inline valtype{T,V,N}(::Dual{T,V,N}) = V
 @inline valtype{T,V,N}(::Type{Dual{T,V,N}}) = V
 
-#####################
-# Generic Functions #
-#####################
+@inline tagtype{V}(::V) = Void
+@inline tagtype{V}(::Type{V}) = Void
+@inline tagtype{T,V,N}(::Dual{T,V,N}) = T
+@inline tagtype{T,V,N}(::Type{Dual{T,V,N}}) = T
 
-macro define_binary_dual_op(f, both_body, left_body, right_body)
-    return esc(quote
-        @inline $(f)(x::Dual, y::Dual) = throw(TagMismatchError(x, y))
-        @inline $(f){T}(x::Dual{T}, y::Dual{T}) = $both_body
+#####################################
+# N-ary Operation Definition Macros #
+#####################################
 
-        # define on all these types to avoid various ambiguities
-        for R in (:AbstractFloat, :Irrational, :Integer, :Rational, :Real)
-            @eval begin
-                @inline $(f){T}(x::Dual{T}, y::$(Expr(:$, :R))) = $left_body
-                @inline $(f){T}(x::$(Expr(:$, :R)), y::Dual{T}) = $right_body
-            end
+macro define_binary_dual_op(f, xy_body, x_body, y_body)
+    expr = quote
+        @inline $(f)(x::Dual, y::Dual) = throw(TagMismatchError(x, y))
+        @inline $(f){T}(x::Dual{T}, y::Dual{T}) = $xy_body
+    end
+    for R in REAL_TYPES
+        real_defs = quote
+            @inline $(f){T}(x::Dual{T}, y::$R) = $x_body
+            @inline $(f){T}(x::$R, y::Dual{T}) = $y_body
         end
-
-        @inline $(f){T,S,X,Y,N}(x::Dual{T,Dual{S,X,N}}, y::Dual{T,Dual{S,Y,N}}) = $both_body
-        @inline $(f){T,S,V,N}(x::Dual{T,Dual{S,V,N}}, y::Dual{S}) = $left_body
-        @inline $(f){T,S,V,N}(x::Dual{S}, y::Dual{T,Dual{S,V,N}}) = $right_body
-    end)
+        append!(expr.args, real_defs.args)
+    end
+    nested_defs = quote
+        @inline $(f){T,X<:Real,Y<:Real,N,M}(x::Dual{T,Dual{T,X,N},M}, y::Dual{T,Dual{T,Y,N},M}) = $xy_body
+        @inline $(f){T,X<:Real,Y<:Real,N,M}(x::Dual{T,Dual{T,X,N},M}, y::Dual{T,Y,N}) = $x_body
+        @inline $(f){T,X<:Real,Y<:Real,N,M}(x::Dual{T,X,N}, y::Dual{T,Dual{T,Y,N},M}) = $y_body
+
+        @inline $(f){T,S,X<:Real,Y<:Real,N,M}(x::Dual{T,Dual{S,X,N},M}, y::Dual{T,Dual{S,Y,N},M}) = $xy_body
+        @inline $(f){T,S,X<:Real,Y<:Real,N,M}(x::Dual{T,Dual{S,X,N},M}, y::Dual{S,Y,N}) = $x_body
+        @inline $(f){T,S,X<:Real,Y<:Real,N,M}(x::Dual{S,X,N}, y::Dual{T,Dual{S,Y,N},M}) = $y_body
+    end
+    append!(expr.args, nested_defs.args)
+    return esc(expr)
 end
 
+# macro define_ternary_dual_op(f, xyz_body, xy_body, xz_body, yz_body, x_body, y_body, z_body)
+#     return esc(quote
+#         @inline $(f){T}(x::Dual, y::Dual, z::Dual) = throw(TagMismatchError(x, y, z))
+#         @inline $(f){T}(x::Dual{T}, y::Dual{T}, z::Dual{T}) = $xyz_body
+#
+#         @inline $(f){T,X,Y,Z,N}(x::Dual{T,Dual{T,X,N}}, y::Dual{T,Dual{T,Y,N}}, z::Dual{T,Dual{T,Z,N}}) = $xyz_body
+#         @inline $(f){T,X,Y,Z,N}(x::Dual{T,Dual{T,X,N}}, y::Dual{T,Dual{T,Y,N}}, z::Dual{T,Z,N})         = $xy_body
+#         @inline $(f){T,X,Y,Z,N}(x::Dual{T,Dual{T,X,N}}, y::Dual{T,Y,N},         z::Dual{T,Dual{T,Z,N}}) = $xz_body
+#         @inline $(f){T,X,Y,Z,N}(x::Dual{T,X,N},         y::Dual{T,Dual{T,Y,N}}, z::Dual{T,Dual{T,Z,N}}) = $yz_body
+#         @inline $(f){T,X,Y,Z,N}(x::Dual{T,Dual{T,X,N}}, y::Dual{T,Y,N},         z::Dual{T,Z,N})         = $x_body
+#         @inline $(f){T,X,Y,Z,N}(x::Dual{T,X,N},         y::Dual{T,Dual{T,Y,N}}, z::Dual{T,Z,N})         = $y_body
+#         @inline $(f){T,X,Y,Z,N}(x::Dual{T,X,N},         y::Dual{T,Y,N},         z::Dual{T,Dual{T,Z,N}}) = $z_body
+#
+#         @inline $(f){T,S,X,Y,Z,N}(x::Dual{T,Dual{S,X,N}}, y::Dual{T,Dual{S,Y,N}}, z::Dual{T,Dual{S,Z,N}}) = $xyz_body
+#         @inline $(f){T,S,X,Y,Z,N}(x::Dual{T,Dual{S,X,N}}, y::Dual{T,Dual{S,Y,N}}, z::Dual{S,Z,N})         = $xy_body
+#         @inline $(f){T,S,X,Y,Z,N}(x::Dual{T,Dual{S,X,N}}, y::Dual{S,Y,N},         z::Dual{T,Dual{S,Z,N}}) = $xz_body
+#         @inline $(f){T,S,X,Y,Z,N}(x::Dual{S,X,N},         y::Dual{T,Dual{S,Y,N}}, z::Dual{T,Dual{S,Z,N}}) = $yz_body
+#         @inline $(f){T,S,X,Y,Z,N}(x::Dual{T,Dual{S,X,N}}, y::Dual{S,Y,N},         z::Dual{S,Z,N})         = $x_body
+#         @inline $(f){T,S,X,Y,Z,N}(x::Dual{S,X,N},         y::Dual{T,Dual{S,Y,N}}, z::Dual{S,Z,N})         = $y_body
+#         @inline $(f){T,S,X,Y,Z,N}(x::Dual{S,X,N},         y::Dual{S,Y,N},         z::Dual{T,Dual{S,Z,N}}) = $z_body
+#
+#         for RT in REAL_TYPES
+#             R = Expr(:$, :RT)
+#             @eval begin
+#                 println("here 1")
+#                 @inline $(f)(x::Dual, y::Dual, z::$R) = throw(TagMismatchError(x, y, z))
+#                 @inline $(f)(x::Dual, y::$R, z::Dual) = throw(TagMismatchError(x, y, z))
+#                 @inline $(f)(x::$R, y::Dual, z::Dual) = throw(TagMismatchError(x, y, z))
+#
+#                 @inline $(f){T}(x::Dual{T}, y::Dual{T}, z::$R) = $xy_body
+#                 @inline $(f){T}(x::Dual{T}, y::$R, z::Dual{T}) = $xz_body
+#                 @inline $(f){T}(x::$R, y::Dual{T}, z::Dual{T}) = $yz_body
+#
+#                 @inline $(f){T,X,Y,N}(x::Dual{T,Dual{T,X,N}}, y::Dual{T,Dual{T,Y,N}}, z::$R)                  = $xy_body
+#                 @inline $(f){T,X,Z,N}(x::Dual{T,Dual{T,X,N}}, y::$R,                  z::Dual{T,Dual{T,Z,N}}) = $xz_body
+#                 @inline $(f){T,Y,Z,N}(x::$R,                  y::Dual{T,Dual{T,Y,N}}, z::Dual{T,Dual{T,Z,N}}) = $yz_body
+#                 @inline $(f){T,X,Y,N}(x::Dual{T,Dual{T,X,N}}, y::Dual{T,Y,N}, z::$R)          = $x_body
+#                 @inline $(f){T,X,Z,N}(x::Dual{T,Dual{T,X,N}}, y::$R,          z::Dual{T,Z,N}) = $x_body
+#                 @inline $(f){T,X,Y,N}(x::Dual{T,X,N}, y::Dual{T,Dual{T,Y,N}}, z::$R)          = $y_body
+#                 @inline $(f){T,Y,Z,N}(x::$R,          y::Dual{T,Dual{T,Y,N}}, z::Dual{T,Z,N}) = $y_body
+#                 @inline $(f){T,X,Z,N}(x::Dual{T,X,N}, y::$R, z::Dual{T,Dual{T,Z,N}}) = $z_body
+#                 @inline $(f){T,Y,Z,N}(x::$R, y::Dual{T,Y,N}, z::Dual{T,Dual{T,Z,N}}) = $z_body
+#                 println("here 2")
+#                 @inline $(f){T,S,X,Y,N}(x::Dual{T,Dual{S,X,N}}, y::Dual{T,Dual{S,Y,N}}, z::$R)                  = $xy_body
+#                 @inline $(f){T,S,X,Z,N}(x::Dual{T,Dual{S,X,N}}, y::$R,                  z::Dual{T,Dual{S,Z,N}}) = $xz_body
+#                 @inline $(f){T,S,Y,Z,N}(x::$R,                  y::Dual{T,Dual{S,Y,N}}, z::Dual{T,Dual{S,Z,N}}) = $yz_body
+#                 @inline $(f){T,S,X,Y,N}(x::Dual{T,Dual{S,X,N}}, y::Dual{S,Y,N}, z::$R)          = $x_body
+#                 @inline $(f){T,S,X,Z,N}(x::Dual{T,Dual{S,X,N}}, y::$R,          z::Dual{S,Z,N}) = $x_body
+#                 @inline $(f){T,S,X,Y,N}(x::Dual{S,X,N}, y::Dual{T,Dual{S,Y,N}}, z::$R)          = $y_body
+#                 @inline $(f){T,S,Y,Z,N}(x::$R,          y::Dual{T,Dual{S,Y,N}}, z::Dual{S,Z,N}) = $y_body
+#                 @inline $(f){T,S,X,Z,N}(x::Dual{S,X,N}, y::$R, z::Dual{T,Dual{S,Z,N}}) = $z_body
+#                 @inline $(f){T,S,Y,Z,N}(x::$R, y::Dual{S,Y,N}, z::Dual{T,Dual{S,Z,N}}) = $z_body
+#             end
+#             for QT in REAL_TYPES
+#                 Q = Expr(:$, :QT)
+#                 @eval begin
+#                     println("here 3")
+#                     @inline $(f)(x::Dual, y::$R, z::$Q) = throw(TagMismatchError(x, y, z))
+#                     @inline $(f)(x::$R, y::Dual, z::$Q) = throw(TagMismatchError(x, y, z))
+#                     @inline $(f)(x::$R, y::$Q, z::Dual) = throw(TagMismatchError(x, y, z))
+#                     println("here 4")
+#                     @inline $(f){T}(x::Dual{T}, y::$R, z::$Q) = $x_body
+#                     @inline $(f){T}(x::$R, y::Dual{T}, z::$Q) = $y_body
+#                     @inline $(f){T}(x::$R, y::$Q, z::Dual{T}) = $z_body
+#                 end
+#             end
+#         end
+#     end)
+# end
+
+#####################
+# Generic Functions #
+#####################
+
 Base.copy(d::Dual) = d
 
 Base.eps(d::Dual) = eps(value(d))
@@ -168,8 +268,8 @@ for R in (:BigFloat, :Bool, :Irrational, :Real)
 end
 
 Base.convert{T,V<:Real,N}(::Type{Dual{T,V,N}}, d::Dual{T}) = Dual{T}(convert(V, value(d)), convert(Partials{N,V}, partials(d)))
-Base.convert{D<:Dual}(::Type{D}, d::D) = d
 Base.convert{T,V<:Real,N}(::Type{Dual{T,V,N}}, x::Real) = Dual{T}(V(x), zero(Partials{N,V}))
+Base.convert{D<:Dual}(::Type{D}, d::D) = d
 
 Base.promote_array_type{D<:Dual, A<:AbstractFloat}(F, ::Type{D}, ::Type{A}) = promote_type(D, A)
 Base.promote_array_type{D<:Dual, A<:AbstractFloat, P}(F, ::Type{D}, ::Type{A}, ::Type{P}) = P
@@ -354,6 +454,26 @@ end
     calc_hypot(x, y, T)
 )
 
+@inline function calc_hypot{T}(x, y, z, ::Type{T})
+    vx = value(x)
+    vy = value(y)
+    vz = value(z)
+    h = hypot(vx, vy, vz)
+    p = (vx / h) * partials(x) + (vy / h) * partials(y) + (vz / h) * partials(z)
+    return Dual{T}(h, p)
+end
+
+# @define_ternary_dual_op(
+#     Base.hypot,
+#     calc_hypot(x, y, z, T),
+#     calc_hypot(x, y, z, T),
+#     calc_hypot(x, y, z, T),
+#     calc_hypot(x, y, z, T),
+#     calc_hypot(x, y, z, T),
+#     calc_hypot(x, y, z, T),
+#     calc_hypot(x, y, z, T),
+# )
+
 # atan2
 
 @inline function calc_atan2{T}(y, x, ::Type{T})
diff --git a/test/DualTest.jl b/test/DualTest.jl
index 83b1c96e..edefa636 100644
--- a/test/DualTest.jl
+++ b/test/DualTest.jl
@@ -25,15 +25,15 @@ if VERSION < v"0.5"
     # isapprox on v0.4 doesn't properly set the tolerance
     # for mixed-precision inputs, while @test_approx_eq does
     # Use @eval to avoid expanding @test_approx_eq on 0.6 where it's deprecated
-    @eval test_approx_diffnums(a::Real, b::Real) = @test_approx_eq a b
+    @eval test_approx_duals(a::Real, b::Real) = @test_approx_eq a b
 else
-    test_approx_diffnums(a::Real, b::Real) = @test isapprox(a, b)
+    test_approx_duals(a::Real, b::Real) = @test isapprox(a, b)
 end
 
-function test_approx_diffnums{T,A,B,N}(a::Dual{T,A,N}, b::Dual{T,B,N})
-    test_approx_diffnums(value(a), value(b))
+function test_approx_duals{T,A,B,N}(a::Dual{T,A,N}, b::Dual{T,B,N})
+    test_approx_duals(value(a), value(b))
     for i in 1:N
-        test_approx_diffnums(partials(a)[i], partials(b)[i])
+        test_approx_duals(partials(a)[i], partials(b)[i])
     end
 end
 
@@ -289,7 +289,7 @@ for N in (0,3), M in (0,4), V in (Int, Float32)
     # Promotion/Conversion #
     ########################
 
-    const WIDE_T = widen(V)
+    WIDE_T = widen(V)
 
     @test promote_type(Dual{Void,V,N}, V) == Dual{Void,V,N}
     @test promote_type(Dual{Void,V,N}, WIDE_T) == Dual{Void,WIDE_T,N}
@@ -298,8 +298,8 @@ for N in (0,3), M in (0,4), V in (Int, Float32)
     @test promote_type(Dual{Void,V,N}, Dual{Void,WIDE_T,N}) == Dual{Void,WIDE_T,N}
     @test promote_type(Dual{Void,WIDE_T,N}, Dual{Void,Dual{Void,V,M},N}) == Dual{Void,Dual{Void,WIDE_T,M},N}
 
-    const WIDE_FDNUM = convert(Dual{Void,WIDE_T,N}, FDNUM)
-    const WIDE_NESTED_FDNUM = convert(Dual{Void,Dual{Void,WIDE_T,M},N}, NESTED_FDNUM)
+    WIDE_FDNUM = convert(Dual{Void,WIDE_T,N}, FDNUM)
+    WIDE_NESTED_FDNUM = convert(Dual{Void,Dual{Void,WIDE_T,M},N}, NESTED_FDNUM)
 
     @test typeof(WIDE_FDNUM) === Dual{Void,WIDE_T,N}
     @test typeof(WIDE_NESTED_FDNUM) === Dual{Void,Dual{Void,WIDE_T,M},N}
@@ -368,21 +368,21 @@ for N in (0,3), M in (0,4), V in (Int, Float32)
         @test Dual{1}(FDNUM / PRIMAL, FDNUM2 / PRIMAL) === Dual{1}(FDNUM, FDNUM2) / PRIMAL
     end
 
-    test_approx_diffnums(FDNUM / FDNUM2, Dual(value(FDNUM) / value(FDNUM2), ForwardDiff._div_partials(partials(FDNUM), partials(FDNUM2), value(FDNUM), value(FDNUM2))))
-    test_approx_diffnums(FDNUM / PRIMAL, Dual(value(FDNUM) / PRIMAL, partials(FDNUM) / PRIMAL))
-    test_approx_diffnums(PRIMAL / FDNUM, Dual(PRIMAL / value(FDNUM), (-(PRIMAL) / value(FDNUM)^2) * partials(FDNUM)))
+    test_approx_duals(FDNUM / FDNUM2, Dual(value(FDNUM) / value(FDNUM2), ForwardDiff._div_partials(partials(FDNUM), partials(FDNUM2), value(FDNUM), value(FDNUM2))))
+    test_approx_duals(FDNUM / PRIMAL, Dual(value(FDNUM) / PRIMAL, partials(FDNUM) / PRIMAL))
+    test_approx_duals(PRIMAL / FDNUM, Dual(PRIMAL / value(FDNUM), (-(PRIMAL) / value(FDNUM)^2) * partials(FDNUM)))
 
-    test_approx_diffnums(NESTED_FDNUM / NESTED_FDNUM2, Dual(value(NESTED_FDNUM) / value(NESTED_FDNUM2), ForwardDiff._div_partials(partials(NESTED_FDNUM), partials(NESTED_FDNUM2), value(NESTED_FDNUM), value(NESTED_FDNUM2))))
-    test_approx_diffnums(NESTED_FDNUM / PRIMAL, Dual(value(NESTED_FDNUM) / PRIMAL, partials(NESTED_FDNUM) / PRIMAL))
-    test_approx_diffnums(PRIMAL / NESTED_FDNUM, Dual(PRIMAL / value(NESTED_FDNUM), (-(PRIMAL) / value(NESTED_FDNUM)^2) * partials(NESTED_FDNUM)))
+    test_approx_duals(NESTED_FDNUM / NESTED_FDNUM2, Dual(value(NESTED_FDNUM) / value(NESTED_FDNUM2), ForwardDiff._div_partials(partials(NESTED_FDNUM), partials(NESTED_FDNUM2), value(NESTED_FDNUM), value(NESTED_FDNUM2))))
+    test_approx_duals(NESTED_FDNUM / PRIMAL, Dual(value(NESTED_FDNUM) / PRIMAL, partials(NESTED_FDNUM) / PRIMAL))
+    test_approx_duals(PRIMAL / NESTED_FDNUM, Dual(PRIMAL / value(NESTED_FDNUM), (-(PRIMAL) / value(NESTED_FDNUM)^2) * partials(NESTED_FDNUM)))
 
-    test_approx_diffnums(FDNUM^FDNUM2, exp(FDNUM2 * log(FDNUM)))
-    test_approx_diffnums(FDNUM^PRIMAL, exp(PRIMAL * log(FDNUM)))
-    test_approx_diffnums(PRIMAL^FDNUM, exp(FDNUM * log(PRIMAL)))
+    test_approx_duals(FDNUM^FDNUM2, exp(FDNUM2 * log(FDNUM)))
+    test_approx_duals(FDNUM^PRIMAL, exp(PRIMAL * log(FDNUM)))
+    test_approx_duals(PRIMAL^FDNUM, exp(FDNUM * log(PRIMAL)))
 
-    test_approx_diffnums(NESTED_FDNUM^NESTED_FDNUM2, exp(NESTED_FDNUM2 * log(NESTED_FDNUM)))
-    test_approx_diffnums(NESTED_FDNUM^PRIMAL, exp(PRIMAL * log(NESTED_FDNUM)))
-    test_approx_diffnums(PRIMAL^NESTED_FDNUM, exp(NESTED_FDNUM * log(PRIMAL)))
+    test_approx_duals(NESTED_FDNUM^NESTED_FDNUM2, exp(NESTED_FDNUM2 * log(NESTED_FDNUM)))
+    test_approx_duals(NESTED_FDNUM^PRIMAL, exp(PRIMAL * log(NESTED_FDNUM)))
+    test_approx_duals(PRIMAL^NESTED_FDNUM, exp(NESTED_FDNUM * log(PRIMAL)))
 
     @test partials(NaNMath.pow(Dual(-2.0, 1.0), Dual(2.0, 0.0)), 1) == -4.0
 
@@ -436,11 +436,11 @@ for N in (0,3), M in (0,4), V in (Int, Float32)
                     @eval begin
                         fdnum = $(is_domain_err_func ? FDNUM + 1 : FDNUM)
                         $(v) = ForwardDiff.value(fdnum)
-                        $(test_approx_diffnums)($(func)(fdnum), ForwardDiff.Dual($(func)($v), $(deriv) * ForwardDiff.partials(fdnum)))
+                        $(test_approx_duals)($(func)(fdnum), ForwardDiff.Dual($(func)($v), $(deriv) * ForwardDiff.partials(fdnum)))
                         if $(!(is_unsupported_nested_func))
                             nested_fdnum = $(is_domain_err_func ? NESTED_FDNUM + 1 : NESTED_FDNUM)
                             $(v) = ForwardDiff.value(nested_fdnum)
-                            $(test_approx_diffnums)($(func)(nested_fdnum), ForwardDiff.Dual($(func)($v), $(deriv) * ForwardDiff.partials(nested_fdnum)))
+                            $(test_approx_duals)($(func)(nested_fdnum), ForwardDiff.Dual($(func)($v), $(deriv) * ForwardDiff.partials(nested_fdnum)))
                         end
                     end
                 end
@@ -454,9 +454,9 @@ for N in (0,3), M in (0,4), V in (Int, Float32)
     # Special Cases #
     #---------------#
 
-    test_approx_diffnums(hypot(FDNUM, FDNUM2), sqrt(FDNUM^2 + FDNUM2^2))
-    test_approx_diffnums(hypot(FDNUM, FDNUM2, FDNUM), sqrt(2*(FDNUM^2) + FDNUM2^2))
-    map(test_approx_diffnums, ForwardDiff.sincos(FDNUM), (sin(FDNUM), cos(FDNUM)))
+    test_approx_duals(hypot(FDNUM, FDNUM2), sqrt(FDNUM^2 + FDNUM2^2))
+    test_approx_duals(hypot(FDNUM, FDNUM2, FDNUM), sqrt(2*(FDNUM^2) + FDNUM2^2))
+    map(test_approx_duals, ForwardDiff.sincos(FDNUM), (sin(FDNUM), cos(FDNUM)))
 
     if V === Float32
         @test typeof(sqrt(FDNUM)) === typeof(FDNUM)

From 5224972611f205fb1589390884e1bf81479f6806 Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Fri, 7 Apr 2017 13:37:23 -0400
Subject: [PATCH 11/26] drop Julia v0.5

---
 .travis.yml        |  1 -
 REQUIRE            |  3 +--
 src/ForwardDiff.jl |  1 -
 src/config.jl      | 14 +++++++-------
 src/deprecated.jl  |  2 +-
 src/dual.jl        | 42 +++++++++++++++++++-----------------------
 src/gradient.jl    |  2 +-
 src/hessian.jl     |  2 +-
 src/jacobian.jl    |  2 +-
 src/partials.jl    |  2 +-
 10 files changed, 32 insertions(+), 39 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 8bf16fe6..69b0bf6b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,5 @@
 language: julia
 julia:
-    - 0.5
     - 0.6
     - nightly
 notifications:
diff --git a/REQUIRE b/REQUIRE
index 3809d461..7f2f5e18 100644
--- a/REQUIRE
+++ b/REQUIRE
@@ -1,6 +1,5 @@
-julia 0.5
+julia 0.6-
 DiffBase 0.0.3
-Compat 0.19.0
 Calculus 0.2.0
 NaNMath 0.2.2
 SpecialFunctions 0.1.0
diff --git a/src/ForwardDiff.jl b/src/ForwardDiff.jl
index 12c0d774..6faddaed 100644
--- a/src/ForwardDiff.jl
+++ b/src/ForwardDiff.jl
@@ -2,7 +2,6 @@ __precompile__()
 
 module ForwardDiff
 
-using Compat
 using DiffBase
 using DiffBase: DiffResult
 
diff --git a/src/config.jl b/src/config.jl
index ace02fd5..bf8d8a48 100644
--- a/src/config.jl
+++ b/src/config.jl
@@ -2,13 +2,13 @@
 # Tag #
 #######
 
-@compat immutable Tag{F,M} end
+struct Tag{F,M} end
 
 #########
 # Chunk #
 #########
 
-@compat immutable Chunk{N} end
+struct Chunk{N} end
 
 function Chunk(input_length::Integer, threshold::Integer = DEFAULT_CHUNK_THRESHOLD)
     N = pickchunksize(input_length, threshold)
@@ -35,9 +35,9 @@ end
 # AbstractConfig #
 ##################
 
-@compat abstract type AbstractConfig{T<:Tag,N} end
+abstract type AbstractConfig{T<:Tag,N} end
 
-@compat immutable ConfigMismatchError{F,G,M} <: Exception
+struct ConfigMismatchError{F,G,M} <: Exception
     f::F
     cfg::AbstractConfig{Tag{G,M}}
 end
@@ -59,7 +59,7 @@ Base.copy(cfg::AbstractConfig) = deepcopy(cfg)
 # GradientConfig #
 ##################
 
-@compat immutable GradientConfig{T,V,N,D} <: AbstractConfig{T,N}
+struct GradientConfig{T,V,N,D} <: AbstractConfig{T,N}
     seeds::NTuple{N,Partials{N,V}}
     duals::D
 end
@@ -77,7 +77,7 @@ end
 # JacobianConfig #
 ##################
 
-@compat immutable JacobianConfig{T,V,N,D} <: AbstractConfig{T,N}
+struct JacobianConfig{T,V,N,D} <: AbstractConfig{T,N}
     seeds::NTuple{N,Partials{N,V}}
     duals::D
 end
@@ -107,7 +107,7 @@ end
 # HessianConfig #
 #################
 
-@compat immutable HessianConfig{T,V,N,D,MJ,DJ} <: AbstractConfig{T,N}
+struct HessianConfig{T,V,N,D,MJ,DJ} <: AbstractConfig{T,N}
     jacobian_config::JacobianConfig{Tag{Void,MJ},V,N,DJ}
     gradient_config::GradientConfig{T,Dual{Tag{Void,MJ},V,N},D}
 end
diff --git a/src/deprecated.jl b/src/deprecated.jl
index 6a672a46..9ba22857 100644
--- a/src/deprecated.jl
+++ b/src/deprecated.jl
@@ -14,7 +14,7 @@ Base.@deprecate JacobianResult(x) DiffBase.JacobianResult(x)
 Base.@deprecate HessianResult(x, y, z) DiffBase.DiffResult(x, y, z)
 Base.@deprecate HessianResult(x) DiffBase.HessianResult(x)
 
-@compat immutable Chunk{N}
+struct Chunk{N}
     function (::Type{Chunk{N}}){N}()
         Base.depwarn("Chunk{N}() is deprecated, use the ForwardDiff.AbstractConfig API instead.", :Chunk)
         return new{N}()
diff --git a/src/dual.jl b/src/dual.jl
index 23eaf903..57e0602c 100644
--- a/src/dual.jl
+++ b/src/dual.jl
@@ -2,7 +2,7 @@
 # Dual #
 ########
 
-@compat immutable Dual{T,V<:Real,N} <: Real
+struct Dual{T,V<:Real,N} <: Real
     value::V
     partials::Partials{N,V}
 end
@@ -29,7 +29,7 @@ end
 # TagMismatchError #
 ####################
 
-@compat immutable TagMismatchError{X,Y} <: Exception
+struct TagMismatchError{X,Y} <: Exception
     x::Dual{X}
     y::Dual{Y}
 end
@@ -67,21 +67,21 @@ end
 @inline partials(d::Dual, i, j) = partials(d, i).partials[j]
 @inline partials(d::Dual, i, j, k...) = partials(partials(d, i, j), k...)
 
-@inline npartials{T,V,N}(::Dual{T,V,N}) = N
-@inline npartials{T,V,N}(::Type{Dual{T,V,N}}) = N
+@inline npartials(::Dual{T,V,N}) where {T,V,N} = N
+@inline npartials(::Type{Dual{T,V,N}}) where {T,V,N} = N
 
-@inline order{V}(::Type{V}) = 0
-@inline order{T,V,N}(::Type{Dual{T,V,N}}) = 1 + order(V)
+@inline order(::Type{V}) where {V} = 0
+@inline order(::Type{Dual{T,V,N}}) where {T,V,N} = 1 + order(V)
 
-@inline valtype{V}(::V) = V
-@inline valtype{V}(::Type{V}) = V
-@inline valtype{T,V,N}(::Dual{T,V,N}) = V
-@inline valtype{T,V,N}(::Type{Dual{T,V,N}}) = V
+@inline valtype(::V) where {V} = V
+@inline valtype(::Type{V}) where {V} = V
+@inline valtype(::Dual{T,V,N}) where {T,V,N} = V
+@inline valtype(::Type{Dual{T,V,N}}) where {T,V,N} = V
 
-@inline tagtype{V}(::V) = Void
-@inline tagtype{V}(::Type{V}) = Void
-@inline tagtype{T,V,N}(::Dual{T,V,N}) = T
-@inline tagtype{T,V,N}(::Type{Dual{T,V,N}}) = T
+@inline tagtype(::V) where {V} = Void
+@inline tagtype(::Type{V}) where {V} = Void
+@inline tagtype(::Dual{T,V,N}) where {T,V,N} = T
+@inline tagtype(::Type{Dual{T,V,N}}) where {T,V,N} = T
 
 #####################################
 # N-ary Operation Definition Macros #
@@ -94,19 +94,15 @@ macro define_binary_dual_op(f, xy_body, x_body, y_body)
     end
     for R in REAL_TYPES
         real_defs = quote
-            @inline $(f){T}(x::Dual{T}, y::$R) = $x_body
-            @inline $(f){T}(x::$R, y::Dual{T}) = $y_body
+            @inline $(f)(x::Dual{T}, y::$R) where {T} = $x_body
+            @inline $(f)(x::$R, y::Dual{T}) where {T} = $y_body
         end
         append!(expr.args, real_defs.args)
     end
     nested_defs = quote
-        @inline $(f){T,X<:Real,Y<:Real,N,M}(x::Dual{T,Dual{T,X,N},M}, y::Dual{T,Dual{T,Y,N},M}) = $xy_body
-        @inline $(f){T,X<:Real,Y<:Real,N,M}(x::Dual{T,Dual{T,X,N},M}, y::Dual{T,Y,N}) = $x_body
-        @inline $(f){T,X<:Real,Y<:Real,N,M}(x::Dual{T,X,N}, y::Dual{T,Dual{T,Y,N},M}) = $y_body
-
-        @inline $(f){T,S,X<:Real,Y<:Real,N,M}(x::Dual{T,Dual{S,X,N},M}, y::Dual{T,Dual{S,Y,N},M}) = $xy_body
-        @inline $(f){T,S,X<:Real,Y<:Real,N,M}(x::Dual{T,Dual{S,X,N},M}, y::Dual{S,Y,N}) = $x_body
-        @inline $(f){T,S,X<:Real,Y<:Real,N,M}(x::Dual{S,X,N}, y::Dual{T,Dual{S,Y,N},M}) = $y_body
+        @inline $(f)(x::Dual{T,Dual{S,<:Real,N},M}, y::Dual{T,Dual{S,<:Real,N},M}) where {T,S,N,M} = $xy_body
+        @inline $(f)(x::Dual{T,Dual{S,<:Real,N},M}, y::Dual{S,<:Real,N}) where {T,S,N,M} = $x_body
+        @inline $(f)(x::Dual{S,<:Real,N}, y::Dual{T,Dual{S,<:Real,N},M}) where {T,S,N,M} = $y_body
     end
     append!(expr.args, nested_defs.args)
     return esc(expr)
diff --git a/src/gradient.jl b/src/gradient.jl
index f9cfc325..5b2ad6da 100644
--- a/src/gradient.jl
+++ b/src/gradient.jl
@@ -2,7 +2,7 @@
 # API methods #
 ###############
 
-@compat const AllowedGradientConfig{F,M} = Union{GradientConfig{Tag{F,M}}, GradientConfig{Tag{Void,M}}}
+const AllowedGradientConfig{F,M} = Union{GradientConfig{Tag{F,M}}, GradientConfig{Tag{Void,M}}}
 
 gradient(f, x, cfg::GradientConfig) = throw(ConfigMismatchError(f, cfg))
 gradient!(out, f, x, cfg::GradientConfig) = throw(ConfigMismatchError(f, cfg))
diff --git a/src/hessian.jl b/src/hessian.jl
index 3b3b6410..21b8e501 100644
--- a/src/hessian.jl
+++ b/src/hessian.jl
@@ -2,7 +2,7 @@
 # API methods #
 ###############
 
-@compat const AllowedHessianConfig{F,M} = Union{HessianConfig{Tag{F,M}}, HessianConfig{Tag{Void,M}}}
+const AllowedHessianConfig{F,M} = Union{HessianConfig{Tag{F,M}}, HessianConfig{Tag{Void,M}}}
 
 hessian(f, x, cfg::HessianConfig) = throw(ConfigMismatchError(f, cfg))
 hessian!(out, f, x, cfg::HessianConfig) = throw(ConfigMismatchError(f, cfg))
diff --git a/src/jacobian.jl b/src/jacobian.jl
index 2df9bdc2..ceccb492 100644
--- a/src/jacobian.jl
+++ b/src/jacobian.jl
@@ -2,7 +2,7 @@
 # API methods #
 ###############
 
-@compat const AllowedJacobianConfig{F,M} = Union{JacobianConfig{Tag{F,M}}, JacobianConfig{Tag{Void,M}}}
+const AllowedJacobianConfig{F,M} = Union{JacobianConfig{Tag{F,M}}, JacobianConfig{Tag{Void,M}}}
 
 jacobian(f, x, cfg::JacobianConfig) = throw(ConfigMismatchError(f, cfg))
 jacobian(f!, y, x, cfg::JacobianConfig) = throw(ConfigMismatchError(f!, cfg))
diff --git a/src/partials.jl b/src/partials.jl
index 8798ceeb..22de9c8b 100644
--- a/src/partials.jl
+++ b/src/partials.jl
@@ -26,7 +26,7 @@ Base.start(partials::Partials) = start(partials.values)
 Base.next(partials::Partials, i) = next(partials.values, i)
 Base.done(partials::Partials, i) = done(partials.values, i)
 
-@compat Base.IndexStyle(::Type{<:Partials}) = IndexLinear()
+Base.IndexStyle(::Type{<:Partials}) = IndexLinear()
 
 #####################
 # Generic Functions #

From 4662ccdedef641305c7cbe5f94f30c654573178f Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Fri, 7 Apr 2017 18:09:39 -0400
Subject: [PATCH 12/26] get ternary dual operation definition macro working
 (note that it is currently poorly tested)

---
 src/dual.jl | 190 +++++++++++++++++++++++-----------------------------
 1 file changed, 82 insertions(+), 108 deletions(-)

diff --git a/src/dual.jl b/src/dual.jl
index 57e0602c..14bfa9d6 100644
--- a/src/dual.jl
+++ b/src/dual.jl
@@ -11,17 +11,17 @@ end
 # Constructors #
 ################
 
-@inline (::Type{Dual{T}}){T,N,V}(value::V, partials::Partials{N,V}) = Dual{T,V,N}(value, partials)
+@inline (::Type{Dual{T}})(value::V, partials::Partials{N,V}) where {T,N,V} = Dual{T,V,N}(value, partials)
 
-@inline function (::Type{Dual{T}}){T,N,A,B}(value::A, partials::Partials{N,B})
+@inline function (::Type{Dual{T}})(value::A, partials::Partials{N,B}) where {T,N,A,B}
     C = promote_type(A, B)
     return Dual{T}(convert(C, value), convert(Partials{N,C}, partials))
 end
 
-@inline (::Type{Dual{T}}){T}(value::Real, partials::Tuple) = Dual{T}(value, Partials(partials))
-@inline (::Type{Dual{T}}){T}(value::Real, partials::Tuple{}) = Dual{T}(value, Partials{0,typeof(value)}(partials))
-@inline (::Type{Dual{T}}){T}(value::Real, partials::Real...) = Dual{T}(value, partials)
-@inline (::Type{Dual{T}}){T,V<:Real,N,i}(value::V, ::Type{Val{N}}, ::Type{Val{i}}) = Dual{T}(value, single_seed(Partials{N,V}, Val{i}))
+@inline (::Type{Dual{T}})(value::Real, partials::Tuple) where {T} = Dual{T}(value, Partials(partials))
+@inline (::Type{Dual{T}})(value::Real, partials::Tuple{}) where {T} = Dual{T}(value, Partials{0,typeof(value)}(partials))
+@inline (::Type{Dual{T}})(value::Real, partials::Real...) where {T} = Dual{T}(value, partials)
+@inline (::Type{Dual{T}})(value::V, ::Type{Val{N}}, ::Type{Val{i}}) where {T,V<:Real,N,i} = Dual{T}(value, single_seed(Partials{N,V}, Val{i}))
 
 @inline Dual(args...) = Dual{Void}(args...)
 
@@ -46,7 +46,7 @@ function TagMismatchError(x, y, z)
     end
 end
 
-function Base.showerror{X,Y}(io::IO, e::TagMismatchError{X,Y})
+function Base.showerror(io::IO, e::TagMismatchError{X,Y}) where {X,Y}
     print(io, "potential perturbation confusion detected when computing binary operation ",
               "on $(e.x) and $(e.y) (tag mismatch: $X != $Y). ForwardDiff cannot safely ",
               "perform differentiation in this context; see the following issue for ",
@@ -88,95 +88,69 @@ end
 #####################################
 
 macro define_binary_dual_op(f, xy_body, x_body, y_body)
-    expr = quote
+    defs = quote
         @inline $(f)(x::Dual, y::Dual) = throw(TagMismatchError(x, y))
-        @inline $(f){T}(x::Dual{T}, y::Dual{T}) = $xy_body
+        @inline $(f)(x::Dual{T}, y::Dual{T}) where {T} = $xy_body
+        @inline $(f)(x::Dual{T,Dual{S,X,N},M}, y::Dual{T,Dual{S,Y,N},M}) where {T,S,X<:Real,Y<:Real,N,M} = $xy_body
+        @inline $(f)(x::Dual{T,Dual{S,X,N},M}, y::Dual{S,Y,N})           where {T,S,X<:Real,Y<:Real,N,M} = $x_body
+        @inline $(f)(x::Dual{S,X,N},           y::Dual{T,Dual{S,Y,N},M}) where {T,S,X<:Real,Y<:Real,N,M} = $y_body
     end
     for R in REAL_TYPES
-        real_defs = quote
+        expr = quote
             @inline $(f)(x::Dual{T}, y::$R) where {T} = $x_body
             @inline $(f)(x::$R, y::Dual{T}) where {T} = $y_body
         end
-        append!(expr.args, real_defs.args)
+        append!(defs.args, expr.args)
     end
-    nested_defs = quote
-        @inline $(f)(x::Dual{T,Dual{S,<:Real,N},M}, y::Dual{T,Dual{S,<:Real,N},M}) where {T,S,N,M} = $xy_body
-        @inline $(f)(x::Dual{T,Dual{S,<:Real,N},M}, y::Dual{S,<:Real,N}) where {T,S,N,M} = $x_body
-        @inline $(f)(x::Dual{S,<:Real,N}, y::Dual{T,Dual{S,<:Real,N},M}) where {T,S,N,M} = $y_body
-    end
-    append!(expr.args, nested_defs.args)
-    return esc(expr)
+    return esc(defs)
 end
 
-# macro define_ternary_dual_op(f, xyz_body, xy_body, xz_body, yz_body, x_body, y_body, z_body)
-#     return esc(quote
-#         @inline $(f){T}(x::Dual, y::Dual, z::Dual) = throw(TagMismatchError(x, y, z))
-#         @inline $(f){T}(x::Dual{T}, y::Dual{T}, z::Dual{T}) = $xyz_body
-#
-#         @inline $(f){T,X,Y,Z,N}(x::Dual{T,Dual{T,X,N}}, y::Dual{T,Dual{T,Y,N}}, z::Dual{T,Dual{T,Z,N}}) = $xyz_body
-#         @inline $(f){T,X,Y,Z,N}(x::Dual{T,Dual{T,X,N}}, y::Dual{T,Dual{T,Y,N}}, z::Dual{T,Z,N})         = $xy_body
-#         @inline $(f){T,X,Y,Z,N}(x::Dual{T,Dual{T,X,N}}, y::Dual{T,Y,N},         z::Dual{T,Dual{T,Z,N}}) = $xz_body
-#         @inline $(f){T,X,Y,Z,N}(x::Dual{T,X,N},         y::Dual{T,Dual{T,Y,N}}, z::Dual{T,Dual{T,Z,N}}) = $yz_body
-#         @inline $(f){T,X,Y,Z,N}(x::Dual{T,Dual{T,X,N}}, y::Dual{T,Y,N},         z::Dual{T,Z,N})         = $x_body
-#         @inline $(f){T,X,Y,Z,N}(x::Dual{T,X,N},         y::Dual{T,Dual{T,Y,N}}, z::Dual{T,Z,N})         = $y_body
-#         @inline $(f){T,X,Y,Z,N}(x::Dual{T,X,N},         y::Dual{T,Y,N},         z::Dual{T,Dual{T,Z,N}}) = $z_body
-#
-#         @inline $(f){T,S,X,Y,Z,N}(x::Dual{T,Dual{S,X,N}}, y::Dual{T,Dual{S,Y,N}}, z::Dual{T,Dual{S,Z,N}}) = $xyz_body
-#         @inline $(f){T,S,X,Y,Z,N}(x::Dual{T,Dual{S,X,N}}, y::Dual{T,Dual{S,Y,N}}, z::Dual{S,Z,N})         = $xy_body
-#         @inline $(f){T,S,X,Y,Z,N}(x::Dual{T,Dual{S,X,N}}, y::Dual{S,Y,N},         z::Dual{T,Dual{S,Z,N}}) = $xz_body
-#         @inline $(f){T,S,X,Y,Z,N}(x::Dual{S,X,N},         y::Dual{T,Dual{S,Y,N}}, z::Dual{T,Dual{S,Z,N}}) = $yz_body
-#         @inline $(f){T,S,X,Y,Z,N}(x::Dual{T,Dual{S,X,N}}, y::Dual{S,Y,N},         z::Dual{S,Z,N})         = $x_body
-#         @inline $(f){T,S,X,Y,Z,N}(x::Dual{S,X,N},         y::Dual{T,Dual{S,Y,N}}, z::Dual{S,Z,N})         = $y_body
-#         @inline $(f){T,S,X,Y,Z,N}(x::Dual{S,X,N},         y::Dual{S,Y,N},         z::Dual{T,Dual{S,Z,N}}) = $z_body
-#
-#         for RT in REAL_TYPES
-#             R = Expr(:$, :RT)
-#             @eval begin
-#                 println("here 1")
-#                 @inline $(f)(x::Dual, y::Dual, z::$R) = throw(TagMismatchError(x, y, z))
-#                 @inline $(f)(x::Dual, y::$R, z::Dual) = throw(TagMismatchError(x, y, z))
-#                 @inline $(f)(x::$R, y::Dual, z::Dual) = throw(TagMismatchError(x, y, z))
-#
-#                 @inline $(f){T}(x::Dual{T}, y::Dual{T}, z::$R) = $xy_body
-#                 @inline $(f){T}(x::Dual{T}, y::$R, z::Dual{T}) = $xz_body
-#                 @inline $(f){T}(x::$R, y::Dual{T}, z::Dual{T}) = $yz_body
-#
-#                 @inline $(f){T,X,Y,N}(x::Dual{T,Dual{T,X,N}}, y::Dual{T,Dual{T,Y,N}}, z::$R)                  = $xy_body
-#                 @inline $(f){T,X,Z,N}(x::Dual{T,Dual{T,X,N}}, y::$R,                  z::Dual{T,Dual{T,Z,N}}) = $xz_body
-#                 @inline $(f){T,Y,Z,N}(x::$R,                  y::Dual{T,Dual{T,Y,N}}, z::Dual{T,Dual{T,Z,N}}) = $yz_body
-#                 @inline $(f){T,X,Y,N}(x::Dual{T,Dual{T,X,N}}, y::Dual{T,Y,N}, z::$R)          = $x_body
-#                 @inline $(f){T,X,Z,N}(x::Dual{T,Dual{T,X,N}}, y::$R,          z::Dual{T,Z,N}) = $x_body
-#                 @inline $(f){T,X,Y,N}(x::Dual{T,X,N}, y::Dual{T,Dual{T,Y,N}}, z::$R)          = $y_body
-#                 @inline $(f){T,Y,Z,N}(x::$R,          y::Dual{T,Dual{T,Y,N}}, z::Dual{T,Z,N}) = $y_body
-#                 @inline $(f){T,X,Z,N}(x::Dual{T,X,N}, y::$R, z::Dual{T,Dual{T,Z,N}}) = $z_body
-#                 @inline $(f){T,Y,Z,N}(x::$R, y::Dual{T,Y,N}, z::Dual{T,Dual{T,Z,N}}) = $z_body
-#                 println("here 2")
-#                 @inline $(f){T,S,X,Y,N}(x::Dual{T,Dual{S,X,N}}, y::Dual{T,Dual{S,Y,N}}, z::$R)                  = $xy_body
-#                 @inline $(f){T,S,X,Z,N}(x::Dual{T,Dual{S,X,N}}, y::$R,                  z::Dual{T,Dual{S,Z,N}}) = $xz_body
-#                 @inline $(f){T,S,Y,Z,N}(x::$R,                  y::Dual{T,Dual{S,Y,N}}, z::Dual{T,Dual{S,Z,N}}) = $yz_body
-#                 @inline $(f){T,S,X,Y,N}(x::Dual{T,Dual{S,X,N}}, y::Dual{S,Y,N}, z::$R)          = $x_body
-#                 @inline $(f){T,S,X,Z,N}(x::Dual{T,Dual{S,X,N}}, y::$R,          z::Dual{S,Z,N}) = $x_body
-#                 @inline $(f){T,S,X,Y,N}(x::Dual{S,X,N}, y::Dual{T,Dual{S,Y,N}}, z::$R)          = $y_body
-#                 @inline $(f){T,S,Y,Z,N}(x::$R,          y::Dual{T,Dual{S,Y,N}}, z::Dual{S,Z,N}) = $y_body
-#                 @inline $(f){T,S,X,Z,N}(x::Dual{S,X,N}, y::$R, z::Dual{T,Dual{S,Z,N}}) = $z_body
-#                 @inline $(f){T,S,Y,Z,N}(x::$R, y::Dual{S,Y,N}, z::Dual{T,Dual{S,Z,N}}) = $z_body
-#             end
-#             for QT in REAL_TYPES
-#                 Q = Expr(:$, :QT)
-#                 @eval begin
-#                     println("here 3")
-#                     @inline $(f)(x::Dual, y::$R, z::$Q) = throw(TagMismatchError(x, y, z))
-#                     @inline $(f)(x::$R, y::Dual, z::$Q) = throw(TagMismatchError(x, y, z))
-#                     @inline $(f)(x::$R, y::$Q, z::Dual) = throw(TagMismatchError(x, y, z))
-#                     println("here 4")
-#                     @inline $(f){T}(x::Dual{T}, y::$R, z::$Q) = $x_body
-#                     @inline $(f){T}(x::$R, y::Dual{T}, z::$Q) = $y_body
-#                     @inline $(f){T}(x::$R, y::$Q, z::Dual{T}) = $z_body
-#                 end
-#             end
-#         end
-#     end)
-# end
+macro define_ternary_dual_op(f, xyz_body, xy_body, xz_body, yz_body, x_body, y_body, z_body)
+    defs = quote
+        @inline $(f)(x::Dual, y::Dual, z::Dual) = throw(TagMismatchError(x, y, z))
+        @inline $(f)(x::Dual{T}, y::Dual{T}, z::Dual{T}) where {T} = $xyz_body
+        @inline $(f)(x::Dual{T,Dual{S,X,N},M}, y::Dual{T,Dual{S,Y,N},M}, z::Dual{T,Dual{S,Z,N},M}) where {T,S,X<:Real,Y<:Real,Z<:Real,N,M} = $xyz_body
+        @inline $(f)(x::Dual{T,Dual{S,X,N},M}, y::Dual{T,Dual{S,Y,N},M}, z::Dual{S,Z,N})           where {T,S,X<:Real,Y<:Real,Z<:Real,N,M} = $xy_body
+        @inline $(f)(x::Dual{T,Dual{S,X,N},M}, y::Dual{S,Y,N},           z::Dual{T,Dual{S,Z,N},M}) where {T,S,X<:Real,Y<:Real,Z<:Real,N,M} = $xz_body
+        @inline $(f)(x::Dual{S,X,N},           y::Dual{T,Dual{S,Y,N},M}, z::Dual{T,Dual{S,Z,N},M}) where {T,S,X<:Real,Y<:Real,Z<:Real,N,M} = $yz_body
+        @inline $(f)(x::Dual{T,Dual{S,X,N},M}, y::Dual{S,Y,N},           z::Dual{S,Z,N})           where {T,S,X<:Real,Y<:Real,Z<:Real,N,M} = $x_body
+        @inline $(f)(x::Dual{S,X,N},           y::Dual{T,Dual{S,Y,N},M}, z::Dual{S,Z,N})           where {T,S,X<:Real,Y<:Real,Z<:Real,N,M} = $y_body
+        @inline $(f)(x::Dual{S,X,N},           y::Dual{S,Y,N},           z::Dual{T,Dual{S,Z,N},M}) where {T,S,X<:Real,Y<:Real,Z<:Real,N,M} = $z_body
+    end
+    for R in REAL_TYPES
+        expr = quote
+            @inline $(f)(x::Dual, y::Dual, z::$R) = throw(TagMismatchError(x, y, z))
+            @inline $(f)(x::Dual, y::$R, z::Dual) = throw(TagMismatchError(x, y, z))
+            @inline $(f)(x::$R, y::Dual, z::Dual) = throw(TagMismatchError(x, y, z))
+
+            @inline $(f)(x::Dual{T}, y::Dual{T}, z::$R) where {T} = $xy_body
+            @inline $(f)(x::Dual{T}, y::$R, z::Dual{T}) where {T} = $xz_body
+            @inline $(f)(x::$R, y::Dual{T}, z::Dual{T}) where {T} = $yz_body
+
+            @inline $(f)(x::Dual{T,Dual{S,X,N},M}, y::Dual{T,Dual{S,Y,N},M}, z::$R)                    where {T,S,X<:Real,Y<:Real,N,M} = $xy_body
+            @inline $(f)(x::Dual{T,Dual{S,X,N},M}, y::$R,                    z::Dual{T,Dual{S,Z,N},M}) where {T,S,X<:Real,Z<:Real,N,M} = $xz_body
+            @inline $(f)(x::$R,                    y::Dual{T,Dual{S,Y,N},M}, z::Dual{T,Dual{S,Z,N},M}) where {T,S,Y<:Real,Z<:Real,N,M} = $yz_body
+
+            @inline $(f)(x::Dual{T,Dual{S,X,N},M}, y::Dual{S,Y,N},           z::$R)                    where {T,S,X<:Real,Y<:Real,N,M} = $x_body
+            @inline $(f)(x::Dual{T,Dual{S,X,N},M}, y::$R,                    z::Dual{S,Z,N})           where {T,S,X<:Real,Z<:Real,N,M} = $x_body
+            @inline $(f)(x::$R,                    y::Dual{T,Dual{S,Y,N},M}, z::Dual{S,Z,N})           where {T,S,Y<:Real,Z<:Real,N,M} = $y_body
+            @inline $(f)(x::Dual{S,X,N},           y::Dual{T,Dual{S,Y,N},M}, z::$R)                    where {T,S,X<:Real,Y<:Real,N,M} = $y_body
+            @inline $(f)(x::Dual{S,X,N},           y::$R,                    z::Dual{T,Dual{S,Z,N},M}) where {T,S,X<:Real,Z<:Real,N,M} = $z_body
+            @inline $(f)(x::$R,                    y::Dual{S,Y,N},           z::Dual{T,Dual{S,Z,N},M}) where {T,S,Y<:Real,Z<:Real,N,M} = $z_body
+        end
+        append!(defs.args, expr.args)
+        for Q in REAL_TYPES
+            Q === R && continue
+            expr = quote
+                @inline $(f)(x::Dual{T}, y::$R, z::$Q) where {T} = $x_body
+                @inline $(f)(x::$R, y::Dual{T}, z::$Q) where {T} = $y_body
+                @inline $(f)(x::$R, y::$Q, z::Dual{T}) where {T} = $z_body
+            end
+            append!(defs.args, expr.args)
+        end
+    end
+    return esc(defs)
+end
 
 #####################
 # Generic Functions #
@@ -185,26 +159,26 @@ end
 Base.copy(d::Dual) = d
 
 Base.eps(d::Dual) = eps(value(d))
-Base.eps{D<:Dual}(::Type{D}) = eps(valtype(D))
+Base.eps(::Type{D}) where {D<:Dual} = eps(valtype(D))
 
-Base.rtoldefault{D<:Dual}(::Type{D}) = Base.rtoldefault(valtype(D))
+Base.rtoldefault(::Type{D}) where {D<:Dual} = Base.rtoldefault(valtype(D))
 
-Base.floor{R<:Real}(::Type{R}, d::Dual) = floor(R, value(d))
+Base.floor(::Type{R}, d::Dual) where {R<:Real} = floor(R, value(d))
 Base.floor(d::Dual) = floor(value(d))
 
-Base.ceil{R<:Real}(::Type{R}, d::Dual) = ceil(R, value(d))
+Base.ceil(::Type{R}, d::Dual) where {R<:Real} = ceil(R, value(d))
 Base.ceil(d::Dual) = ceil(value(d))
 
-Base.trunc{R<:Real}(::Type{R}, d::Dual) = trunc(R, value(d))
+Base.trunc(::Type{R}, d::Dual) where {R<:Real} = trunc(R, value(d))
 Base.trunc(d::Dual) = trunc(value(d))
 
-Base.round{R<:Real}(::Type{R}, d::Dual) = round(R, value(d))
+Base.round(::Type{R}, d::Dual) where {R<:Real} = round(R, value(d))
 Base.round(d::Dual) = round(value(d))
 
 Base.hash(d::Dual) = hash(value(d))
 Base.hash(d::Dual, hsh::UInt64) = hash(value(d), hsh)
 
-function Base.read{T,V,N}(io::IO, ::Type{Dual{T,V,N}})
+function Base.read(io::IO, ::Type{Dual{T,V,N}}) where {T,V,N}
     value = read(io, V)
     partials = read(io, Partials{N,V})
     return Dual{T,V,N}(value, partials)
@@ -216,15 +190,15 @@ function Base.write(io::IO, d::Dual)
 end
 
 @inline Base.zero(d::Dual) = zero(typeof(d))
-@inline Base.zero{T,V,N}(::Type{Dual{T,V,N}}) = Dual{T}(zero(V), zero(Partials{N,V}))
+@inline Base.zero(::Type{Dual{T,V,N}}) where {T,V,N} = Dual{T}(zero(V), zero(Partials{N,V}))
 
 @inline Base.one(d::Dual) = one(typeof(d))
-@inline Base.one{T,V,N}(::Type{Dual{T,V,N}}) = Dual{T}(one(V), zero(Partials{N,V}))
+@inline Base.one(::Type{Dual{T,V,N}}) where {T,V,N} = Dual{T}(one(V), zero(Partials{N,V}))
 
 @inline Base.rand(d::Dual) = rand(typeof(d))
-@inline Base.rand{T,V,N}(::Type{Dual{T,V,N}}) = Dual{T}(rand(V), zero(Partials{N,V}))
+@inline Base.rand(::Type{Dual{T,V,N}}) where {T,V,N} = Dual{T}(rand(V), zero(Partials{N,V}))
 @inline Base.rand(rng::AbstractRNG, d::Dual) = rand(rng, typeof(d))
-@inline Base.rand{T,V,N}(rng::AbstractRNG, ::Type{Dual{T,V,N}}) = Dual{T}(rand(rng, V), zero(Partials{N,V}))
+@inline Base.rand(rng::AbstractRNG, ::Type{Dual{T,V,N}}) where {T,V,N} = Dual{T}(rand(rng, V), zero(Partials{N,V}))
 
 # Predicates #
 #------------#
@@ -459,16 +433,16 @@ end
     return Dual{T}(h, p)
 end
 
-# @define_ternary_dual_op(
-#     Base.hypot,
-#     calc_hypot(x, y, z, T),
-#     calc_hypot(x, y, z, T),
-#     calc_hypot(x, y, z, T),
-#     calc_hypot(x, y, z, T),
-#     calc_hypot(x, y, z, T),
-#     calc_hypot(x, y, z, T),
-#     calc_hypot(x, y, z, T),
-# )
+@define_ternary_dual_op(
+    Base.hypot,
+    calc_hypot(x, y, z, T),
+    calc_hypot(x, y, z, T),
+    calc_hypot(x, y, z, T),
+    calc_hypot(x, y, z, T),
+    calc_hypot(x, y, z, T),
+    calc_hypot(x, y, z, T),
+    calc_hypot(x, y, z, T),
+)
 
 # atan2
 

From 72b3b7d2ed77de1c249665f9868390d4e6cfe261 Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Mon, 10 Apr 2017 13:17:57 -0400
Subject: [PATCH 13/26] re-enable SIMD tests

---
 test/runtests.jl | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 9be0309c..4e509e7c 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -30,18 +30,18 @@ tic()
 include("HessianTest.jl")
 println("done (took $(toq()) seconds).")
 
-# println("Testing miscellaneous functionality...")
-# tic()
-# include("MiscTest.jl")
-# println("done (took $(toq()) seconds).")
-#
-# if Base.JLOptions().opt_level >= 3 && VERSION >= v"0.5"
-#     println("Testing SIMD vectorization...")
-#     tic()
-#     include("SIMDTest.jl")
-#     println("done (took $(toq()) seconds).")
-# end
-#
+println("Testing miscellaneous functionality...")
+tic()
+include("MiscTest.jl")
+println("done (took $(toq()) seconds).")
+
+if Base.JLOptions().opt_level >= 3
+    println("Testing SIMD vectorization...")
+    tic()
+    include("SIMDTest.jl")
+    println("done (took $(toq()) seconds).")
+end
+
 # println("Testing deprecations...")
 # tic()
 # include("DeprecatedTest.jl")

From d01378aa11a87e70fc29ce209e14240086fef480 Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Mon, 10 Apr 2017 13:19:52 -0400
Subject: [PATCH 14/26] remove non-v0.6 version compat code and replace
 test_approx_duals with a simpler predicate (proposed by @KristofferC)

---
 test/DualTest.jl | 57 ++++++++++++++++--------------------------------
 1 file changed, 19 insertions(+), 38 deletions(-)

diff --git a/test/DualTest.jl b/test/DualTest.jl
index edefa636..6f3482c9 100644
--- a/test/DualTest.jl
+++ b/test/DualTest.jl
@@ -15,27 +15,8 @@ samerng() = MersenneTwister(1)
 # exponent by one
 intrand(V) = V == Int ? rand(2:10) : rand(V)
 
-# fix testing issue with Base.hypot(::Int...) undefined in 0.4
-if v"0.4" <= VERSION < v"0.5"
-    Base.hypot(x::Int, y::Int) = Base.hypot(Float64(x), Float64(y))
-    Base.hypot(x, y, z) = hypot(hypot(x, y), z)
-end
-
-if VERSION < v"0.5"
-    # isapprox on v0.4 doesn't properly set the tolerance
-    # for mixed-precision inputs, while @test_approx_eq does
-    # Use @eval to avoid expanding @test_approx_eq on 0.6 where it's deprecated
-    @eval test_approx_duals(a::Real, b::Real) = @test_approx_eq a b
-else
-    test_approx_duals(a::Real, b::Real) = @test isapprox(a, b)
-end
-
-function test_approx_duals{T,A,B,N}(a::Dual{T,A,N}, b::Dual{T,B,N})
-    test_approx_duals(value(a), value(b))
-    for i in 1:N
-        test_approx_duals(partials(a)[i], partials(b)[i])
-    end
-end
+dual_isapprox(a, b) = isapprox(a, b)
+dual_isapprox(a::Dual, b::Dual) = isapprox(value(a), value(b)) && isapprox(partials(a), partials(b))
 
 for N in (0,3), M in (0,4), V in (Int, Float32)
     println("  ...testing Dual{Void,$V,$N} and Dual{Void,Dual{Void,$V,$M},$N}")
@@ -368,21 +349,21 @@ for N in (0,3), M in (0,4), V in (Int, Float32)
         @test Dual{1}(FDNUM / PRIMAL, FDNUM2 / PRIMAL) === Dual{1}(FDNUM, FDNUM2) / PRIMAL
     end
 
-    test_approx_duals(FDNUM / FDNUM2, Dual(value(FDNUM) / value(FDNUM2), ForwardDiff._div_partials(partials(FDNUM), partials(FDNUM2), value(FDNUM), value(FDNUM2))))
-    test_approx_duals(FDNUM / PRIMAL, Dual(value(FDNUM) / PRIMAL, partials(FDNUM) / PRIMAL))
-    test_approx_duals(PRIMAL / FDNUM, Dual(PRIMAL / value(FDNUM), (-(PRIMAL) / value(FDNUM)^2) * partials(FDNUM)))
+    @test dual_isapprox(FDNUM / FDNUM2, Dual(value(FDNUM) / value(FDNUM2), ForwardDiff._div_partials(partials(FDNUM), partials(FDNUM2), value(FDNUM), value(FDNUM2))))
+    @test dual_isapprox(FDNUM / PRIMAL, Dual(value(FDNUM) / PRIMAL, partials(FDNUM) / PRIMAL))
+    @test dual_isapprox(PRIMAL / FDNUM, Dual(PRIMAL / value(FDNUM), (-(PRIMAL) / value(FDNUM)^2) * partials(FDNUM)))
 
-    test_approx_duals(NESTED_FDNUM / NESTED_FDNUM2, Dual(value(NESTED_FDNUM) / value(NESTED_FDNUM2), ForwardDiff._div_partials(partials(NESTED_FDNUM), partials(NESTED_FDNUM2), value(NESTED_FDNUM), value(NESTED_FDNUM2))))
-    test_approx_duals(NESTED_FDNUM / PRIMAL, Dual(value(NESTED_FDNUM) / PRIMAL, partials(NESTED_FDNUM) / PRIMAL))
-    test_approx_duals(PRIMAL / NESTED_FDNUM, Dual(PRIMAL / value(NESTED_FDNUM), (-(PRIMAL) / value(NESTED_FDNUM)^2) * partials(NESTED_FDNUM)))
+    @test dual_isapprox(NESTED_FDNUM / NESTED_FDNUM2, Dual(value(NESTED_FDNUM) / value(NESTED_FDNUM2), ForwardDiff._div_partials(partials(NESTED_FDNUM), partials(NESTED_FDNUM2), value(NESTED_FDNUM), value(NESTED_FDNUM2))))
+    @test dual_isapprox(NESTED_FDNUM / PRIMAL, Dual(value(NESTED_FDNUM) / PRIMAL, partials(NESTED_FDNUM) / PRIMAL))
+    @test dual_isapprox(PRIMAL / NESTED_FDNUM, Dual(PRIMAL / value(NESTED_FDNUM), (-(PRIMAL) / value(NESTED_FDNUM)^2) * partials(NESTED_FDNUM)))
 
-    test_approx_duals(FDNUM^FDNUM2, exp(FDNUM2 * log(FDNUM)))
-    test_approx_duals(FDNUM^PRIMAL, exp(PRIMAL * log(FDNUM)))
-    test_approx_duals(PRIMAL^FDNUM, exp(FDNUM * log(PRIMAL)))
+    @test dual_isapprox(FDNUM^FDNUM2, exp(FDNUM2 * log(FDNUM)))
+    @test dual_isapprox(FDNUM^PRIMAL, exp(PRIMAL * log(FDNUM)))
+    @test dual_isapprox(PRIMAL^FDNUM, exp(FDNUM * log(PRIMAL)))
 
-    test_approx_duals(NESTED_FDNUM^NESTED_FDNUM2, exp(NESTED_FDNUM2 * log(NESTED_FDNUM)))
-    test_approx_duals(NESTED_FDNUM^PRIMAL, exp(PRIMAL * log(NESTED_FDNUM)))
-    test_approx_duals(PRIMAL^NESTED_FDNUM, exp(NESTED_FDNUM * log(PRIMAL)))
+    @test dual_isapprox(NESTED_FDNUM^NESTED_FDNUM2, exp(NESTED_FDNUM2 * log(NESTED_FDNUM)))
+    @test dual_isapprox(NESTED_FDNUM^PRIMAL, exp(PRIMAL * log(NESTED_FDNUM)))
+    @test dual_isapprox(PRIMAL^NESTED_FDNUM, exp(NESTED_FDNUM * log(PRIMAL)))
 
     @test partials(NaNMath.pow(Dual(-2.0, 1.0), Dual(2.0, 0.0)), 1) == -4.0
 
@@ -436,11 +417,11 @@ for N in (0,3), M in (0,4), V in (Int, Float32)
                     @eval begin
                         fdnum = $(is_domain_err_func ? FDNUM + 1 : FDNUM)
                         $(v) = ForwardDiff.value(fdnum)
-                        $(test_approx_duals)($(func)(fdnum), ForwardDiff.Dual($(func)($v), $(deriv) * ForwardDiff.partials(fdnum)))
+                        @test duals_isapprox($(func)(fdnum), ForwardDiff.Dual($(func)($v), $(deriv) * ForwardDiff.partials(fdnum)))
                         if $(!(is_unsupported_nested_func))
                             nested_fdnum = $(is_domain_err_func ? NESTED_FDNUM + 1 : NESTED_FDNUM)
                             $(v) = ForwardDiff.value(nested_fdnum)
-                            $(test_approx_duals)($(func)(nested_fdnum), ForwardDiff.Dual($(func)($v), $(deriv) * ForwardDiff.partials(nested_fdnum)))
+                            @test duals_isapprox($(func)(nested_fdnum), ForwardDiff.Dual($(func)($v), $(deriv) * ForwardDiff.partials(nested_fdnum)))
                         end
                     end
                 end
@@ -454,9 +435,9 @@ for N in (0,3), M in (0,4), V in (Int, Float32)
     # Special Cases #
     #---------------#
 
-    test_approx_duals(hypot(FDNUM, FDNUM2), sqrt(FDNUM^2 + FDNUM2^2))
-    test_approx_duals(hypot(FDNUM, FDNUM2, FDNUM), sqrt(2*(FDNUM^2) + FDNUM2^2))
-    map(test_approx_duals, ForwardDiff.sincos(FDNUM), (sin(FDNUM), cos(FDNUM)))
+    @test dual_isapprox(hypot(FDNUM, FDNUM2), sqrt(FDNUM^2 + FDNUM2^2))
+    @test dual_isapprox(hypot(FDNUM, FDNUM2, FDNUM), sqrt(2*(FDNUM^2) + FDNUM2^2))
+    @test all(map(dual_isapprox, ForwardDiff.sincos(FDNUM), (sin(FDNUM), cos(FDNUM))))
 
     if V === Float32
         @test typeof(sqrt(FDNUM)) === typeof(FDNUM)

From 4fa1add3e85ce416d4d63bce2a6427f7c503d55d Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Mon, 10 Apr 2017 15:56:10 -0400
Subject: [PATCH 15/26] implement in-place tuple derivative function

---
 src/config.jl     | 36 +++++++++++++++++++---------
 src/derivative.jl | 60 ++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 71 insertions(+), 25 deletions(-)

diff --git a/src/config.jl b/src/config.jl
index bf8d8a48..9360358a 100644
--- a/src/config.jl
+++ b/src/config.jl
@@ -2,7 +2,21 @@
 # Tag #
 #######
 
-struct Tag{F,M} end
+struct Tag{F,H} end
+
+# Here, we could've just as easily used `hash`; however, this
+# is unsafe/undefined behavior if `hash(::Type{V})` is overloaded
+# in a module loaded after ForwardDiff. Thus, we instead use
+# `hash(Symbol(V))`, which is somewhat safer since it's far less
+# likely that somebody would overwrite the Base definition for
+# `Symbol(::DataType)` or `hash(::Symbol)`.
+@generated function Tag(::Type{F}, ::Type{V}) where {F,V}
+    H = hash(Symbol(V))
+    return quote
+        $(Expr(:meta, :inline))
+        Tag{F,$H}()
+    end
+end
 
 #########
 # Chunk #
@@ -37,9 +51,9 @@ end
 
 abstract type AbstractConfig{T<:Tag,N} end
 
-struct ConfigMismatchError{F,G,M} <: Exception
+struct ConfigMismatchError{F,G,H} <: Exception
     f::F
-    cfg::AbstractConfig{Tag{G,M}}
+    cfg::AbstractConfig{Tag{G,H}}
 end
 
 function Base.showerror{F,G}(io::IO, e::ConfigMismatchError{F,G})
@@ -67,7 +81,7 @@ end
 function GradientConfig{V,N,F,T}(::F,
                                  x::AbstractArray{V},
                                  ::Chunk{N} = Chunk(x),
-                                 ::T = Tag{F,order(V)}())
+                                 ::T = Tag(F, V))
     seeds = construct_seeds(Partials{N,V})
     duals = similar(x, Dual{T,V,N})
     return GradientConfig{T,V,N,typeof(duals)}(seeds, duals)
@@ -85,7 +99,7 @@ end
 function JacobianConfig{V,N,F,T}(::F,
                                  x::AbstractArray{V},
                                  ::Chunk{N} = Chunk(x),
-                                 ::T = Tag{F,order(V)}())
+                                 ::T = Tag(F, V))
     seeds = construct_seeds(Partials{N,V})
     duals = similar(x, Dual{T,V,N})
     return JacobianConfig{T,V,N,typeof(duals)}(seeds, duals)
@@ -95,7 +109,7 @@ function JacobianConfig{Y,X,N,F,T}(::F,
                                    y::AbstractArray{Y},
                                    x::AbstractArray{X},
                                    ::Chunk{N} = Chunk(x),
-                                   ::T = Tag{F,order(X)}())
+                                   ::T = Tag(F, X))
     seeds = construct_seeds(Partials{N,X})
     yduals = similar(y, Dual{T,Y,N})
     xduals = similar(x, Dual{T,X,N})
@@ -107,15 +121,15 @@ end
 # HessianConfig #
 #################
 
-struct HessianConfig{T,V,N,D,MJ,DJ} <: AbstractConfig{T,N}
-    jacobian_config::JacobianConfig{Tag{Void,MJ},V,N,DJ}
-    gradient_config::GradientConfig{T,Dual{Tag{Void,MJ},V,N},D}
+struct HessianConfig{T,V,N,D,H,DJ} <: AbstractConfig{T,N}
+    jacobian_config::JacobianConfig{Tag{Void,H},V,N,DJ}
+    gradient_config::GradientConfig{T,Dual{Tag{Void,H},V,N},D}
 end
 
 function HessianConfig{F,V}(f::F,
                             x::AbstractArray{V},
                             chunk::Chunk = Chunk(x),
-                            tag::Tag = Tag{F,order(Dual{Void,V,0})}())
+                            tag::Tag = Tag(F, Dual{Void,V,0}))
     jacobian_config = JacobianConfig(nothing, x, chunk)
     gradient_config = GradientConfig(f, jacobian_config.duals, chunk, tag)
     return HessianConfig(jacobian_config, gradient_config)
@@ -125,7 +139,7 @@ function HessianConfig{F,V}(result::DiffResult,
                             f::F,
                             x::AbstractArray{V},
                             chunk::Chunk = Chunk(x),
-                            tag::Tag = Tag{F,order(Dual{Void,V,0})}())
+                            tag::Tag = Tag(F, Dual{Void,V,0}))
     jacobian_config = JacobianConfig(nothing, DiffBase.gradient(result), x, chunk)
     gradient_config = GradientConfig(f, jacobian_config.duals[2], chunk, tag)
     return HessianConfig(jacobian_config, gradient_config)
diff --git a/src/derivative.jl b/src/derivative.jl
index 1cb83797..8e7987db 100644
--- a/src/derivative.jl
+++ b/src/derivative.jl
@@ -2,29 +2,32 @@
 # API methods #
 ###############
 
-@generated function derivative{F,R<:Real}(f::F, x::R)
-    T = Tag{F,order(R)}
-    return quote
-        $(Expr(:meta, :inline))
-        return extract_derivative(f(Dual{$T}(x, one(x))))
-    end
+@inline function derivative(f::F, x::R) where {F,R<:Real}
+    T = Tag(F, R)
+    return extract_derivative(f(Dual{T}(x, one(x))))
 end
 
-@generated function derivative{F,N}(f::F, x::NTuple{N,Real})
-    T = Tag{F,maximum(order(R) for R in x.parameters)}
-    args = [:(Dual{$T}(x[$i], Val{N}, Val{$i})) for i in 1:N]
+@generated function derivative(f::F, x::NTuple{N,Real}) where {F,N}
+    args = [:(Dual{T}(x[$i], Val{N}, Val{$i})) for i in 1:N]
     return quote
         $(Expr(:meta, :inline))
+        T = Tag(F, typeof(x))
         extract_derivative(f($(args...)))
     end
 end
 
-@generated function derivative!{F,R<:Real}(out, f::F, x::R)
-    T = Tag{F,order(R)}
+@inline function derivative!(out, f::F, x::R) where {F,R<:Real}
+    T = Tag(F, typeof(x))
+    extract_derivative!(out, f(Dual{T}(x, one(x))))
+    return out
+end
+
+@generated function derivative!(out::NTuple{N,Any}, f::F, x::NTuple{N,Real}) where {F,N}
+    args = [:(Dual{T}(x[$i], Val{N}, Val{$i})) for i in 1:N]
     return quote
         $(Expr(:meta, :inline))
-        extract_derivative!(out, f(Dual{$T}(x, one(x))))
-        return out
+        T = Tag(F, typeof(x))
+        extract_derivative!(out, f($(args...)))
     end
 end
 
@@ -32,7 +35,10 @@ end
 # result extraction #
 #####################
 
-@generated function extract_derivative{T,V,N}(y::Dual{T,V,N})
+# non-mutating #
+#--------------#
+
+@generated function extract_derivative(y::Dual{T,V,N}) where {T,V,N}
     return quote
         $(Expr(:meta, :inline))
         $(Expr(:tuple, [:(partials(y, $i)) for i in 1:N]...))
@@ -43,10 +49,36 @@ end
 @inline extract_derivative(y::Real) = zero(y)
 @inline extract_derivative(y::AbstractArray) = extract_derivative!(similar(y, valtype(eltype(y))), y)
 
+# mutating #
+#----------#
+
+@generated function extract_derivative!(out::NTuple{N,Any}, y::Dual{T,V,N}) where {T,V,N}
+    return quote
+        $(Expr(:meta, :inline))
+        $(Expr(:block, [:(out[$i][] = partials(y, $i)) for i in 1:N]...))
+        return out
+    end
+end
+
+@generated function extract_derivative!(out::NTuple{N,Any}, y::AbstractArray) where {N}
+    return quote
+        $(Expr(:meta, :inline))
+        $(Expr(:block, [:(extract_derivative!(out[$i], y, $i)) for i in 1:N]...))
+        return out
+    end
+end
+
 extract_derivative!(out::AbstractArray, y::AbstractArray) = map!(extract_derivative, out, y)
+extract_derivative!(out::AbstractArray, y::AbstractArray, p) = map!(x -> partials(x, p), out, y)
 
 function extract_derivative!(out::DiffResult, y)
     DiffBase.value!(value, out, y)
     DiffBase.derivative!(extract_derivative, out, y)
     return out
 end
+
+function extract_derivative!(out::DiffResult, y::AbstractArray, p)
+    DiffBase.value!(value, out, y)
+    DiffBase.derivative!(x -> partials(x, p), out, y)
+    return out
+end

From a53de0f669200e36d93275c1a09884ca453f71bc Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Mon, 10 Apr 2017 16:20:49 -0400
Subject: [PATCH 16/26] fix typo

---
 test/DualTest.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/DualTest.jl b/test/DualTest.jl
index 6f3482c9..403155b2 100644
--- a/test/DualTest.jl
+++ b/test/DualTest.jl
@@ -417,11 +417,11 @@ for N in (0,3), M in (0,4), V in (Int, Float32)
                     @eval begin
                         fdnum = $(is_domain_err_func ? FDNUM + 1 : FDNUM)
                         $(v) = ForwardDiff.value(fdnum)
-                        @test duals_isapprox($(func)(fdnum), ForwardDiff.Dual($(func)($v), $(deriv) * ForwardDiff.partials(fdnum)))
+                        @test dual_isapprox($(func)(fdnum), ForwardDiff.Dual($(func)($v), $(deriv) * ForwardDiff.partials(fdnum)))
                         if $(!(is_unsupported_nested_func))
                             nested_fdnum = $(is_domain_err_func ? NESTED_FDNUM + 1 : NESTED_FDNUM)
                             $(v) = ForwardDiff.value(nested_fdnum)
-                            @test duals_isapprox($(func)(nested_fdnum), ForwardDiff.Dual($(func)($v), $(deriv) * ForwardDiff.partials(nested_fdnum)))
+                            @test dual_isapprox($(func)(nested_fdnum), ForwardDiff.Dual($(func)($v), $(deriv) * ForwardDiff.partials(nested_fdnum)))
                         end
                     end
                 end

From a93cc69503e2e4b09244c14cb4fe6c5b9fbddac6 Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Tue, 11 Apr 2017 13:38:51 -0400
Subject: [PATCH 17/26] get FMA working

---
 src/dual.jl      | 48 ++++++++++++++++++++++++++++++------------------
 test/DualTest.jl | 24 +++++++++---------------
 2 files changed, 39 insertions(+), 33 deletions(-)

diff --git a/src/dual.jl b/src/dual.jl
index 14bfa9d6..23e7789a 100644
--- a/src/dual.jl
+++ b/src/dual.jl
@@ -148,6 +148,12 @@ macro define_ternary_dual_op(f, xyz_body, xy_body, xz_body, yz_body, x_body, y_b
             end
             append!(defs.args, expr.args)
         end
+        expr = quote
+            @inline $(f)(x::Dual{T}, y::$R, z::$R) where {T} = $x_body
+            @inline $(f)(x::$R, y::Dual{T}, z::$R) where {T} = $y_body
+            @inline $(f)(x::$R, y::$R, z::Dual{T}) where {T} = $z_body
+        end
+        append!(defs.args, expr.args)
     end
     return esc(defs)
 end
@@ -391,23 +397,21 @@ end
 # Special Cases #
 #################
 
-# Manually Optimized Functions #
-#------------------------------#
+# exp
 
 @inline function Base.exp{T}(d::Dual{T})
     expv = exp(value(d))
     return Dual{T}(expv, expv * partials(d))
 end
 
+# sqrt
+
 @inline function Base.sqrt{T}(d::Dual{T})
     sqrtv = sqrt(value(d))
     deriv = inv(sqrtv + sqrtv)
     return Dual{T}(sqrtv, deriv * partials(d))
 end
 
-# Other Functions #
-#-----------------#
-
 # hypot
 
 @inline function calc_hypot{T}(x, y, ::Type{T})
@@ -461,22 +465,28 @@ end
     calc_atan2(x, y, T)
 )
 
-@generated function Base.fma{N}(x::Dual{N}, y::Dual{N}, z::Dual{N})
+# fma
+
+@generated function calc_fma_xyz(x::Dual{T,<:Real,N},
+                                 y::Dual{T,<:Real,N},
+                                 z::Dual{T,<:Real,N}) where {T,N}
     ex = Expr(:tuple, [:(fma(value(x), partials(y)[$i], fma(value(y), partials(x)[$i], partials(z)[$i]))) for i in 1:N]...)
     return quote
         $(Expr(:meta, :inline))
         v = fma(value(x), value(y), value(z))
-        Dual(v, $ex)
+        return Dual{T}(v, $ex)
     end
 end
 
-@inline function Base.fma(x::Dual, y::Dual, z::Real)
+@inline function calc_fma_xy(x::Dual{T}, y::Dual{T}, z::Real) where T
     vx, vy = value(x), value(y)
     result = fma(vx, vy, z)
-    return Dual(result, _mul_partials(partials(x), partials(y), vy, vx))
+    return Dual{T}(result, _mul_partials(partials(x), partials(y), vy, vx))
 end
 
-@generated function Base.fma{N}(x::Dual{N}, y::Real, z::Dual{N})
+@generated function calc_fma_xz(x::Dual{T,<:Real,N},
+                                y::Real,
+                                z::Dual{T,<:Real,N}) where {T,N}
     ex = Expr(:tuple, [:(fma(partials(x)[$i], y,  partials(z)[$i])) for i in 1:N]...)
     return quote
         $(Expr(:meta, :inline))
@@ -485,14 +495,16 @@ end
     end
 end
 
-@inline Base.fma(x::Real, y::Dual, z::Dual) = fma(y, x, z)
-
-@inline function Base.fma(x::Dual, y::Real, z::Real)
-    vx = value(x)
-    return Dual(fma(vx, y, value(z)), partials(x) * y)
-end
-
-@inline Base.fma(x::Real, y::Dual, z::Real) = fma(y, x, z)
+@define_ternary_dual_op(
+    Base.fma,
+    calc_fma_xyz(x, y, z),                         # xyz_body
+    calc_fma_xy(x, y, z),                          # xy_body
+    calc_fma_xz(x, y, z),                          # xz_body
+    Base.fma(y, x, z),                             # yz_body
+    Dual{T}(fma(value(x), y, z), partials(x) * y), # x_body
+    Base.fma(y, x, z),                             # y_body
+    Dual{T}(fma(x, y, value(z)), partials(z))      # z_body
+)
 
 # sincos
 
diff --git a/test/DualTest.jl b/test/DualTest.jl
index 403155b2..c8f91765 100644
--- a/test/DualTest.jl
+++ b/test/DualTest.jl
@@ -367,21 +367,6 @@ for N in (0,3), M in (0,4), V in (Int, Float32)
 
     @test partials(NaNMath.pow(Dual(-2.0, 1.0), Dual(2.0, 0.0)), 1) == -4.0
 
-    test_approx_diffnums(fma(FDNUM, FDNUM2, FDNUM3), Dual(fma(PRIMAL, PRIMAL2, PRIMAL3),
-                                             PRIMAL*PARTIALS2 + PRIMAL2*PARTIALS +
-                                             PARTIALS3))
-    test_approx_diffnums(fma(FDNUM, FDNUM2, PRIMAL3), Dual(fma(PRIMAL, PRIMAL2, PRIMAL3),
-                                              PRIMAL*PARTIALS2 + PRIMAL2*PARTIALS))
-    test_approx_diffnums(fma(PRIMAL, FDNUM2, FDNUM3), Dual(fma(PRIMAL, PRIMAL2, PRIMAL3),
-                                              PRIMAL*PARTIALS2 + PARTIALS3))
-    test_approx_diffnums(fma(PRIMAL, FDNUM2, PRIMAL3), Dual(fma(PRIMAL, PRIMAL2, PRIMAL3),
-                                               PRIMAL*PARTIALS2))
-    test_approx_diffnums(fma(FDNUM, PRIMAL2, FDNUM3), Dual(fma(PRIMAL, PRIMAL2, PRIMAL3),
-                                              PRIMAL2*PARTIALS + PARTIALS3))
-    test_approx_diffnums(fma(FDNUM, PRIMAL2, PRIMAL3), Dual(fma(PRIMAL, PRIMAL2, PRIMAL3),
-                                               PRIMAL2*PARTIALS))
-    test_approx_diffnums(fma(PRIMAL, PRIMAL2, FDNUM3), Dual(fma(PRIMAL, PRIMAL2, PRIMAL3), PARTIALS3))
-
     # Unary Functions #
     #-----------------#
 
@@ -437,12 +422,21 @@ for N in (0,3), M in (0,4), V in (Int, Float32)
 
     @test dual_isapprox(hypot(FDNUM, FDNUM2), sqrt(FDNUM^2 + FDNUM2^2))
     @test dual_isapprox(hypot(FDNUM, FDNUM2, FDNUM), sqrt(2*(FDNUM^2) + FDNUM2^2))
+
     @test all(map(dual_isapprox, ForwardDiff.sincos(FDNUM), (sin(FDNUM), cos(FDNUM))))
 
     if V === Float32
         @test typeof(sqrt(FDNUM)) === typeof(FDNUM)
         @test typeof(sqrt(NESTED_FDNUM)) === typeof(NESTED_FDNUM)
     end
+
+    @test dual_isapprox(fma(FDNUM, FDNUM2, FDNUM3),   Dual(fma(PRIMAL, PRIMAL2, PRIMAL3), PRIMAL*PARTIALS2 + PRIMAL2*PARTIALS + PARTIALS3))
+    @test dual_isapprox(fma(FDNUM, FDNUM2, PRIMAL3),  Dual(fma(PRIMAL, PRIMAL2, PRIMAL3), PRIMAL*PARTIALS2 + PRIMAL2*PARTIALS))
+    @test dual_isapprox(fma(PRIMAL, FDNUM2, FDNUM3),  Dual(fma(PRIMAL, PRIMAL2, PRIMAL3), PRIMAL*PARTIALS2 + PARTIALS3))
+    @test dual_isapprox(fma(PRIMAL, FDNUM2, PRIMAL3), Dual(fma(PRIMAL, PRIMAL2, PRIMAL3), PRIMAL*PARTIALS2))
+    @test dual_isapprox(fma(FDNUM, PRIMAL2, FDNUM3),  Dual(fma(PRIMAL, PRIMAL2, PRIMAL3), PRIMAL2*PARTIALS + PARTIALS3))
+    @test dual_isapprox(fma(FDNUM, PRIMAL2, PRIMAL3), Dual(fma(PRIMAL, PRIMAL2, PRIMAL3), PRIMAL2*PARTIALS))
+    @test dual_isapprox(fma(PRIMAL, PRIMAL2, FDNUM3), Dual(fma(PRIMAL, PRIMAL2, PRIMAL3), PARTIALS3))
 end
 
 end # module

From 56a839e8f4d101d4c996bd279a1f0857b2128fcf Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Tue, 11 Apr 2017 14:11:46 -0400
Subject: [PATCH 18/26] use where syntax instead of to-be-deprecated prefix
 typevar syntax

---
 src/config.jl    | 48 ++++++++++++++--------------
 src/dual.jl      | 47 ++++++++++++++-------------
 src/gradient.jl  | 18 +++++------
 src/hessian.jl   |  8 ++---
 src/jacobian.jl  | 26 +++++++--------
 src/partials.jl  | 82 ++++++++++++++++++++++++------------------------
 src/utils.jl     | 22 ++++++-------
 test/SIMDTest.jl |  2 +-
 8 files changed, 128 insertions(+), 125 deletions(-)

diff --git a/src/config.jl b/src/config.jl
index 9360358a..5390d6bf 100644
--- a/src/config.jl
+++ b/src/config.jl
@@ -56,7 +56,7 @@ struct ConfigMismatchError{F,G,H} <: Exception
     cfg::AbstractConfig{Tag{G,H}}
 end
 
-function Base.showerror{F,G}(io::IO, e::ConfigMismatchError{F,G})
+function Base.showerror(io::IO, e::ConfigMismatchError{F,G}) where {F,G}
     print(io, "The provided configuration (of type $(typeof(e.cfg))) was constructed for a",
               " function other than the current target function. ForwardDiff cannot safely",
               " perform differentiation in this context; see the following issue for details:",
@@ -67,7 +67,7 @@ end
 
 Base.copy(cfg::AbstractConfig) = deepcopy(cfg)
 
-@inline chunksize{T,N}(::AbstractConfig{T,N}) = N
+@inline chunksize(::AbstractConfig{T,N}) where {T,N} = N
 
 ##################
 # GradientConfig #
@@ -78,10 +78,10 @@ struct GradientConfig{T,V,N,D} <: AbstractConfig{T,N}
     duals::D
 end
 
-function GradientConfig{V,N,F,T}(::F,
-                                 x::AbstractArray{V},
-                                 ::Chunk{N} = Chunk(x),
-                                 ::T = Tag(F, V))
+function GradientConfig(::F,
+                        x::AbstractArray{V},
+                        ::Chunk{N} = Chunk(x),
+                        ::T = Tag(F, V)) where {F,V,N,T}
     seeds = construct_seeds(Partials{N,V})
     duals = similar(x, Dual{T,V,N})
     return GradientConfig{T,V,N,typeof(duals)}(seeds, duals)
@@ -96,20 +96,20 @@ struct JacobianConfig{T,V,N,D} <: AbstractConfig{T,N}
     duals::D
 end
 
-function JacobianConfig{V,N,F,T}(::F,
-                                 x::AbstractArray{V},
-                                 ::Chunk{N} = Chunk(x),
-                                 ::T = Tag(F, V))
+function JacobianConfig(::F,
+                        x::AbstractArray{V},
+                        ::Chunk{N} = Chunk(x),
+                        ::T = Tag(F, V)) where {F,V,N,T}
     seeds = construct_seeds(Partials{N,V})
     duals = similar(x, Dual{T,V,N})
     return JacobianConfig{T,V,N,typeof(duals)}(seeds, duals)
 end
 
-function JacobianConfig{Y,X,N,F,T}(::F,
-                                   y::AbstractArray{Y},
-                                   x::AbstractArray{X},
-                                   ::Chunk{N} = Chunk(x),
-                                   ::T = Tag(F, X))
+function JacobianConfig(::F,
+                        y::AbstractArray{Y},
+                        x::AbstractArray{X},
+                        ::Chunk{N} = Chunk(x),
+                        ::T = Tag(F, X)) where {F,Y,X,N,T}
     seeds = construct_seeds(Partials{N,X})
     yduals = similar(y, Dual{T,Y,N})
     xduals = similar(x, Dual{T,X,N})
@@ -126,20 +126,20 @@ struct HessianConfig{T,V,N,D,H,DJ} <: AbstractConfig{T,N}
     gradient_config::GradientConfig{T,Dual{Tag{Void,H},V,N},D}
 end
 
-function HessianConfig{F,V}(f::F,
-                            x::AbstractArray{V},
-                            chunk::Chunk = Chunk(x),
-                            tag::Tag = Tag(F, Dual{Void,V,0}))
+function HessianConfig(f::F,
+                       x::AbstractArray{V},
+                       chunk::Chunk = Chunk(x),
+                       tag::Tag = Tag(F, Dual{Void,V,0})) where {F,V}
     jacobian_config = JacobianConfig(nothing, x, chunk)
     gradient_config = GradientConfig(f, jacobian_config.duals, chunk, tag)
     return HessianConfig(jacobian_config, gradient_config)
 end
 
-function HessianConfig{F,V}(result::DiffResult,
-                            f::F,
-                            x::AbstractArray{V},
-                            chunk::Chunk = Chunk(x),
-                            tag::Tag = Tag(F, Dual{Void,V,0}))
+function HessianConfig(result::DiffResult,
+                       f::F,
+                       x::AbstractArray{V},
+                       chunk::Chunk = Chunk(x),
+                       tag::Tag = Tag(F, Dual{Void,V,0})) where {F,V}
     jacobian_config = JacobianConfig(nothing, DiffBase.gradient(result), x, chunk)
     gradient_config = GradientConfig(f, jacobian_config.duals[2], chunk, tag)
     return HessianConfig(jacobian_config, gradient_config)
diff --git a/src/dual.jl b/src/dual.jl
index 23e7789a..7e3ec4c9 100644
--- a/src/dual.jl
+++ b/src/dual.jl
@@ -234,26 +234,29 @@ Base.isodd(d::Dual) = isodd(value(d))
 # Promotion/Conversion #
 ########################
 
-Base.promote_rule{T,A<:Real,B<:Real,N}(::Type{Dual{T,A,N}}, ::Type{Dual{T,B,N}}) = Dual{T,promote_type(A, B),N}
+function Base.promote_rule(::Type{Dual{T,A,N}},
+                           ::Type{Dual{T,B,N}}) where {T,A<:Real,B<:Real,N}
+    return Dual{T,promote_type(A, B),N}
+end
 
 for R in (:BigFloat, :Bool, :Irrational, :Real)
     @eval begin
-        Base.promote_rule{R<:$R,T,V<:Real,N}(::Type{R}, ::Type{Dual{T,V,N}}) = Dual{T,promote_type(R, V),N}
-        Base.promote_rule{T,V<:Real,N,R<:$R}(::Type{Dual{T,V,N}}, ::Type{R}) = Dual{T,promote_type(V, R),N}
+        Base.promote_rule(::Type{R}, ::Type{Dual{T,V,N}}) where {R<:$R,T,V<:Real,N} = Dual{T,promote_type(R, V),N}
+        Base.promote_rule(::Type{Dual{T,V,N}}, ::Type{R}) where {T,V<:Real,N,R<:$R} = Dual{T,promote_type(V, R),N}
     end
 end
 
-Base.convert{T,V<:Real,N}(::Type{Dual{T,V,N}}, d::Dual{T}) = Dual{T}(convert(V, value(d)), convert(Partials{N,V}, partials(d)))
-Base.convert{T,V<:Real,N}(::Type{Dual{T,V,N}}, x::Real) = Dual{T}(V(x), zero(Partials{N,V}))
-Base.convert{D<:Dual}(::Type{D}, d::D) = d
+Base.convert(::Type{Dual{T,V,N}}, d::Dual{T}) where {T,V<:Real,N} = Dual{T}(convert(V, value(d)), convert(Partials{N,V}, partials(d)))
+Base.convert(::Type{Dual{T,V,N}}, x::Real) where {T,V<:Real,N} = Dual{T}(V(x), zero(Partials{N,V}))
+Base.convert(::Type{D}, d::D) where {D<:Dual} = d
 
-Base.promote_array_type{D<:Dual, A<:AbstractFloat}(F, ::Type{D}, ::Type{A}) = promote_type(D, A)
-Base.promote_array_type{D<:Dual, A<:AbstractFloat, P}(F, ::Type{D}, ::Type{A}, ::Type{P}) = P
-Base.promote_array_type{A<:AbstractFloat, D<:Dual}(F, ::Type{A}, ::Type{D}) = promote_type(D, A)
-Base.promote_array_type{A<:AbstractFloat, D<:Dual, P}(F, ::Type{A}, ::Type{D}, ::Type{P}) = P
+Base.promote_array_type(F, ::Type{D}, ::Type{A}) where {D<:Dual,A<:AbstractFloat} = promote_type(D, A)
+Base.promote_array_type(F, ::Type{<:Dual}, ::Type{<:AbstractFloat}, ::Type{P}) where {P} = P
+Base.promote_array_type(F, ::Type{A}, ::Type{D}) where {D<:Dual,A<:AbstractFloat} = promote_type(D, A)
+Base.promote_array_type(F, ::Type{<:AbstractFloat}, ::Type{<:Dual}, ::Type{P}) where {P} = P
 
-Base.float{T,V,N}(d::Dual{T,V,N}) = Dual{T,promote_type(V, Float16),N}(d)
-Base.AbstractFloat{T,V,N}(d::Dual{T,V,N}) = Dual{T,promote_type(V, Float16),N}(d)
+Base.float(d::Dual{T,V,N}) where {T,V,N} = Dual{T,promote_type(V, Float16),N}(d)
+Base.AbstractFloat(d::Dual{T,V,N}) where {T,V,N} = Dual{T,promote_type(V, Float16),N}(d)
 
 ########
 # Math #
@@ -276,7 +279,7 @@ Base.AbstractFloat{T,V,N}(d::Dual{T,V,N}) = Dual{T,promote_type(V, Float16),N}(d
     Dual{T}(x - value(y), -partials(y))
 )
 
-@inline Base.:-{T}(d::Dual{T}) = Dual{T}(-value(d), -partials(d))
+@inline Base.:-(d::Dual{T}) where {T} = Dual{T}(-value(d), -partials(d))
 
 # Multiplication #
 #----------------#
@@ -373,7 +376,7 @@ for fsym in AUTO_DEFINED_UNARY_FUNCS
         (!(is_special_function) || VERSION < v"0.6.0-dev.2767") && push!(funcs, :(Base.$(fsym)))
         for func in funcs
             @eval begin
-                @inline function $(func){T}(d::Dual{T})
+                @inline function $(func)(d::Dual{T}) where T
                     $(v) = value(d)
                     return Dual{T}($(func)($v), $(deriv) * partials(d))
                 end
@@ -385,7 +388,7 @@ for fsym in AUTO_DEFINED_UNARY_FUNCS
     if fsym in NANMATH_FUNCS
         nan_deriv = to_nanmath(deriv)
         @eval begin
-            @inline function NaNMath.$(fsym){T}(d::Dual{T})
+            @inline function NaNMath.$(fsym)(d::Dual{T}) where T
                 v = value(d)
                 return Dual{T}(NaNMath.$(fsym)($v), $(nan_deriv) * partials(d))
             end
@@ -399,14 +402,14 @@ end
 
 # exp
 
-@inline function Base.exp{T}(d::Dual{T})
+@inline function Base.exp(d::Dual{T}) where T
     expv = exp(value(d))
     return Dual{T}(expv, expv * partials(d))
 end
 
 # sqrt
 
-@inline function Base.sqrt{T}(d::Dual{T})
+@inline function Base.sqrt(d::Dual{T}) where T
     sqrtv = sqrt(value(d))
     deriv = inv(sqrtv + sqrtv)
     return Dual{T}(sqrtv, deriv * partials(d))
@@ -414,7 +417,7 @@ end
 
 # hypot
 
-@inline function calc_hypot{T}(x, y, ::Type{T})
+@inline function calc_hypot(x, y, ::Type{T}) where T
     vx = value(x)
     vy = value(y)
     h = hypot(vx, vy)
@@ -428,7 +431,7 @@ end
     calc_hypot(x, y, T)
 )
 
-@inline function calc_hypot{T}(x, y, z, ::Type{T})
+@inline function calc_hypot(x, y, z, ::Type{T}) where T
     vx = value(x)
     vy = value(y)
     vz = value(z)
@@ -450,7 +453,7 @@ end
 
 # atan2
 
-@inline function calc_atan2{T}(y, x, ::Type{T})
+@inline function calc_atan2(y, x, ::Type{T}) where T
     z = y / x
     v = value(z)
     atan2v = atan2(value(y), value(x))
@@ -510,7 +513,7 @@ end
 
 @inline sincos(x) = (sin(x), cos(x))
 
-@inline function sincos{T}(d::Dual{T})
+@inline function sincos(d::Dual{T}) where T
     sd, cd = sincos(value(d))
     return (Dual{T}(sd, cd * partials(d)), Dual{T}(cd, -sd * partials(d)))
 end
@@ -519,7 +522,7 @@ end
 # Pretty Printing #
 ###################
 
-function Base.show{T,V,N}(io::IO, d::Dual{T,V,N})
+function Base.show(io::IO, d::Dual{T,V,N}) where {T,V,N}
     print(io, "Dual{$T}(", value(d))
     for i in 1:N
         print(io, ",", partials(d, i))
diff --git a/src/gradient.jl b/src/gradient.jl
index 5b2ad6da..c611d22a 100644
--- a/src/gradient.jl
+++ b/src/gradient.jl
@@ -2,12 +2,12 @@
 # API methods #
 ###############
 
-const AllowedGradientConfig{F,M} = Union{GradientConfig{Tag{F,M}}, GradientConfig{Tag{Void,M}}}
+const AllowedGradientConfig{F,H} = Union{GradientConfig{Tag{F,H}}, GradientConfig{Tag{Void,H}}}
 
-gradient(f, x, cfg::GradientConfig) = throw(ConfigMismatchError(f, cfg))
-gradient!(out, f, x, cfg::GradientConfig) = throw(ConfigMismatchError(f, cfg))
+gradient(f, x, cfg::GradientConfig) = throw(ConfigMismatchError(f, cfg))
+gradient!(out, f, x, cfg::GradientConfig) = throw(ConfigMismatchError(f, cfg))
 
-function gradient{F,M}(f::F, x, cfg::AllowedGradientConfig{F,M} = GradientConfig(f, x))
+function gradient(f::F, x, cfg::AllowedGradientConfig{F,H} = GradientConfig(f, x)) where {F,H}
     if chunksize(cfg) == length(x)
         return vector_mode_gradient(f, x, cfg)
     else
@@ -15,7 +15,7 @@ function gradient{F,M}(f::F, x, cfg::AllowedGradientConfig{F,M} = GradientConfig
     end
 end
 
-function gradient!{F,M}(out, f::F, x, cfg::AllowedGradientConfig{F,M} = GradientConfig(f, x))
+function gradient!(out, f::F, x, cfg::AllowedGradientConfig{F,H} = GradientConfig(f, x)) where {F,H}
     if chunksize(cfg) == length(x)
         vector_mode_gradient!(out, f, x, cfg)
     else
@@ -61,13 +61,13 @@ end
 # vector mode #
 ###############
 
-function vector_mode_gradient{F}(f::F, x, cfg)
+function vector_mode_gradient(f::F, x, cfg) where {F}
     ydual = vector_mode_dual_eval(f, x, cfg)
     out = similar(x, valtype(ydual))
     return extract_gradient!(out, ydual)
 end
 
-function vector_mode_gradient!{F}(out, f::F, x, cfg)
+function vector_mode_gradient!(out, f::F, x, cfg) where {F}
     ydual = vector_mode_dual_eval(f, x, cfg)
     extract_gradient!(out, ydual)
     return out
@@ -121,10 +121,10 @@ function chunk_mode_gradient_expr(out_definition::Expr)
     end
 end
 
-@eval function chunk_mode_gradient{F,T,V,N}(f::F, x, cfg::GradientConfig{T,V,N})
+@eval function chunk_mode_gradient(f::F, x, cfg::GradientConfig{T,V,N}) where {F,T,V,N}
     $(chunk_mode_gradient_expr(:(out = similar(x, valtype(ydual)))))
 end
 
-@eval function chunk_mode_gradient!{F,T,V,N}(out, f::F, x, cfg::GradientConfig{T,V,N})
+@eval function chunk_mode_gradient!(out, f::F, x, cfg::GradientConfig{T,V,N}) where {F,T,V,N}
     $(chunk_mode_gradient_expr(:()))
 end
diff --git a/src/hessian.jl b/src/hessian.jl
index 21b8e501..7097063c 100644
--- a/src/hessian.jl
+++ b/src/hessian.jl
@@ -2,24 +2,24 @@
 # API methods #
 ###############
 
-const AllowedHessianConfig{F,M} = Union{HessianConfig{Tag{F,M}}, HessianConfig{Tag{Void,M}}}
+const AllowedHessianConfig{F,H} = Union{HessianConfig{Tag{F,H}}, HessianConfig{Tag{Void,H}}}
 
 hessian(f, x, cfg::HessianConfig) = throw(ConfigMismatchError(f, cfg))
 hessian!(out, f, x, cfg::HessianConfig) = throw(ConfigMismatchError(f, cfg))
 hessian!(out::DiffResult, f, x, cfg::HessianConfig) = throw(ConfigMismatchError(f, cfg))
 
-function hessian{F,M}(f::F, x, cfg::AllowedHessianConfig{F,M} = HessianConfig(f, x))
+function hessian(f::F, x, cfg::AllowedHessianConfig{F,H} = HessianConfig(f, x)) where {F,H}
     ∇f = y -> gradient(f, y, cfg.gradient_config)
     return jacobian(∇f, x, cfg.jacobian_config)
 end
 
-function hessian!{F,M}(out, f::F, x, cfg::AllowedHessianConfig{F,M} = HessianConfig(f, x))
+function hessian!(out, f::F, x, cfg::AllowedHessianConfig{F,H} = HessianConfig(f, x)) where {F,H}
     ∇f = y -> gradient(f, y, cfg.gradient_config)
     jacobian!(out, ∇f, x, cfg.jacobian_config)
     return out
 end
 
-function hessian!{F,M}(out::DiffResult, f::F, x, cfg::AllowedHessianConfig{F,M} = HessianConfig(out, f, x))
+function hessian!(out::DiffResult, f::F, x, cfg::AllowedHessianConfig{F,H} = HessianConfig(out, f, x)) where {F,H}
     ∇f! = (y, z) -> begin
         result = DiffResult(zero(eltype(y)), y)
         gradient!(result, f, z, cfg.gradient_config)
diff --git a/src/jacobian.jl b/src/jacobian.jl
index ceccb492..96f73de4 100644
--- a/src/jacobian.jl
+++ b/src/jacobian.jl
@@ -2,14 +2,14 @@
 # API methods #
 ###############
 
-const AllowedJacobianConfig{F,M} = Union{JacobianConfig{Tag{F,M}}, JacobianConfig{Tag{Void,M}}}
+const AllowedJacobianConfig{F,H} = Union{JacobianConfig{Tag{F,H}}, JacobianConfig{Tag{Void,H}}}
 
 jacobian(f, x, cfg::JacobianConfig) = throw(ConfigMismatchError(f, cfg))
 jacobian(f!, y, x, cfg::JacobianConfig) = throw(ConfigMismatchError(f!, cfg))
 jacobian!(out, f, x, cfg::JacobianConfig) = throw(ConfigMismatchError(f, cfg))
 jacobian!(out, f!, y, x, cfg::JacobianConfig) = throw(ConfigMismatchError(f!, cfg))
 
-function jacobian{F,M}(f::F, x, cfg::AllowedJacobianConfig{F,M} = JacobianConfig(f, x))
+function jacobian(f::F, x, cfg::AllowedJacobianConfig{F,H} = JacobianConfig(f, x)) where {F,H}
     if chunksize(cfg) == length(x)
         return vector_mode_jacobian(f, x, cfg)
     else
@@ -17,7 +17,7 @@ function jacobian{F,M}(f::F, x, cfg::AllowedJacobianConfig{F,M} = JacobianConfig
     end
 end
 
-function jacobian{F,M}(f!::F, y, x, cfg::AllowedJacobianConfig{F,M} = JacobianConfig(f!, y, x))
+function jacobian(f!::F, y, x, cfg::AllowedJacobianConfig{F,H} = JacobianConfig(f!, y, x)) where {F,H}
     if chunksize(cfg) == length(x)
         return vector_mode_jacobian(f!, y, x, cfg)
     else
@@ -25,7 +25,7 @@ function jacobian{F,M}(f!::F, y, x, cfg::AllowedJacobianConfig{F,M} = JacobianCo
     end
 end
 
-function jacobian!{F,M}(out, f::F, x, cfg::AllowedJacobianConfig{F,M} = JacobianConfig(f, x))
+function jacobian!(out, f::F, x, cfg::AllowedJacobianConfig{F,H} = JacobianConfig(f, x)) where {F,H}
     if chunksize(cfg) == length(x)
         vector_mode_jacobian!(out, f, x, cfg)
     else
@@ -34,7 +34,7 @@ function jacobian!{F,M}(out, f::F, x, cfg::AllowedJacobianConfig{F,M} = Jacobian
     return out
 end
 
-function jacobian!{F,M}(out, f!::F, y, x, cfg::AllowedJacobianConfig{F,M} = JacobianConfig(f!, y, x))
+function jacobian!(out, f!::F, y, x, cfg::AllowedJacobianConfig{F,H} = JacobianConfig(f!, y, x)) where {F,H}
     if chunksize(cfg) == length(x)
         vector_mode_jacobian!(out, f!, y, x, cfg)
     else
@@ -78,7 +78,7 @@ reshape_jacobian(out::DiffResult, ydual, xdual) = reshape_jacobian(DiffBase.jaco
 # vector mode #
 ###############
 
-function vector_mode_jacobian{F,T,V,N}(f::F, x, cfg::JacobianConfig{T,V,N})
+function vector_mode_jacobian(f::F, x, cfg::JacobianConfig{T,V,N}) where {F,T,V,N}
     ydual = vector_mode_dual_eval(f, x, cfg)
     out = similar(ydual, valtype(eltype(ydual)), length(ydual), N)
     extract_jacobian!(out, ydual, N)
@@ -86,7 +86,7 @@ function vector_mode_jacobian{F,T,V,N}(f::F, x, cfg::JacobianConfig{T,V,N})
     return out
 end
 
-function vector_mode_jacobian{F,T,V,N}(f!::F, y, x, cfg::JacobianConfig{T,V,N})
+function vector_mode_jacobian(f!::F, y, x, cfg::JacobianConfig{T,V,N}) where {F,T,V,N}
     ydual = vector_mode_dual_eval(f!, y, x, cfg)
     map!(value, y, ydual)
     out = similar(y, length(y), N)
@@ -95,14 +95,14 @@ function vector_mode_jacobian{F,T,V,N}(f!::F, y, x, cfg::JacobianConfig{T,V,N})
     return out
 end
 
-function vector_mode_jacobian!{F,T,V,N}(out, f::F, x, cfg::JacobianConfig{T,V,N})
+function vector_mode_jacobian!(out, f::F, x, cfg::JacobianConfig{T,V,N}) where {F,T,V,N}
     ydual = vector_mode_dual_eval(f, x, cfg)
     extract_jacobian!(out, ydual, N)
     extract_value!(out, ydual)
     return out
 end
 
-function vector_mode_jacobian!{F,T,V,N}(out, f!::F, y, x, cfg::JacobianConfig{T,V,N})
+function vector_mode_jacobian!(out, f!::F, y, x, cfg::JacobianConfig{T,V,N}) where {F,T,V,N}
     ydual = vector_mode_dual_eval(f!, y, x, cfg)
     map!(value, y, ydual)
     extract_jacobian!(out, ydual, N)
@@ -157,7 +157,7 @@ function jacobian_chunk_mode_expr(work_array_definition::Expr, compute_ydual::Ex
     end
 end
 
-@eval function chunk_mode_jacobian{F,T,V,N}(f::F, x, cfg::JacobianConfig{T,V,N})
+@eval function chunk_mode_jacobian(f::F, x, cfg::JacobianConfig{T,V,N}) where {F,T,V,N}
     $(jacobian_chunk_mode_expr(quote
                                    xdual = cfg.duals
                                    seed!(xdual, x)
@@ -167,7 +167,7 @@ end
                                :()))
 end
 
-@eval function chunk_mode_jacobian{F,T,V,N}(f!::F, y, x, cfg::JacobianConfig{T,V,N})
+@eval function chunk_mode_jacobian(f!::F, y, x, cfg::JacobianConfig{T,V,N}) where {F,T,V,N}
     $(jacobian_chunk_mode_expr(quote
                                    ydual, xdual = cfg.duals
                                    seed!(xdual, x)
@@ -177,7 +177,7 @@ end
                                :(map!(value, y, ydual))))
 end
 
-@eval function chunk_mode_jacobian!{F,T,V,N}(out, f::F, x, cfg::JacobianConfig{T,V,N})
+@eval function chunk_mode_jacobian!(out, f::F, x, cfg::JacobianConfig{T,V,N}) where {F,T,V,N}
     $(jacobian_chunk_mode_expr(quote
                                    xdual = cfg.duals
                                    seed!(xdual, x)
@@ -187,7 +187,7 @@ end
                                :(extract_value!(out, ydual))))
 end
 
-@eval function chunk_mode_jacobian!{F,T,V,N}(out, f!::F, y, x, cfg::JacobianConfig{T,V,N})
+@eval function chunk_mode_jacobian!(out, f!::F, y, x, cfg::JacobianConfig{T,V,N}) where {F,T,V,N}
     $(jacobian_chunk_mode_expr(quote
                                    ydual, xdual = cfg.duals
                                    seed!(xdual, x)
diff --git a/src/partials.jl b/src/partials.jl
index 22de9c8b..b8ce0352 100644
--- a/src/partials.jl
+++ b/src/partials.jl
@@ -6,19 +6,19 @@ end
 # Utility/Accessor Functions #
 ##############################
 
-@generated function single_seed{N,T,i}(::Type{Partials{N,T}}, ::Type{Val{i}})
+@generated function single_seed(::Type{Partials{N,T}}, ::Type{Val{i}}) where {N,T,i}
     ex = Expr(:tuple, [ifelse(i === j, :(one(T)), :(zero(T))) for j in 1:N]...)
     return :(Partials($(ex)))
 end
 
-@inline valtype{N,T}(::Partials{N,T}) = T
-@inline valtype{N,T}(::Type{Partials{N,T}}) = T
+@inline valtype(::Partials{N,T}) where {N,T} = T
+@inline valtype(::Type{Partials{N,T}}) where {N,T} = T
 
-@inline npartials{N}(::Partials{N}) = N
-@inline npartials{N,T}(::Type{Partials{N,T}}) = N
+@inline npartials(::Partials{N}) where {N} = N
+@inline npartials(::Type{Partials{N,T}}) where {N,T} = N
 
-@inline Base.length{N}(::Partials{N}) = N
-@inline Base.size{N}(::Partials{N}) = (N,)
+@inline Base.length(::Partials{N}) where {N} = N
+@inline Base.size(::Partials{N}) where {N} = (N,)
 
 @inline Base.getindex(partials::Partials, i::Int) = partials.values[i]
 
@@ -35,18 +35,18 @@ Base.IndexStyle(::Type{<:Partials}) = IndexLinear()
 @inline iszero(partials::Partials) = iszero_tuple(partials.values)
 
 @inline Base.zero(partials::Partials) = zero(typeof(partials))
-@inline Base.zero{N,T}(::Type{Partials{N,T}}) = Partials{N,T}(zero_tuple(NTuple{N,T}))
+@inline Base.zero(::Type{Partials{N,T}}) where {N,T} = Partials{N,T}(zero_tuple(NTuple{N,T}))
 
 @inline Base.one(partials::Partials) = one(typeof(partials))
-@inline Base.one{N,T}(::Type{Partials{N,T}}) = Partials{N,T}(one_tuple(NTuple{N,T}))
+@inline Base.one(::Type{Partials{N,T}}) where {N,T} = Partials{N,T}(one_tuple(NTuple{N,T}))
 
 @inline Base.rand(partials::Partials) = rand(typeof(partials))
-@inline Base.rand{N,T}(::Type{Partials{N,T}}) = Partials{N,T}(rand_tuple(NTuple{N,T}))
+@inline Base.rand(::Type{Partials{N,T}}) where {N,T} = Partials{N,T}(rand_tuple(NTuple{N,T}))
 @inline Base.rand(rng::AbstractRNG, partials::Partials) = rand(rng, typeof(partials))
-@inline Base.rand{N,T}(rng::AbstractRNG, ::Type{Partials{N,T}}) = Partials{N,T}(rand_tuple(rng, NTuple{N,T}))
+@inline Base.rand(rng::AbstractRNG, ::Type{Partials{N,T}}) where {N,T} = Partials{N,T}(rand_tuple(rng, NTuple{N,T}))
 
-Base.isequal{N}(a::Partials{N}, b::Partials{N}) = isequal(a.values, b.values)
-Base.:(==){N}(a::Partials{N}, b::Partials{N}) = a.values == b.values
+Base.isequal(a::Partials{N}, b::Partials{N}) where {N} = isequal(a.values, b.values)
+Base.:(==)(a::Partials{N}, b::Partials{N}) where {N} = a.values == b.values
 
 const PARTIALS_HASH = hash(Partials)
 
@@ -55,7 +55,7 @@ Base.hash(partials::Partials, hsh::UInt64) = hash(hash(partials), hsh)
 
 @inline Base.copy(partials::Partials) = partials
 
-Base.read{N,T}(io::IO, ::Type{Partials{N,T}}) = Partials{N,T}(ntuple(i->read(io, T), Val{N}))
+Base.read(io::IO, ::Type{Partials{N,T}}) where {N,T} = Partials{N,T}(ntuple(i->read(io, T), Val{N}))
 
 function Base.write(io::IO, partials::Partials)
     for p in partials
@@ -67,17 +67,17 @@ end
 # Conversion/Promotion #
 ########################
 
-Base.promote_rule{N,A,B}(::Type{Partials{N,A}}, ::Type{Partials{N,B}}) = Partials{N,promote_type(A, B)}
+Base.promote_rule(::Type{Partials{N,A}}, ::Type{Partials{N,B}}) where {N,A,B} = Partials{N,promote_type(A, B)}
 
-Base.convert{N,T}(::Type{Partials{N,T}}, partials::Partials) = Partials{N,T}(partials.values)
-Base.convert{N,T}(::Type{Partials{N,T}}, partials::Partials{N,T}) = partials
+Base.convert(::Type{Partials{N,T}}, partials::Partials) where {N,T} = Partials{N,T}(partials.values)
+Base.convert(::Type{Partials{N,T}}, partials::Partials{N,T}) where {N,T} = partials
 
 ########################
 # Arithmetic Functions #
 ########################
 
-@inline Base.:+{N}(a::Partials{N}, b::Partials{N}) = Partials(add_tuples(a.values, b.values))
-@inline Base.:-{N}(a::Partials{N}, b::Partials{N}) = Partials(sub_tuples(a.values, b.values))
+@inline Base.:+(a::Partials{N}, b::Partials{N}) where {N} = Partials(add_tuples(a.values, b.values))
+@inline Base.:-(a::Partials{N}, b::Partials{N}) where {N} = Partials(sub_tuples(a.values, b.values))
 @inline Base.:-(partials::Partials) = Partials(minus_tuple(partials.values))
 @inline Base.:*(x::Real, partials::Partials) = partials*x
 
@@ -99,7 +99,7 @@ if NANSAFE_MODE_ENABLED
         return Partials(div_tuple_by_scalar(partials.values, x))
     end
 
-    @inline function _mul_partials{N}(a::Partials{N}, b::Partials{N}, x_a, x_b)
+    @inline function _mul_partials(a::Partials{N}, b::Partials{N}, x_a, x_b) where N
         x_a = ifelse(!isfinite(x_a) && iszero(a), one(x_a), x_a)
         x_b = ifelse(!isfinite(x_b) && iszero(b), one(x_b), x_b)
         return Partials(mul_tuples(a.values, b.values, x_a, x_b))
@@ -113,7 +113,7 @@ else
         return Partials(div_tuple_by_scalar(partials.values, x))
     end
 
-    @inline function _mul_partials{N}(a::Partials{N}, b::Partials{N}, x_a, x_b)
+    @inline function _mul_partials(a::Partials{N}, b::Partials{N}, x_a, x_b) where N
         return Partials(mul_tuples(a.values, b.values, x_a, x_b))
     end
 end
@@ -121,15 +121,15 @@ end
 # edge cases where N == 0 #
 #-------------------------#
 
-@inline Base.:+{A,B}(a::Partials{0,A}, b::Partials{0,B}) = Partials{0,promote_type(A,B)}(tuple())
-@inline Base.:-{A,B}(a::Partials{0,A}, b::Partials{0,B}) = Partials{0,promote_type(A,B)}(tuple())
-@inline Base.:-{T}(partials::Partials{0,T}) = partials
-@inline Base.:*{T}(partials::Partials{0,T}, x::Real) = Partials{0,promote_type(T,typeof(x))}(tuple())
-@inline Base.:*{T}(x::Real, partials::Partials{0,T}) = Partials{0,promote_type(T,typeof(x))}(tuple())
-@inline Base.:/{T}(partials::Partials{0,T}, x::Real) = Partials{0,promote_type(T,typeof(x))}(tuple())
+@inline Base.:+(a::Partials{0,A}, b::Partials{0,B}) where {A,B} = Partials{0,promote_type(A,B)}(tuple())
+@inline Base.:-(a::Partials{0,A}, b::Partials{0,B}) where {A,B} = Partials{0,promote_type(A,B)}(tuple())
+@inline Base.:-(partials::Partials{0,T}) where {T} = partials
+@inline Base.:*(partials::Partials{0,T}, x::Real) where {T} = Partials{0,promote_type(T,typeof(x))}(tuple())
+@inline Base.:*(x::Real, partials::Partials{0,T}) where {T} = Partials{0,promote_type(T,typeof(x))}(tuple())
+@inline Base.:/(partials::Partials{0,T}, x::Real) where {T} = Partials{0,promote_type(T,typeof(x))}(tuple())
 
-@inline _mul_partials{A,B}(a::Partials{0,A}, b::Partials{0,B}, afactor, bfactor) = Partials{0,promote_type(A,B)}(tuple())
-@inline _div_partials{A,B}(a::Partials{0,A}, b::Partials{0,B}, afactor, bfactor) = Partials{0,promote_type(A,B)}(tuple())
+@inline _mul_partials(a::Partials{0,A}, b::Partials{0,B}, afactor, bfactor) where {A,B} = Partials{0,promote_type(A,B)}(tuple())
+@inline _div_partials(a::Partials{0,A}, b::Partials{0,B}, afactor, bfactor) where {A,B} = Partials{0,promote_type(A,B)}(tuple())
 
 ##################################
 # Generated Functions on NTuples #
@@ -154,7 +154,7 @@ end
 @inline rand_tuple(::AbstractRNG, ::Type{Tuple{}}) = tuple()
 @inline rand_tuple(::Type{Tuple{}}) = tuple()
 
-@generated function iszero_tuple{N,T}(tup::NTuple{N,T})
+@generated function iszero_tuple(tup::NTuple{N,T}) where {N,T}
     ex = Expr(:&&, [:(z == tup[$i]) for i=1:N]...)
     return quote
         z = zero(T)
@@ -163,7 +163,7 @@ end
     end
 end
 
-@generated function zero_tuple{N,T}(::Type{NTuple{N,T}})
+@generated function zero_tuple(::Type{NTuple{N,T}}) where {N,T}
     ex = tupexpr(i -> :(z), N)
     return quote
         z = zero(T)
@@ -171,7 +171,7 @@ end
     end
 end
 
-@generated function one_tuple{N,T}(::Type{NTuple{N,T}})
+@generated function one_tuple(::Type{NTuple{N,T}}) where {N,T}
     ex = tupexpr(i -> :(z), N)
     return quote
         z = one(T)
@@ -179,35 +179,35 @@ end
     end
 end
 
-@generated function rand_tuple{N,T}(rng::AbstractRNG, ::Type{NTuple{N,T}})
+@generated function rand_tuple(rng::AbstractRNG, ::Type{NTuple{N,T}}) where {N,T}
     return tupexpr(i -> :(rand(rng, T)), N)
 end
 
-@generated function rand_tuple{N,T}(::Type{NTuple{N,T}})
+@generated function rand_tuple(::Type{NTuple{N,T}}) where {N,T}
     return tupexpr(i -> :(rand(T)), N)
 end
 
-@generated function scale_tuple{N}(tup::NTuple{N}, x)
+@generated function scale_tuple(tup::NTuple{N}, x) where N
     return tupexpr(i -> :(tup[$i] * x), N)
 end
 
-@generated function div_tuple_by_scalar{N}(tup::NTuple{N}, x)
+@generated function div_tuple_by_scalar(tup::NTuple{N}, x) where N
     return tupexpr(i -> :(tup[$i] / x), N)
 end
 
-@generated function add_tuples{N}(a::NTuple{N}, b::NTuple{N})
+@generated function add_tuples(a::NTuple{N}, b::NTuple{N})  where N
     return tupexpr(i -> :(a[$i] + b[$i]), N)
 end
 
-@generated function sub_tuples{N}(a::NTuple{N}, b::NTuple{N})
+@generated function sub_tuples(a::NTuple{N}, b::NTuple{N})  where N
     return tupexpr(i -> :(a[$i] - b[$i]), N)
 end
 
-@generated function minus_tuple{N}(tup::NTuple{N})
+@generated function minus_tuple(tup::NTuple{N}) where N
     return tupexpr(i -> :(-tup[$i]), N)
 end
 
-@generated function mul_tuples{N}(a::NTuple{N}, b::NTuple{N}, afactor, bfactor)
+@generated function mul_tuples(a::NTuple{N}, b::NTuple{N}, afactor, bfactor) where N
     return tupexpr(i -> :((afactor * a[$i]) + (bfactor * b[$i])), N)
 end
 
@@ -215,4 +215,4 @@ end
 # Pretty Printing #
 ###################
 
-Base.show{N}(io::IO, p::Partials{N}) = print(io, "Partials", p.values)
+Base.show(io::IO, p::Partials{N}) where {N} = print(io, "Partials", p.values)
diff --git a/src/utils.jl b/src/utils.jl
index 0b9b33bb..a9d3dbd4 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -17,13 +17,13 @@ end
 # vector mode function evaluation #
 ###################################
 
-function vector_mode_dual_eval{F}(f::F, x, cfg::Union{JacobianConfig,GradientConfig})
+function vector_mode_dual_eval(f::F, x, cfg::Union{JacobianConfig,GradientConfig}) where F
     xdual = cfg.duals
     seed!(xdual, x, cfg.seeds)
     return f(xdual)
 end
 
-function vector_mode_dual_eval{F}(f!::F, y, x, cfg::JacobianConfig)
+function vector_mode_dual_eval(f!::F, y, x, cfg::JacobianConfig) where F
     ydual, xdual = cfg.duals
     seed!(xdual, x, cfg.seeds)
     seed!(ydual, y)
@@ -35,28 +35,28 @@ end
 # seed construction/manipulation #
 ##################################
 
-@generated function construct_seeds{N,V}(::Type{Partials{N,V}})
+@generated function construct_seeds(::Type{Partials{N,V}}) where {N,V}
     return Expr(:tuple, [:(single_seed(Partials{N,V}, Val{$i})) for i in 1:N]...)
 end
 
-function seed!{T,V,N}(duals::AbstractArray{Dual{T,V,N}}, x,
-                      seed::Partials{N,V} = zero(Partials{N,V}))
+function seed!(duals::AbstractArray{Dual{T,V,N}}, x,
+               seed::Partials{N,V} = zero(Partials{N,V})) where {T,V,N}
     for i in eachindex(duals)
         duals[i] = Dual{T,V,N}(x[i], seed)
     end
     return duals
 end
 
-function seed!{T,V,N}(duals::AbstractArray{Dual{T,V,N}}, x,
-                      seeds::NTuple{N,Partials{N,V}})
+function seed!(duals::AbstractArray{Dual{T,V,N}}, x,
+               seeds::NTuple{N,Partials{N,V}}) where {T,V,N}
     for i in 1:N
         duals[i] = Dual{T,V,N}(x[i], seeds[i])
     end
     return duals
 end
 
-function seed!{T,V,N}(duals::AbstractArray{Dual{T,V,N}}, x, index,
-                      seed::Partials{N,V} = zero(Partials{N,V}))
+function seed!(duals::AbstractArray{Dual{T,V,N}}, x, index,
+               seed::Partials{N,V} = zero(Partials{N,V})) where {T,V,N}
     offset = index - 1
     for i in 1:N
         j = i + offset
@@ -65,8 +65,8 @@ function seed!{T,V,N}(duals::AbstractArray{Dual{T,V,N}}, x, index,
     return duals
 end
 
-function seed!{T,V,N}(duals::AbstractArray{Dual{T,V,N}}, x, index,
-                      seeds::NTuple{N,Partials{N,V}}, chunksize = N)
+function seed!(duals::AbstractArray{Dual{T,V,N}}, x, index,
+               seeds::NTuple{N,Partials{N,V}}, chunksize = N) where {T,V,N}
     offset = index - 1
     for i in 1:chunksize
         j = i + offset
diff --git a/test/SIMDTest.jl b/test/SIMDTest.jl
index 9446494d..ad75e47f 100644
--- a/test/SIMDTest.jl
+++ b/test/SIMDTest.jl
@@ -8,7 +8,7 @@ const DUALS = (Dual(1., 2., 3., 4.),
                Dual(Dual(1., 2.), Dual(3., 4.)))
 
 
-function simd_sum{T}(x::Vector{T})
+function simd_sum(x::Vector{T}) where T
     s = zero(T)
     @simd for i in eachindex(x)
         @inbounds s = s + x[i]

From 799f2dcb3e7043a67541a3f484ccb18847696604 Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Tue, 11 Apr 2017 15:28:06 -0400
Subject: [PATCH 19/26] test n-ary derivative API + perturbation confusion

---
 src/derivative.jl      | 37 +++++++++++-----------
 test/DerivativeTest.jl | 71 +++++++++++++++++++++++++++++++++++++++++-
 test/MiscTest.jl       |  8 +++++
 3 files changed, 97 insertions(+), 19 deletions(-)

diff --git a/src/derivative.jl b/src/derivative.jl
index 8e7987db..abb4b7b3 100644
--- a/src/derivative.jl
+++ b/src/derivative.jl
@@ -3,7 +3,7 @@
 ###############
 
 @inline function derivative(f::F, x::R) where {F,R<:Real}
-    T = Tag(F, R)
+    T = typeof(Tag(F, R))
     return extract_derivative(f(Dual{T}(x, one(x))))
 end
 
@@ -11,13 +11,13 @@ end
     args = [:(Dual{T}(x[$i], Val{N}, Val{$i})) for i in 1:N]
     return quote
         $(Expr(:meta, :inline))
-        T = Tag(F, typeof(x))
-        extract_derivative(f($(args...)))
+        T = typeof(Tag(F, typeof(x)))
+        extract_derivative(f($(args...)), Chunk{N}())
     end
 end
 
 @inline function derivative!(out, f::F, x::R) where {F,R<:Real}
-    T = Tag(F, typeof(x))
+    T = typeof(Tag(F, typeof(x)))
     extract_derivative!(out, f(Dual{T}(x, one(x))))
     return out
 end
@@ -26,7 +26,7 @@ end
     args = [:(Dual{T}(x[$i], Val{N}, Val{$i})) for i in 1:N]
     return quote
         $(Expr(:meta, :inline))
-        T = Tag(F, typeof(x))
+        T = typeof(Tag(F, typeof(x)))
         extract_derivative!(out, f($(args...)))
     end
 end
@@ -38,29 +38,29 @@ end
 # non-mutating #
 #--------------#
 
-@generated function extract_derivative(y::Dual{T,V,N}) where {T,V,N}
+@inline extract_derivative(y::Dual{T,V,1}) where {T,V} = partials(y, 1)
+@inline extract_derivative(y::Real) = zero(y)
+@inline extract_derivative(y::AbstractArray) = extract_derivative!(similar(y, valtype(eltype(y))), y)
+
+@generated function extract_derivative(y::Dual{T,V,N}, ::Chunk{N}) where {T,V,N}
     return quote
         $(Expr(:meta, :inline))
         $(Expr(:tuple, [:(partials(y, $i)) for i in 1:N]...))
     end
 end
 
-@inline extract_derivative{T,V}(y::Dual{T,V,1}) = partials(y, 1)
-@inline extract_derivative(y::Real) = zero(y)
-@inline extract_derivative(y::AbstractArray) = extract_derivative!(similar(y, valtype(eltype(y))), y)
-
-# mutating #
-#----------#
-
-@generated function extract_derivative!(out::NTuple{N,Any}, y::Dual{T,V,N}) where {T,V,N}
+@generated function extract_derivative(y::AbstractArray, ::Chunk{N}) where {N}
     return quote
         $(Expr(:meta, :inline))
-        $(Expr(:block, [:(out[$i][] = partials(y, $i)) for i in 1:N]...))
-        return out
+        V = valtype(eltype(y))
+        out = $(Expr(:tuple, [:(similar(y, V)) for i in 1:N]...))
+        return extract_derivative!(out, y)
     end
 end
+# mutating #
+#----------#
 
-@generated function extract_derivative!(out::NTuple{N,Any}, y::AbstractArray) where {N}
+@generated function extract_derivative!(out::NTuple{N,Any}, y) where {N}
     return quote
         $(Expr(:meta, :inline))
         $(Expr(:block, [:(extract_derivative!(out[$i], y, $i)) for i in 1:N]...))
@@ -70,6 +70,7 @@ end
 
 extract_derivative!(out::AbstractArray, y::AbstractArray) = map!(extract_derivative, out, y)
 extract_derivative!(out::AbstractArray, y::AbstractArray, p) = map!(x -> partials(x, p), out, y)
+extract_derivative!(out::Union{AbstractArray,Base.Ref}, y::Dual, p) = (out[] = partials(y, p); out)
 
 function extract_derivative!(out::DiffResult, y)
     DiffBase.value!(value, out, y)
@@ -77,7 +78,7 @@ function extract_derivative!(out::DiffResult, y)
     return out
 end
 
-function extract_derivative!(out::DiffResult, y::AbstractArray, p)
+function extract_derivative!(out::DiffResult, y, p)
     DiffBase.value!(value, out, y)
     DiffBase.derivative!(x -> partials(x, p), out, y)
     return out
diff --git a/test/DerivativeTest.jl b/test/DerivativeTest.jl
index dcedbbba..901a66cb 100644
--- a/test/DerivativeTest.jl
+++ b/test/DerivativeTest.jl
@@ -7,6 +7,8 @@ using ForwardDiff
 
 include(joinpath(dirname(@__FILE__), "utils.jl"))
 
+srand(1)
+
 ########################
 # test vs. Calculus.jl #
 ########################
@@ -29,17 +31,84 @@ for f in DiffBase.NUMBER_TO_ARRAY_FUNCS
     println("  ...testing $f")
     v = f(x)
     d = ForwardDiff.derivative(f, x)
+
+    @test !(eltype(d) <: ForwardDiff.Dual)
     @test isapprox(d, Calculus.derivative(f, x), atol=FINITEDIFF_ERROR)
 
     out = similar(v)
     ForwardDiff.derivative!(out, f, x)
     @test isapprox(out, d)
 
-    out = DiffBase.DiffResult(zero(v), similar(d))
+    out = DiffBase.DiffResult(similar(v), similar(d))
     ForwardDiff.derivative!(out, f, x)
     @test isapprox(DiffBase.value(out), v)
     @test isapprox(DiffBase.derivative(out), d)
 end
 
+##################
+# n-ary versions #
+##################
+
+# (::Real, ::Real) -> ::Real #
+#----------------------------#
+
+f(a, b) = sin(a) * tan(b)
+
+a, b = rand(2)
+
+valf = f(a, b)
+∇f = ForwardDiff.gradient(x -> f(x...), [a, b])
+
+@test collect(ForwardDiff.derivative(f, (a, b))) == ∇f
+
+out = (DiffBase.DiffResult(zero(a), zero(a)), DiffBase.DiffResult(zero(b), zero(b)))
+ForwardDiff.derivative!(out, f, (a, b))
+@test DiffBase.value(out[1]) == DiffBase.value(out[2]) == valf
+@test [DiffBase.derivative(out[1]), DiffBase.derivative(out[2])] == ∇f
+
+out = (Base.RefValue(zero(a)), DiffBase.DiffResult(zero(b), zero(b)))
+ForwardDiff.derivative!(out, f, (a, b))
+@test DiffBase.value(out[2]) == valf
+@test [out[1][], DiffBase.derivative(out[2])] == ∇f
+
+out = (DiffBase.DiffResult(zero(a), zero(a)), [zero(b)])
+ForwardDiff.derivative!(out, f, (a, b))
+@test DiffBase.value(out[1]) == valf
+@test [DiffBase.derivative(out[1]), out[2][]] == ∇f
+
+out = (Base.RefValue(zero(a)), [zero(b)])
+ForwardDiff.derivative!(out, f, (a, b))
+@test [out[1][], out[2][]] == ∇f
+
+# (::Real, ::Real) -> ::Vector #
+#------------------------------#
+
+g(a, b) = cos.([f(a, b), f(b, a)]) .+ b .- a
+
+a, b = rand(2)
+
+valg = g(a, b)
+Jg = ForwardDiff.jacobian(x -> g(x...), [a, b])
+
+@test hcat(ForwardDiff.derivative(g, (a, b))...) == Jg
+
+out = (DiffBase.DiffResult(similar(valg), similar(valg)), DiffBase.DiffResult(similar(valg), similar(valg)))
+ForwardDiff.derivative!(out, g, (a, b))
+@test DiffBase.value(out[1]) == DiffBase.value(out[2]) == valg
+@test hcat(DiffBase.derivative(out[1]), DiffBase.derivative(out[2])) == Jg
+
+out = (similar(valg), DiffBase.DiffResult(similar(valg), similar(valg)))
+ForwardDiff.derivative!(out, g, (a, b))
+@test DiffBase.value(out[2]) == valg
+@test hcat(out[1], DiffBase.derivative(out[2])) == Jg
+
+out = (DiffBase.DiffResult(similar(valg), similar(valg)), similar(valg))
+ForwardDiff.derivative!(out, g, (a, b))
+@test DiffBase.value(out[1]) == valg
+@test hcat(DiffBase.derivative(out[1]), out[2]) == Jg
+
+out = (similar(valg), similar(valg))
+ForwardDiff.derivative!(out, g, (a, b))
+@test hcat(out[1], out[2]) == Jg
 
 end # module
diff --git a/test/MiscTest.jl b/test/MiscTest.jl
index 32faa075..53afcecc 100644
--- a/test/MiscTest.jl
+++ b/test/MiscTest.jl
@@ -71,6 +71,14 @@ testf2 = x -> testdf(x[1]) * f(x[2])
 
 @test isapprox(ForwardDiff.gradient(f2, x), ForwardDiff.gradient(testf2, x))
 
+# Perturbation Confusion (Issue #83) #
+#------------------------------------#
+
+D = ForwardDiff.derivative
+
+@test_throws ForwardDiff.TagMismatchError D(x -> x * D(y -> x + y, 1), 1)
+@test_throws ForwardDiff.TagMismatchError ForwardDiff.gradient(v -> sum(v) * D(y -> y * norm(v), 1), [1])
+
 ######################################
 # Higher-Dimensional Differentiation #
 ######################################

From c11eee262b8ff74c2c09110d6904a50fcddcfcd3 Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Tue, 11 Apr 2017 16:11:25 -0400
Subject: [PATCH 20/26] implement ForwardDiff v0.4.x --> v0.5.x deprecation
 layer

---
 src/ForwardDiff.jl     |   2 +-
 src/config.jl          |   6 +-
 src/deprecated.jl      | 152 +++++++-----------------------------
 src/hessian.jl         |   2 +-
 test/DeprecatedTest.jl | 169 +++++++----------------------------------
 test/HessianTest.jl    |   4 +-
 test/runtests.jl       |   8 +-
 7 files changed, 64 insertions(+), 279 deletions(-)

diff --git a/src/ForwardDiff.jl b/src/ForwardDiff.jl
index 6faddaed..429c6291 100644
--- a/src/ForwardDiff.jl
+++ b/src/ForwardDiff.jl
@@ -53,7 +53,7 @@ include("derivative.jl")
 include("gradient.jl")
 include("jacobian.jl")
 include("hessian.jl")
-# include("deprecated.jl")
+include("deprecated.jl")
 
 export DiffBase
 
diff --git a/src/config.jl b/src/config.jl
index 5390d6bf..c43077aa 100644
--- a/src/config.jl
+++ b/src/config.jl
@@ -49,7 +49,7 @@ end
 # AbstractConfig #
 ##################
 
-abstract type AbstractConfig{T<:Tag,N} end
+abstract type AbstractConfig{T,N} end
 
 struct ConfigMismatchError{F,G,H} <: Exception
     f::F
@@ -135,8 +135,8 @@ function HessianConfig(f::F,
     return HessianConfig(jacobian_config, gradient_config)
 end
 
-function HessianConfig(result::DiffResult,
-                       f::F,
+function HessianConfig(f::F,
+                       result::DiffResult,
                        x::AbstractArray{V},
                        chunk::Chunk = Chunk(x),
                        tag::Tag = Tag(F, Dual{Void,V,0})) where {F,V}
diff --git a/src/deprecated.jl b/src/deprecated.jl
index 9ba22857..b1803534 100644
--- a/src/deprecated.jl
+++ b/src/deprecated.jl
@@ -1,137 +1,39 @@
-#############################################
-# ForwardDiffResult --> DiffBase.DiffResult #
-#############################################
+#########################################################
+# Config{N}(args...) --> Config(f, args..., Chunk{N}()) #
+#########################################################
 
-Base.@deprecate DerivativeResult(x, y) DiffBase.DiffResult(x, y)
-Base.@deprecate DerivativeResult(x) DiffBase.DiffResult(copy(x), copy(x))
-
-Base.@deprecate GradientResult(x, y) DiffBase.DiffResult(x, y)
-Base.@deprecate GradientResult(x) DiffBase.GradientResult(x)
-
-Base.@deprecate JacobianResult(x, y) DiffBase.DiffResult(x, y)
-Base.@deprecate JacobianResult(x) DiffBase.JacobianResult(x)
-
-Base.@deprecate HessianResult(x, y, z) DiffBase.DiffResult(x, y, z)
-Base.@deprecate HessianResult(x) DiffBase.HessianResult(x)
-
-struct Chunk{N}
-    function (::Type{Chunk{N}}){N}()
-        Base.depwarn("Chunk{N}() is deprecated, use the ForwardDiff.AbstractConfig API instead.", :Chunk)
-        return new{N}()
-    end
-end
-
-export Chunk
-
-######################
-# gradient/gradient! #
-######################
-
-function gradient{N}(f, x, chunk::Chunk{N}; multithread = false, kwargs...)
-    if multithread
-        Base.depwarn("ForwardDiff.gradient(f, x, ::ForwardDiff.Chunk{N}; multithread = true) is deprecated" *
-                     ", use ForwardDiff.gradient(f, x, ForwardDiff.MultithreadConfig(ForwardDiff.GradientConfig{N}(x))) instead.",
-                     :gradient)
-        return gradient(f, x, MultithreadConfig(GradientConfig{N}(x)))
-    else
-        Base.depwarn("ForwardDiff.gradient(f, x, ::ForwardDiff.Chunk{N}) is deprecated, use " *
-                     "ForwardDiff.gradient(f, x, ForwardDiff.GradientConfig{N}(x)) instead.",
-                     :gradient)
-        return gradient(f, x, GradientConfig{N}(x))
-    end
-end
-
-function gradient!{N}(out, f, x, chunk::Chunk{N}; multithread = false, kwargs...)
-    if multithread
-        Base.depwarn("ForwardDiff.gradient!(out, f, x, ::ForwardDiff.Chunk{N}; multithread = true) is deprecated" *
-                     ", use ForwardDiff.gradient!(out, f, x, ForwardDiff.MultithreadConfig(ForwardDiff.GradientConfig{N}(x))) instead.",
-                     :gradient!)
-        return gradient!(out, f, x, MultithreadConfig(GradientConfig{N}(x)))
-    else
-        Base.depwarn("ForwardDiff.gradient!(out, f, x, ::ForwardDiff.Chunk{N}) is deprecated, use " *
-                     "ForwardDiff.gradient!(out, f, x, ForwardDiff.GradientConfig{N}(x)) instead.",
-                     :gradient!)
-        return gradient!(out, f, x, GradientConfig{N}(x))
-    end
+function (::Type{GradientConfig{N}})(x) where N  # deprecated form: chunk size N passed as a type parameter
+    msg = "GradientConfig{N}(x) is deprecated; use GradientConfig(nothing, x, Chunk{N}()) instead."
+    Base.depwarn(msg, :GradientConfig)  # emit the deprecation warning before forwarding
+    return GradientConfig(nothing, x, Chunk{N}())  # forward to the replacement named in msg; `nothing` fills the leading `f` slot
 end
 
-######################
-# jacobian/jacobian! #
-######################
-
-function jacobian{N}(f, x, chunk::Chunk{N}; kwargs...)
-    Base.depwarn("ForwardDiff.jacobian(f, x, ::ForwardDiff.Chunk{N}) is deprecated, use " *
-                 "ForwardDiff.jacobian(f, x, ForwardDiff.JacobianConfig{N}(x)) instead.",
-                 :jacobian)
-    return jacobian(f, x, JacobianConfig{N}(x))
+function (::Type{JacobianConfig{N}})(x) where N  # deprecated form: chunk size N passed as a type parameter
+    msg = "JacobianConfig{N}(x) is deprecated; use JacobianConfig(nothing, x, Chunk{N}()) instead."
+    Base.depwarn(msg, :JacobianConfig)  # emit the deprecation warning before forwarding
+    return JacobianConfig(nothing, x, Chunk{N}())  # forward to the replacement named in msg; `nothing` fills the leading `f` slot
 end
 
-function jacobian{N}(f!, y, x, chunk::Chunk{N}; kwargs...)
-    Base.depwarn("ForwardDiff.jacobian(f!, y, x, ::ForwardDiff.Chunk{N}) is deprecated, use " *
-                 "ForwardDiff.jacobian(f!, y, x, ForwardDiff.JacobianConfig{N}(x)) instead.",
-                 :jacobian)
-    return jacobian(f!, y, x, JacobianConfig{N}(y, x))
-end
-
-function jacobian!{N}(out, f, x, chunk::Chunk{N}; kwargs...)
-    Base.depwarn("ForwardDiff.jacobian!(out, f, x, ::ForwardDiff.Chunk{N}) is deprecated, use " *
-                 "ForwardDiff.jacobian!(out, f, x, ForwardDiff.JacobianConfig{N}(x)) instead.",
-                 :jacobian!)
-    return jacobian!(out, f, x, JacobianConfig{N}(x))
-end
-
-function jacobian!{N}(out, f!, y, x, chunk::Chunk{N}; kwargs...)
-    Base.depwarn("ForwardDiff.jacobian!(out, f, y, x, ::ForwardDiff.Chunk{N}) is deprecated, use " *
-                 "ForwardDiff.jacobian!(out, f, y, x, ForwardDiff.JacobianConfig{N}(x)) instead.",
-                 :jacobian!)
-    return jacobian!(out, f!, y, x, JacobianConfig{N}(y, x))
-end
-
-####################
-# hessian/hessian! #
-####################
-
-function hessian{N}(f, x, chunk::Chunk{N}; multithread = false, kwargs...)
-    if multithread
-        Base.depwarn("ForwardDiff.hessian(f, x, ::ForwardDiff.Chunk{N}; multithread = true) is deprecated" *
-                     ", use ForwardDiff.hessian(f, x, ForwardDiff.MultithreadConfig(ForwardDiff.HessianConfig{N}(x))) instead.",
-                     :hessian)
-        return hessian(f, x, MultithreadConfig(HessianConfig{N}(x)))
-    else
-        Base.depwarn("ForwardDiff.hessian(f, x, ::ForwardDiff.Chunk{N}) is deprecated, use " *
-                     "ForwardDiff.hessian(f, x, ForwardDiff.HessianConfig{N}(x)) instead.",
-                     :hessian)
-        return hessian(f, x, HessianConfig{N}(x))
-    end
+function (::Type{JacobianConfig{N}})(y, x) where N  # deprecated two-array form (`y` and `x`), chunk size N as a type parameter
+    msg = "JacobianConfig{N}(y, x) is deprecated; use JacobianConfig(nothing, y, x, Chunk{N}()) instead."
+    Base.depwarn(msg, :JacobianConfig)  # emit the deprecation warning before forwarding
+    return JacobianConfig(nothing, y, x, Chunk{N}())  # forward to the replacement named in msg; `nothing` fills the leading `f` slot
 end
 
-function hessian!{N}(out, f, x, chunk::Chunk{N}; multithread = false, kwargs...)
-    return deprecated_hessian!(out, f, x, chunk; multithread = multithread)
+function (::Type{HessianConfig{N}})(x) where N  # deprecated form: chunk size N passed as a type parameter
+    msg = "HessianConfig{N}(x) is deprecated; use HessianConfig(nothing, x, Chunk{N}()) instead."
+    Base.depwarn(msg, :HessianConfig)  # emit the deprecation warning before forwarding
+    return HessianConfig(nothing, x, Chunk{N}())  # forward to the replacement named in msg; `nothing` fills the leading `f` slot
 end
 
-function hessian!{N}(out::DiffResult, f, x, chunk::Chunk{N}; multithread = false, kwargs...)
-    return deprecated_hessian!(out, f, x, chunk; multithread = multithread)
+function (::Type{HessianConfig{N}})(out, x) where N  # deprecated form taking `out` plus input `x`, chunk size N as a type parameter
+    msg = "HessianConfig{N}(out, x) is deprecated; use HessianConfig(nothing, out, x, Chunk{N}()) instead."
+    Base.depwarn(msg, :HessianConfig)  # emit the deprecation warning before forwarding
+    return HessianConfig(nothing, out, x, Chunk{N}())  # new argument order: function slot first, then `out`, then `x`
 end
 
-function deprecated_hessian!{N}(out, f, x, chunk::Chunk{N}; multithread = false)
-    if isa(out, DiffBase.DiffResult)
-        out_str = "out::DiffBase.DiffResult"
-        cfg_str = "ForwardDiff.HessianConfig{N}(out, x)"
-        cfg = HessianConfig{N}(out, x)
-    else
-        out_str = "out"
-        cfg_str = "ForwardDiff.HessianConfig{N}(x)"
-        cfg = HessianConfig{N}(x)
-    end
-    if multithread
-        Base.depwarn("ForwardDiff.hessian!($(out_str), f, x, ::Chunk{N}; multithread = true) is deprecated" *
-                     ", use ForwardDiff.hessian!($(out_str), f, x, ForwardDiff.MultithreadConfig($(cfg_str))) instead.",
-                     :hessian!)
-        return hessian!(out, f, x, MultithreadConfig(cfg))
-    else
-        Base.depwarn("ForwardDiff.hessian!($(out_str), f, x, ::Chunk{N}) is deprecated, use " *
-                     "ForwardDiff.hessian!($(out_str), f, x, $(cfg_str)) instead.",
-                     :hessian!)
-        return hessian!(out, f, x, cfg)
-    end
+function MultithreadConfig(cfg::AbstractConfig)  # deprecated wrapper kept so old call sites still run
+    msg = "MultithreadConfig(cfg) is deprecated; use cfg instead (ForwardDiff no longer implements experimental multithreading)."
+    Base.depwarn(msg, :MultithreadConfig)  # emit the deprecation warning
+    return cfg  # no wrapping any more: the given config is handed back unchanged
 end
diff --git a/src/hessian.jl b/src/hessian.jl
index 7097063c..c74cf3a8 100644
--- a/src/hessian.jl
+++ b/src/hessian.jl
@@ -19,7 +19,7 @@ function hessian!(out, f::F, x, cfg::AllowedHessianConfig{F,H} = HessianConfig(f
     return out
 end
 
-function hessian!(out::DiffResult, f::F, x, cfg::AllowedHessianConfig{F,H} = HessianConfig(out, f, x)) where {F,H}
+function hessian!(out::DiffResult, f::F, x, cfg::AllowedHessianConfig{F,H} = HessianConfig(f, out, x)) where {F,H}
     ∇f! = (y, z) -> begin
         result = DiffResult(zero(eltype(y)), y)
         gradient!(result, f, z, cfg.gradient_config)
diff --git a/test/DeprecatedTest.jl b/test/DeprecatedTest.jl
index 38b3581f..561c5f2c 100644
--- a/test/DeprecatedTest.jl
+++ b/test/DeprecatedTest.jl
@@ -3,158 +3,41 @@ module DeprecatedTest
 using Base.Test
 using ForwardDiff, DiffBase
 
-include(joinpath(dirname(@__FILE__), "utils.jl"))
-
-info("The following tests print lots of deprecation warnings on purpose.")
-
-#############################################
-# ForwardDiffResult --> DiffBase.DiffResult #
-#############################################
-
-v = rand()
-x, y = rand(5), rand(5)
-h = rand(5, 5)
-
-@test isa(DerivativeResult(v, y), DiffBase.DiffResult)
-@test isa(DerivativeResult(v), DiffBase.DiffResult)
-
-@test isa(GradientResult(v, y), DiffBase.DiffResult)
-@test isa(GradientResult(x), DiffBase.DiffResult)
-
-@test isa(JacobianResult(x, y), DiffBase.DiffResult)
-@test isa(JacobianResult(x), DiffBase.DiffResult)
-
-@test isa(HessianResult(v, y, h), DiffBase.DiffResult)
-@test isa(HessianResult(x), DiffBase.DiffResult)
-
-######################
-# gradient/gradient! #
-######################
-
-x = rand(5)
-f = x -> prod(x) + sum(x)
-v = f(x)
-g = ForwardDiff.gradient(f, x)
-
-@test ForwardDiff.gradient(f, x, Chunk{1}(); multithread = false) == g
-
-out = similar(x)
-ForwardDiff.gradient!(out, f, x, Chunk{1}(); multithread = false)
-@test out == g
-
-out = DiffBase.GradientResult(x)
-ForwardDiff.gradient!(out, f, x, Chunk{1}(); multithread = false)
-@test DiffBase.value(out) == v
-@test DiffBase.gradient(out) == g
-
-@test ForwardDiff.gradient(f, x, Chunk{1}(); multithread = true) == g
-
-out = similar(x)
-ForwardDiff.gradient!(out, f, x, Chunk{1}(); multithread = true)
-@test out == g
-
-out = DiffBase.GradientResult(x)
-ForwardDiff.gradient!(out, f, x, Chunk{1}(); multithread = true)
-@test DiffBase.value(out) == v
-@test DiffBase.gradient(out) == g
-
-######################
-# jacobian/jacobian! #
-######################
-
-# f(x) -> y #
-#-----------#
+using ForwardDiff: AbstractConfig, GradientConfig,
+                   JacobianConfig, HessianConfig,
+                   MultithreadConfig
 
-x = rand(5)
-f = cumprod
-y = f(x)
-j = ForwardDiff.jacobian(f, x)
-
-@test ForwardDiff.jacobian(f, x, Chunk{1}(); multithread = false) == j
-
-out = similar(x, length(y), length(x))
-ForwardDiff.jacobian!(out, f, x, Chunk{1}(); multithread = false)
-@test out == j
-
-out = DiffBase.JacobianResult(x)
-ForwardDiff.jacobian!(out, f, x, Chunk{1}(); multithread = false)
-@test DiffBase.value(out) == y
-@test DiffBase.jacobian(out) == j
-
-@test ForwardDiff.jacobian(f, x, Chunk{1}(); multithread = true) == j
-
-out = similar(x, length(y), length(x))
-ForwardDiff.jacobian!(out, f, x, Chunk{1}(); multithread = true)
-@test out == j
-
-out = DiffBase.JacobianResult(x)
-ForwardDiff.jacobian!(out, f, x, Chunk{1}(); multithread = true)
-@test DiffBase.value(out) == y
-@test DiffBase.jacobian(out) == j
-
-# f!(y, x) #
-#----------#
-
-y = similar(x)
-f! = cumprod!
-f!(y, x)
-j = ForwardDiff.jacobian(f!, y, x)
-
-@test ForwardDiff.jacobian(f!, y, x, Chunk{1}(); multithread = false) == j
-
-out = similar(x, length(y), length(x))
-ForwardDiff.jacobian!(out, f!, y, x, Chunk{1}(); multithread = false)
-@test out == j
-
-out = DiffBase.JacobianResult(y, x)
-ForwardDiff.jacobian!(out, f!, y, x, Chunk{1}(); multithread = false)
-@test DiffBase.value(out) == y
-@test DiffBase.jacobian(out) == j
-
-@test ForwardDiff.jacobian(f!, y, x, Chunk{1}(); multithread = true) == j
-
-out = similar(x, length(y), length(x))
-ForwardDiff.jacobian!(out, f!, y, x, Chunk{1}(); multithread = true)
-@test out == j
-
-out = DiffBase.JacobianResult(y, x)
-ForwardDiff.jacobian!(out, f!, y, x, Chunk{1}(); multithread = true)
-@test DiffBase.value(out) == y
-@test DiffBase.jacobian(out) == j
+include(joinpath(dirname(@__FILE__), "utils.jl"))
 
-####################
-# hessian/hessian! #
-####################
+function similar_duals(a::AbstractArray, b::AbstractArray)  # structural comparison only: element values are never inspected
+    return typeof(a) == typeof(b) && size(a) == size(b)  # same concrete array type and same dimensions
+end
 
-x = rand(5)
-f = x -> prod(x) + sum(x)
-v = f(x)
-g = ForwardDiff.gradient(f, x)
-h = ForwardDiff.hessian(f, x, Chunk{1}())
+similar_duals(a::Tuple, b::Tuple) = all(similar_duals.(a, b))  # tuple method: pair entries positionally; every pair must be similar
 
-@test ForwardDiff.hessian(f, x, Chunk{1}(); multithread = false) == h
+function similar_config(a::AbstractConfig, b::AbstractConfig)  # generic method: reads the `seeds` and `duals` fields of both configs
+    return a.seeds == b.seeds && similar_duals(a.duals, b.duals)  # seeds compared with ==, duals only via similar_duals
+end
 
-out = similar(x, length(x), length(x))
-ForwardDiff.hessian!(out, f, x, Chunk{1}(); multithread = false)
-@test out == h
+function similar_config(a::HessianConfig, b::HessianConfig)  # HessianConfig method: compares the two nested configs it holds
+    return (similar_config(a.gradient_config, b.gradient_config) &&  # nested gradient configs must be similar...
+            similar_config(a.jacobian_config, b.jacobian_config))  # ...and so must the nested jacobian configs
+end
 
+x = rand(3)
+y = rand(3)
 out = DiffBase.HessianResult(x)
-ForwardDiff.hessian!(out, f, x, Chunk{1}(); multithread = false)
-@test DiffBase.value(out) == v
-@test DiffBase.gradient(out) == g
-@test DiffBase.hessian(out) == h
-
-@test ForwardDiff.hessian(f, x, Chunk{1}(); multithread = true) == h
+N = 1
+chunk = ForwardDiff.Chunk{N}()
 
-out = similar(x, length(x), length(x))
-ForwardDiff.hessian!(out, f, x, Chunk{1}(); multithread = true)
-@test out == h
+info("The following tests print lots of deprecation warnings on purpose.")
 
-out = DiffBase.HessianResult(x)
-ForwardDiff.hessian!(out, f, x, Chunk{1}(); multithread = true)
-@test DiffBase.value(out) == v
-@test DiffBase.gradient(out) == g
-@test DiffBase.hessian(out) == h
+@test similar_config(GradientConfig{N}(x), GradientConfig(nothing, x, chunk))
+@test similar_config(JacobianConfig{N}(x), JacobianConfig(nothing, x, chunk))
+@test similar_config(JacobianConfig{N}(y, x), JacobianConfig(nothing, y, x, chunk))
+@test similar_config(HessianConfig{N}(x), HessianConfig(nothing, x, chunk))
+@test similar_config(HessianConfig{N}(out, x), HessianConfig(nothing, out, x, chunk))
+@test similar_config(MultithreadConfig(GradientConfig(nothing, x, chunk)), GradientConfig(nothing, x, chunk))
 
 info("Deprecation testing is now complete, so any further deprecation warnings are real.")
 
diff --git a/test/HessianTest.jl b/test/HessianTest.jl
index cb16271d..5d697b16 100644
--- a/test/HessianTest.jl
+++ b/test/HessianTest.jl
@@ -22,7 +22,7 @@ h = [-66.0  -40.0    0.0;
 for c in (1, 2, 3), tag in (nothing, f)
     println("  ...running hardcoded test with chunk size = $c and tag = $tag")
     cfg = ForwardDiff.HessianConfig(tag, x, ForwardDiff.Chunk{c}())
-    resultcfg = ForwardDiff.HessianConfig(DiffBase.HessianResult(x), tag, x, ForwardDiff.Chunk{c}())
+    resultcfg = ForwardDiff.HessianConfig(tag, DiffBase.HessianResult(x), x, ForwardDiff.Chunk{c}())
 
     @test isapprox(h, ForwardDiff.hessian(f, x))
     @test isapprox(h, ForwardDiff.hessian(f, x, cfg))
@@ -61,7 +61,7 @@ for f in DiffBase.VECTOR_TO_NUMBER_FUNCS
     for c in CHUNK_SIZES, tag in (nothing, f)
         println("  ...testing $f with chunk size = $c and tag = $tag")
         cfg = ForwardDiff.HessianConfig(tag, X, ForwardDiff.Chunk{c}())
-        resultcfg = ForwardDiff.HessianConfig(DiffBase.HessianResult(X), tag, X, ForwardDiff.Chunk{c}())
+        resultcfg = ForwardDiff.HessianConfig(tag, DiffBase.HessianResult(X), X, ForwardDiff.Chunk{c}())
 
         out = ForwardDiff.hessian(f, X, cfg)
         @test isapprox(out, h)
diff --git a/test/runtests.jl b/test/runtests.jl
index 4e509e7c..ede73646 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -42,7 +42,7 @@ if Base.JLOptions().opt_level >= 3
     println("done (took $(toq()) seconds).")
 end
 
-# println("Testing deprecations...")
-# tic()
-# include("DeprecatedTest.jl")
-# println("done (took $(toq()) seconds).")
+println("Testing deprecations...")
+tic()
+include("DeprecatedTest.jl")
+println("done (took $(toq()) seconds).")

From ee11abef272cd98281bc9c115a7bdaeedc655cb4 Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Wed, 12 Apr 2017 14:08:16 -0400
Subject: [PATCH 21/26] update benchmarks

---
 .../benchmarks.jl                                      | 10 +++-------
 {benchmark => benchmarks}/cpp/.gitignore               |  0
 {benchmark => benchmarks}/cpp/Makefile                 |  0
 {benchmark => benchmarks}/cpp/benchmarks.cpp           |  0
 {benchmark => benchmarks}/cpp/benchmarks.h             |  0
 {benchmark => benchmarks}/cpp/dual1.cpp                |  0
 {benchmark => benchmarks}/cpp/dual2.cpp                |  0
 {benchmark => benchmarks}/cpp/dual3.cpp                |  0
 {benchmark => benchmarks}/cpp/dual4.cpp                |  0
 {benchmark => benchmarks}/cpp/dual5.cpp                |  0
 {benchmark => benchmarks}/py/algopy_benchmarks.py      |  0
 {benchmark => benchmarks}/py/autograd_benchmarks.py    |  0
 12 files changed, 3 insertions(+), 7 deletions(-)
 rename benchmark/ForwardDiffBenchmarks.jl => benchmarks/benchmarks.jl (92%)
 rename {benchmark => benchmarks}/cpp/.gitignore (100%)
 rename {benchmark => benchmarks}/cpp/Makefile (100%)
 rename {benchmark => benchmarks}/cpp/benchmarks.cpp (100%)
 rename {benchmark => benchmarks}/cpp/benchmarks.h (100%)
 rename {benchmark => benchmarks}/cpp/dual1.cpp (100%)
 rename {benchmark => benchmarks}/cpp/dual2.cpp (100%)
 rename {benchmark => benchmarks}/cpp/dual3.cpp (100%)
 rename {benchmark => benchmarks}/cpp/dual4.cpp (100%)
 rename {benchmark => benchmarks}/cpp/dual5.cpp (100%)
 rename {benchmark => benchmarks}/py/algopy_benchmarks.py (100%)
 rename {benchmark => benchmarks}/py/autograd_benchmarks.py (100%)

diff --git a/benchmark/ForwardDiffBenchmarks.jl b/benchmarks/benchmarks.jl
similarity index 92%
rename from benchmark/ForwardDiffBenchmarks.jl
rename to benchmarks/benchmarks.jl
index 88312f5c..bc576885 100644
--- a/benchmark/ForwardDiffBenchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -1,5 +1,3 @@
-module ForwardDiffBenchmarks
-
 using ForwardDiff, DiffBase
 using BenchmarkTools
 
@@ -39,11 +37,11 @@ for f in (DiffBase.VECTOR_TO_NUMBER_FUNCS..., DiffBase.MATRIX_TO_NUMBER_FUNCS...
         fval[length(x)] = @benchmarkable $(f)($x)
 
         gout = DiffBase.DiffResult(y, similar(x, typeof(y)))
-        gcfg = ForwardDiff.Config(x)
+        gcfg = ForwardDiff.GradientConfig(nothing, x)
         fgrad[length(x)] = @benchmarkable ForwardDiff.gradient!($gout, $f, $x, $gcfg)
 
         hout = DiffBase.DiffResult(y, similar(x, typeof(y)), similar(x, typeof(y), length(x), length(x)))
-        hcfg = ForwardDiff.HessianConfig(hout, x)
+        hcfg = ForwardDiff.HessianConfig(nothing, hout, x)
         fhess[length(x)] = @benchmarkable ForwardDiff.hessian!($hout, $f, $x, $hcfg)
     end
 end
@@ -56,9 +54,7 @@ for f in DiffBase.ARRAY_TO_ARRAY_FUNCS
         fval[length(x)] = @benchmarkable $(f)($x)
 
         out = DiffBase.JacobianResult(y, x)
-        cfg = ForwardDiff.Config(x)
+        cfg = ForwardDiff.JacobianConfig(nothing, y, x)
         fjac[length(x)] = @benchmarkable ForwardDiff.jacobian!($out, $f, $x, $cfg)
     end
 end
-
-end # module
diff --git a/benchmark/cpp/.gitignore b/benchmarks/cpp/.gitignore
similarity index 100%
rename from benchmark/cpp/.gitignore
rename to benchmarks/cpp/.gitignore
diff --git a/benchmark/cpp/Makefile b/benchmarks/cpp/Makefile
similarity index 100%
rename from benchmark/cpp/Makefile
rename to benchmarks/cpp/Makefile
diff --git a/benchmark/cpp/benchmarks.cpp b/benchmarks/cpp/benchmarks.cpp
similarity index 100%
rename from benchmark/cpp/benchmarks.cpp
rename to benchmarks/cpp/benchmarks.cpp
diff --git a/benchmark/cpp/benchmarks.h b/benchmarks/cpp/benchmarks.h
similarity index 100%
rename from benchmark/cpp/benchmarks.h
rename to benchmarks/cpp/benchmarks.h
diff --git a/benchmark/cpp/dual1.cpp b/benchmarks/cpp/dual1.cpp
similarity index 100%
rename from benchmark/cpp/dual1.cpp
rename to benchmarks/cpp/dual1.cpp
diff --git a/benchmark/cpp/dual2.cpp b/benchmarks/cpp/dual2.cpp
similarity index 100%
rename from benchmark/cpp/dual2.cpp
rename to benchmarks/cpp/dual2.cpp
diff --git a/benchmark/cpp/dual3.cpp b/benchmarks/cpp/dual3.cpp
similarity index 100%
rename from benchmark/cpp/dual3.cpp
rename to benchmarks/cpp/dual3.cpp
diff --git a/benchmark/cpp/dual4.cpp b/benchmarks/cpp/dual4.cpp
similarity index 100%
rename from benchmark/cpp/dual4.cpp
rename to benchmarks/cpp/dual4.cpp
diff --git a/benchmark/cpp/dual5.cpp b/benchmarks/cpp/dual5.cpp
similarity index 100%
rename from benchmark/cpp/dual5.cpp
rename to benchmarks/cpp/dual5.cpp
diff --git a/benchmark/py/algopy_benchmarks.py b/benchmarks/py/algopy_benchmarks.py
similarity index 100%
rename from benchmark/py/algopy_benchmarks.py
rename to benchmarks/py/algopy_benchmarks.py
diff --git a/benchmark/py/autograd_benchmarks.py b/benchmarks/py/autograd_benchmarks.py
similarity index 100%
rename from benchmark/py/autograd_benchmarks.py
rename to benchmarks/py/autograd_benchmarks.py

From bfd5baf6bec777353461daf183eebfcf4efcfcbb Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Wed, 12 Apr 2017 14:25:44 -0400
Subject: [PATCH 22/26] fix tag pretty printing for symbols

---
 src/dual.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dual.jl b/src/dual.jl
index 7e3ec4c9..11669c08 100644
--- a/src/dual.jl
+++ b/src/dual.jl
@@ -523,7 +523,7 @@ end
 ###################
 
 function Base.show(io::IO, d::Dual{T,V,N}) where {T,V,N}
-    print(io, "Dual{$T}(", value(d))
+    print(io, "Dual{$(repr(T))}(", value(d))
     for i in 1:N
         print(io, ",", partials(d, i))
     end

From e708edbec34a5ec60a790a7e343f880128460211 Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Wed, 12 Apr 2017 15:01:50 -0400
Subject: [PATCH 23/26] change T to V in Partials code to avoid confusion with
 tag parameter

---
 src/partials.jl | 56 ++++++++++++++++++++++++-------------------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/src/partials.jl b/src/partials.jl
index b8ce0352..1485364a 100644
--- a/src/partials.jl
+++ b/src/partials.jl
@@ -1,21 +1,21 @@
-immutable Partials{N,T} <: AbstractVector{T}
-    values::NTuple{N,T}
+immutable Partials{N,V} <: AbstractVector{V}
+    values::NTuple{N,V}
 end
 
 ##############################
 # Utility/Accessor Functions #
 ##############################
 
-@generated function single_seed(::Type{Partials{N,T}}, ::Type{Val{i}}) where {N,T,i}
-    ex = Expr(:tuple, [ifelse(i === j, :(one(T)), :(zero(T))) for j in 1:N]...)
+@generated function single_seed(::Type{Partials{N,V}}, ::Type{Val{i}}) where {N,V,i}
+    ex = Expr(:tuple, [ifelse(i === j, :(one(V)), :(zero(V))) for j in 1:N]...)
     return :(Partials($(ex)))
 end
 
-@inline valtype(::Partials{N,T}) where {N,T} = T
-@inline valtype(::Type{Partials{N,T}}) where {N,T} = T
+@inline valtype(::Partials{N,V}) where {N,V} = V
+@inline valtype(::Type{Partials{N,V}}) where {N,V} = V
 
 @inline npartials(::Partials{N}) where {N} = N
-@inline npartials(::Type{Partials{N,T}}) where {N,T} = N
+@inline npartials(::Type{Partials{N,V}}) where {N,V} = N
 
 @inline Base.length(::Partials{N}) where {N} = N
 @inline Base.size(::Partials{N}) where {N} = (N,)
@@ -35,15 +35,15 @@ Base.IndexStyle(::Type{<:Partials}) = IndexLinear()
 @inline iszero(partials::Partials) = iszero_tuple(partials.values)
 
 @inline Base.zero(partials::Partials) = zero(typeof(partials))
-@inline Base.zero(::Type{Partials{N,T}}) where {N,T} = Partials{N,T}(zero_tuple(NTuple{N,T}))
+@inline Base.zero(::Type{Partials{N,V}}) where {N,V} = Partials{N,V}(zero_tuple(NTuple{N,V}))
 
 @inline Base.one(partials::Partials) = one(typeof(partials))
-@inline Base.one(::Type{Partials{N,T}}) where {N,T} = Partials{N,T}(one_tuple(NTuple{N,T}))
+@inline Base.one(::Type{Partials{N,V}}) where {N,V} = Partials{N,V}(one_tuple(NTuple{N,V}))
 
 @inline Base.rand(partials::Partials) = rand(typeof(partials))
-@inline Base.rand(::Type{Partials{N,T}}) where {N,T} = Partials{N,T}(rand_tuple(NTuple{N,T}))
+@inline Base.rand(::Type{Partials{N,V}}) where {N,V} = Partials{N,V}(rand_tuple(NTuple{N,V}))
 @inline Base.rand(rng::AbstractRNG, partials::Partials) = rand(rng, typeof(partials))
-@inline Base.rand(rng::AbstractRNG, ::Type{Partials{N,T}}) where {N,T} = Partials{N,T}(rand_tuple(rng, NTuple{N,T}))
+@inline Base.rand(rng::AbstractRNG, ::Type{Partials{N,V}}) where {N,V} = Partials{N,V}(rand_tuple(rng, NTuple{N,V}))
 
 Base.isequal(a::Partials{N}, b::Partials{N}) where {N} = isequal(a.values, b.values)
 Base.:(==)(a::Partials{N}, b::Partials{N}) where {N} = a.values == b.values
@@ -55,7 +55,7 @@ Base.hash(partials::Partials, hsh::UInt64) = hash(hash(partials), hsh)
 
 @inline Base.copy(partials::Partials) = partials
 
-Base.read(io::IO, ::Type{Partials{N,T}}) where {N,T} = Partials{N,T}(ntuple(i->read(io, T), Val{N}))
+Base.read(io::IO, ::Type{Partials{N,V}}) where {N,V} = Partials{N,V}(ntuple(i->read(io, V), Val{N}))
 
 function Base.write(io::IO, partials::Partials)
     for p in partials
@@ -69,8 +69,8 @@ end
 
 Base.promote_rule(::Type{Partials{N,A}}, ::Type{Partials{N,B}}) where {N,A,B} = Partials{N,promote_type(A, B)}
 
-Base.convert(::Type{Partials{N,T}}, partials::Partials) where {N,T} = Partials{N,T}(partials.values)
-Base.convert(::Type{Partials{N,T}}, partials::Partials{N,T}) where {N,T} = partials
+Base.convert(::Type{Partials{N,V}}, partials::Partials) where {N,V} = Partials{N,V}(partials.values)
+Base.convert(::Type{Partials{N,V}}, partials::Partials{N,V}) where {N,V} = partials
 
 ########################
 # Arithmetic Functions #
@@ -123,10 +123,10 @@ end
 
 @inline Base.:+(a::Partials{0,A}, b::Partials{0,B}) where {A,B} = Partials{0,promote_type(A,B)}(tuple())
 @inline Base.:-(a::Partials{0,A}, b::Partials{0,B}) where {A,B} = Partials{0,promote_type(A,B)}(tuple())
-@inline Base.:-(partials::Partials{0,T}) where {T} = partials
-@inline Base.:*(partials::Partials{0,T}, x::Real) where {T} = Partials{0,promote_type(T,typeof(x))}(tuple())
-@inline Base.:*(x::Real, partials::Partials{0,T}) where {T} = Partials{0,promote_type(T,typeof(x))}(tuple())
-@inline Base.:/(partials::Partials{0,T}, x::Real) where {T} = Partials{0,promote_type(T,typeof(x))}(tuple())
+@inline Base.:-(partials::Partials{0,V}) where {V} = partials
+@inline Base.:*(partials::Partials{0,V}, x::Real) where {V} = Partials{0,promote_type(V,typeof(x))}(tuple())
+@inline Base.:*(x::Real, partials::Partials{0,V}) where {V} = Partials{0,promote_type(V,typeof(x))}(tuple())
+@inline Base.:/(partials::Partials{0,V}, x::Real) where {V} = Partials{0,promote_type(V,typeof(x))}(tuple())
 
 @inline _mul_partials(a::Partials{0,A}, b::Partials{0,B}, afactor, bfactor) where {A,B} = Partials{0,promote_type(A,B)}(tuple())
 @inline _div_partials(a::Partials{0,A}, b::Partials{0,B}, afactor, bfactor) where {A,B} = Partials{0,promote_type(A,B)}(tuple())
@@ -154,37 +154,37 @@ end
 @inline rand_tuple(::AbstractRNG, ::Type{Tuple{}}) = tuple()
 @inline rand_tuple(::Type{Tuple{}}) = tuple()
 
-@generated function iszero_tuple(tup::NTuple{N,T}) where {N,T}
+@generated function iszero_tuple(tup::NTuple{N,V}) where {N,V}
     ex = Expr(:&&, [:(z == tup[$i]) for i=1:N]...)
     return quote
-        z = zero(T)
+        z = zero(V)
         $(Expr(:meta, :inline))
         @inbounds return $ex
     end
 end
 
-@generated function zero_tuple(::Type{NTuple{N,T}}) where {N,T}
+@generated function zero_tuple(::Type{NTuple{N,V}}) where {N,V}
     ex = tupexpr(i -> :(z), N)
     return quote
-        z = zero(T)
+        z = zero(V)
         return $ex
     end
 end
 
-@generated function one_tuple(::Type{NTuple{N,T}}) where {N,T}
+@generated function one_tuple(::Type{NTuple{N,V}}) where {N,V}
     ex = tupexpr(i -> :(z), N)
     return quote
-        z = one(T)
+        z = one(V)
         return $ex
     end
 end
 
-@generated function rand_tuple(rng::AbstractRNG, ::Type{NTuple{N,T}}) where {N,T}
-    return tupexpr(i -> :(rand(rng, T)), N)
+@generated function rand_tuple(rng::AbstractRNG, ::Type{NTuple{N,V}}) where {N,V}
+    return tupexpr(i -> :(rand(rng, V)), N)
 end
 
-@generated function rand_tuple(::Type{NTuple{N,T}}) where {N,T}
-    return tupexpr(i -> :(rand(T)), N)
+@generated function rand_tuple(::Type{NTuple{N,V}}) where {N,V}
+    return tupexpr(i -> :(rand(V)), N)
 end
 
 @generated function scale_tuple(tup::NTuple{N}, x) where N

From 84ae3f4a01e17a7ff6608598c907a27a4607508c Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Wed, 12 Apr 2017 15:17:10 -0400
Subject: [PATCH 24/26] fix jacobian call in benchmarks

---
 benchmarks/benchmarks.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index bc576885..75523594 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -55,6 +55,6 @@ for f in DiffBase.ARRAY_TO_ARRAY_FUNCS
 
         out = DiffBase.JacobianResult(y, x)
         cfg = ForwardDiff.JacobianConfig(nothing, y, x)
-        fjac[length(x)] = @benchmarkable ForwardDiff.jacobian!($out, $f, $x, $cfg)
+        fjac[length(x)] = @benchmarkable ForwardDiff.jacobian!($out, $f, $y, $x, $cfg)
     end
 end

From 065f07943fd02be6e225a33b55cc837f1d2def9e Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Wed, 12 Apr 2017 15:25:25 -0400
Subject: [PATCH 25/26] update docs

---
 docs/_rst/source/advanced_usage.rst |  79 ++++++++++---------
 docs/_rst/source/basic_api.rst      |  99 ++++++++++++------------
 docs/_rst/source/conf.py            |   4 +-
 docs/_rst/source/contributing.rst   |   4 +-
 docs/_rst/source/how_it_works.rst   |  28 +++----
 docs/_rst/source/install.rst        |   2 +-
 docs/_rst/source/limitations.rst    |   3 -
 docs/_rst/source/upgrade.rst        |  42 ++++++-----
 docs/_sources/advanced_usage.txt    |  79 ++++++++++---------
 docs/_sources/basic_api.txt         |  99 ++++++++++++------------
 docs/_sources/contributing.txt      |   4 +-
 docs/_sources/how_it_works.txt      |  28 +++----
 docs/_sources/install.txt           |   2 +-
 docs/_sources/limitations.txt       |   3 -
 docs/_sources/upgrade.txt           |  42 ++++++-----
 docs/advanced_usage.html            |  86 ++++++++++-----------
 docs/basic_api.html                 | 113 ++++++++++++++--------------
 docs/contributing.html              |  12 ++-
 docs/genindex.html                  |  18 +++--
 docs/how_it_works.html              |  35 ++++-----
 docs/index.html                     |   8 +-
 docs/install.html                   |  10 +--
 docs/limitations.html               |   9 +--
 docs/objects.inv                    |  10 ++-
 docs/search.html                    |   8 +-
 docs/searchindex.js                 |   2 +-
 docs/upgrade.html                   |  50 ++++++------
 27 files changed, 448 insertions(+), 431 deletions(-)

diff --git a/docs/_rst/source/advanced_usage.rst b/docs/_rst/source/advanced_usage.rst
index 576cc4ee..df0c887d 100644
--- a/docs/_rst/source/advanced_usage.rst
+++ b/docs/_rst/source/advanced_usage.rst
@@ -9,7 +9,7 @@ Accessing Lower-Order Results
 
 Let's say you want to calculate the value, gradient, and Hessian of some function ``f`` at
 an input ``x``. You could execute ``f(x)``, ``ForwardDiff.gradient(f, x)`` and
-``ForwardDiff.hessian(f, x)``, but that would be a **horribly redundant way to  accomplish
+``ForwardDiff.hessian(f, x)``, but that would be a **horribly redundant way to accomplish
 this task!**
 
 In the course of calculating higher-order derivatives, ForwardDiff ends up calculating all
@@ -37,7 +37,7 @@ For example:
 
 .. code-block:: julia
 
-    julia> import ForwardDiff
+    julia> using ForwardDiff: GradientConfig, Chunk, gradient!
 
     # let's use a Rosenbrock function as our target function
     julia> function rosenbrock(x)
@@ -58,25 +58,25 @@ For example:
     julia> out = similar(x);
 
     # construct GradientConfig with chunk size of 1
-    julia> cfg1 = ForwardDiff.GradientConfig{1}(x);
+    julia> cfg1 = GradientConfig(rosenbrock, x, Chunk{1}());
 
     # construct GradientConfig with chunk size of 4
-    julia> cfg4 = ForwardDiff.GradientConfig{4}(x);
+    julia> cfg4 = GradientConfig(rosenbrock, x, Chunk{4}());
 
     # construct GradientConfig with chunk size of 10
-    julia> cfg10 = ForwardDiff.GradientConfig{10}(x);
+    julia> cfg10 = GradientConfig(rosenbrock, x, Chunk{10}());
 
     # (input length of 10000) / (chunk size of 1) = (10000 1-element chunks)
-    julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg1);
-      0.408305 seconds (4 allocations: 160 bytes)
+    julia> @time gradient!(out, rosenbrock, x, cfg1);
+      0.775139 seconds (4 allocations: 160 bytes)
 
     # (input length of 10000) / (chunk size of 4) = (2500 4-element chunks)
-    julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg4);
-      0.295764 seconds (4 allocations: 160 bytes)
+    julia> @time gradient!(out, rosenbrock, x, cfg4);
+      0.386459 seconds (4 allocations: 160 bytes)
 
     # (input length of 10000) / (chunk size of 10) = (1000 10-element chunks)
-    julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg10);
-      0.267396 seconds (4 allocations: 160 bytes)
+    julia> @time gradient!(out, rosenbrock, x, cfg10);
+      0.282529 seconds (4 allocations: 160 bytes)
 
 If you do not explicity provide a chunk size, ForwardDiff will try to guess one for you
 based on your input vector:
@@ -85,10 +85,10 @@ based on your input vector:
 
     # The GradientConfig constructor will automatically select a
     # chunk size in one is not explicitly provided
-    julia> cfg = ForwardDiff.GradientConfig(x);
+    julia> cfg = ForwardDiff.GradientConfig(rosenbrock, x);
 
     julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg);
-    0.266920 seconds (4 allocations: 160 bytes)
+      0.281853 seconds (4 allocations: 160 bytes)
 
 If your input dimension is a constant, you should explicitly select a chunk size rather than
 relying on ForwardDiff's heuristic. There are two reasons for this. The first is that
@@ -130,8 +130,8 @@ aren't sensitive to the input and thus cause ForwardDiff to incorrectly return `
 
     # the dual number's perturbation component is zero, so this
     # variable should not propagate derivative information
-    julia> log(ForwardDiff.Dual(0.0, 0.0))
-    Dual(-Inf,NaN) # oops, this NaN should be 0.0
+    julia> log(ForwardDiff.Dual{:tag}(0.0, 0.0))
+    Dual{:tag}(-Inf,NaN) # oops, this NaN should be 0.0
 
 Here, ForwardDiff computes the derivative of ``log(0.0)`` as ``NaN`` and then propagates
 this derivative by multiplying it by the perturbation component. Usually, ForwardDiff can
@@ -153,7 +153,6 @@ In the future, we plan on allowing users and downstream library authors to dynam
 enable ``NaN``-safe mode via the ``AbstractConfig`` API (see `the relevant issue
 <https://github.com/JuliaDiff/ForwardDiff.jl/issues/181>`_).
 
-
 Hessian of a vector-valued function
 -----------------------------------
 
@@ -163,17 +162,17 @@ For example:
 
 .. code-block:: julia
 
-    julia> ForwardDiff.jacobian(x -> ForwardDiff.jacobian(sin, x), [1,2,3])
-    9×3 Array{Float64,2}:
-     -0.841471   0.0        0.0
-     -0.0       -0.0       -0.0
-     -0.0       -0.0       -0.0
-     0.0        0.0        0.0
-     -0.0       -0.909297  -0.0
-     -0.0       -0.0       -0.0
-     0.0        0.0        0.0
-     -0.0       -0.0       -0.0
-     -0.0       -0.0       -0.14112
+    julia> ForwardDiff.jacobian(x -> ForwardDiff.jacobian(cumprod, x), [1,2,3])
+    9×3 Array{Int64,2}:
+     0  0  0
+     0  1  0
+     0  3  2
+     0  0  0
+     1  0  0
+     3  0  1
+     0  0  0
+     0  0  0
+     2  1  0
 
 Since this functionality is composed from ForwardDiff's existing API rather than built into
 it, you're free to construct a ``vector_hessian`` function which suits your needs. For
@@ -190,22 +189,22 @@ expensive operation):
        end
     vector_hessian (generic function with 1 method)
 
-    julia> vector_hessian(sin, [1, 2, 3])
-    3×3×3 Array{Float64,3}:
+    julia> vector_hessian(cumprod, [1, 2, 3])
+    3×3×3 Array{Int64,3}:
     [:, :, 1] =
-     -0.841471   0.0   0.0
-     -0.0       -0.0  -0.0
-     -0.0       -0.0  -0.0
+     0  0  0
+     0  1  0
+     0  3  2
 
     [:, :, 2] =
-      0.0   0.0        0.0
-     -0.0  -0.909297  -0.0
-     -0.0  -0.0       -0.0
+     0  0  0
+     1  0  0
+     3  0  1
 
     [:, :, 3] =
-      0.0   0.0   0.0
-     -0.0  -0.0  -0.0
-     -0.0  -0.0  -0.14112
+     0  0  0
+     0  0  0
+     2  1  0
 
 Likewise, you could write a version of ``vector_hessian`` which supports functions of the
 form ``f!(y, x)``, or perhaps an in-place Jacobian with ``ForwardDiff.jacobian!``.
@@ -232,10 +231,10 @@ SIMD instructions (i.e. not starting Julia with ``-O3``):
     julia> using ForwardDiff: Dual
 
     julia> a = Dual(1., 2., 3., 4.)
-    Dual(1.0,2.0,3.0,4.0)
+    Dual{Void}(1.0,2.0,3.0,4.0)
 
     julia> b = Dual(5., 6., 7., 8.)
-    Dual(5.0,6.0,7.0,8.0)
+    Dual{Void}(5.0,6.0,7.0,8.0)
 
     julia> @code_llvm a + b
 
diff --git a/docs/_rst/source/basic_api.rst b/docs/_rst/source/basic_api.rst
index 28eb6392..96115abd 100644
--- a/docs/_rst/source/basic_api.rst
+++ b/docs/_rst/source/basic_api.rst
@@ -4,22 +4,26 @@ Basic ForwardDiff API
 Derivatives of :math:`f(x) : \mathbb{R} \to \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k}`
 --------------------------------------------------------------------------------------------------
 
-Use ``ForwardDiff.derivative`` to differentiate functions of the form ``f(::Real)::Real`` and ``f(::Real)::AbstractArray``.
+Use ``ForwardDiff.derivative`` to differentiate functions of the form ``f(::Real...)::Real`` and ``f(::Real...)::AbstractArray``.
 
 .. function:: ForwardDiff.derivative!(out, f, x)
 
-    Compute :math:`f'(x)`, storing the output in ``out``.
+    Compute :math:`f'(x)`, storing the output in ``out``. If ``x`` is a ``Tuple``,
+    then ``f`` will be called as ``f(x...)`` and the derivatives with respect to
+    each element in ``x`` will be stored in the respective element of ``out`` (which
+    should also be a ``Tuple``).
 
 .. function:: ForwardDiff.derivative(f, x)
 
-    Compute and return :math:`f'(x)`.
+    Compute and return :math:`f'(x)`. If ``x`` is a ``Tuple``, ``f`` will be
+    called as ``f(x...)``, and a ``Tuple`` of derivatives will be returned.
 
 Gradients of :math:`f(x) : \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k} \to \mathbb{R}`
 ------------------------------------------------------------------------------------------------
 
 Use ``ForwardDiff.gradient`` to differentiate functions of the form ``f(::AbstractArray)::Real``.
 
-.. function:: ForwardDiff.gradient!(out, f, x, cfg = ForwardDiff.GradientConfig(x))
+.. function:: ForwardDiff.gradient!(out, f, x, cfg = ForwardDiff.GradientConfig(f, x))
 
     Compute :math:`\nabla f(\vec{x})`, storing the output in ``out``. It is highly advised
     to preallocate ``cfg`` yourself (see the `AbstractConfig
@@ -34,23 +38,23 @@ Jacobians of :math:`f(x) : \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k}
 
 Use ``ForwardDiff.jacobian`` to differentiate functions of the form ``f(::AbstractArray)::AbstractArray``.
 
-.. function:: ForwardDiff.jacobian!(out, f, x, cfg = ForwardDiff.JacobianConfig(x))
+.. function:: ForwardDiff.jacobian!(out, f, x, cfg = ForwardDiff.JacobianConfig(f, x))
 
     Compute :math:`\mathbf{J}(f)(\vec{x})`, storing the output in ``out``. It is highly
     advised to preallocate ``cfg`` yourself (see the `AbstractConfig
     <basic_api.html#the-abstractconfig-types>`_ section below).
 
-.. function:: ForwardDiff.jacobian!(out, f!, y, x, cfg = ForwardDiff.JacobianConfig(y, x))
+.. function:: ForwardDiff.jacobian!(out, f!, y, x, cfg = ForwardDiff.JacobianConfig(f!, y, x))
 
     Compute :math:`\mathbf{J}(f)(\vec{x})`, where :math:`f(\vec{x})` can be called as
     ``f!(y, x)`` such that the output of :math:`f(\vec{x})` is stored in ``y``. The output
     matrix is stored in ``out``.
 
-.. function:: ForwardDiff.jacobian(f, x, cfg = ForwardDiff.JacobianConfig(x))
+.. function:: ForwardDiff.jacobian(f, x, cfg = ForwardDiff.JacobianConfig(f, x))
 
     Compute and return :math:`\mathbf{J}(f)(\vec{x})`.
 
-.. function:: ForwardDiff.jacobian(f!, y, x, cfg = ForwardDiff.JacobianConfig(y, x))
+.. function:: ForwardDiff.jacobian(f!, y, x, cfg = ForwardDiff.JacobianConfig(f!, y, x))
 
     Compute and return :math:`\mathbf{J}(f)(\vec{x})`, where :math:`f(\vec{x})` can be
     called as ``f!(y, x)`` such that the output of :math:`f(\vec{x})` is stored in ``y``.
@@ -60,13 +64,13 @@ Hessians of :math:`f(x) : \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k}
 
 Use ``ForwardDiff.hessian`` to perform second-order differentiation on functions of the form ``f(::AbstractArray)::Real``.
 
-.. function:: ForwardDiff.hessian!(out, f, x, cfg = ForwardDiff.HessianConfig(x))
+.. function:: ForwardDiff.hessian!(out, f, x, cfg = ForwardDiff.HessianConfig(f, x))
 
     Compute :math:`\mathbf{H}(f)(\vec{x})`, storing the output in ``out``. It is highly
     advised to preallocate ``cfg`` yourself (see the `AbstractConfig
     <basic_api.html#the-abstractconfig-types>`_ section below).
 
-.. function:: ForwardDiff.hessian(f, x, cfg = ForwardDiff.HessianConfig(x))
+.. function:: ForwardDiff.hessian(f, x, cfg = ForwardDiff.HessianConfig(f, x))
 
     Compute and return :math:`\mathbf{H}(f)(\vec{x})`.
 
@@ -77,66 +81,63 @@ For the sake of convenience and performance, all "extra" information used by For
 API methods is bundled up in the ``ForwardDiff.AbstractConfig`` family of types. Theses
 types allow the user to easily feed several different parameters to ForwardDiff's  API
 methods, such as `chunk size <advanced_usage.html#configuring-chunk-size>`_, work buffers,
-multithreading configurations, and perturbation seed configurations.
+and perturbation seed configurations.
 
 ForwardDiff's basic API methods will allocate these types automatically by default,
 but you can drastically reduce memory usage if you preallocate them yourself.
 
-Note that for all constructors below, the chunk size ``N`` may be explictly provided as a
-type parameter, or omitted, in which case ForwardDiff will automatically select a chunk size
-for you. However, it is highly recomended to `specify the chunk size manually when possible
+Note that for all constructors below, the chunk size ``N`` may be explicitly provided,
+or omitted, in which case ForwardDiff will automatically select a chunk size for you.
+However, it is highly recommended to `specify the chunk size manually when possible
 <advanced_usage.html#configuring-chunk-size>`_.
 
-.. function:: ForwardDiff.GradientConfig{N}(x)
+Note also that configurations constructed for a specific function ``f`` cannot
+be reused to differentiate other functions (though they can be reused to differentiate
+``f`` at different values). To construct a configuration which can be reused to
+differentiate any function, you can pass ``nothing`` as the function argument.
+While this is more flexible, this decreases ForwardDiff's ability to catch
+and prevent `perturbation confusion`_.
 
-    Construct a ``GradientConfig`` instance based on the type and shape of the input vector
-    ``x``. The returned ``GradientConfig`` instance contains all the work buffers required
-    by ForwardDiff's gradient/Jacobian methods. If taking the Jacobian of a target function
-    with the form ``f!(y, x)``, use the constructor ``ForwardDiff.GradientConfig{N}(y, x)``
-    instead.
+.. function:: ForwardDiff.GradientConfig(f, x, chunk::ForwardDiff.Chunk{N} = Chunk(x))
+
+    Construct a ``GradientConfig`` instance based on the type of ``f`` and
+    type/shape of the input vector ``x``. The returned ``GradientConfig``
+    instance contains all the work buffers required by ForwardDiff's gradient
+    methods.
 
     This constructor does not store/modify ``x``.
 
-.. function:: ForwardDiff.JacobianConfig{N}(x)
+.. function:: ForwardDiff.JacobianConfig(f, x, chunk::ForwardDiff.Chunk{N} = Chunk(x))
 
-    Exactly like ``ForwardDiff.GradientConfig{N}(x)``, but returns a `JacobianConfig`
-    instead.
+    Exactly like the ``GradientConfig`` constructor, but returns a ``JacobianConfig`` instead.
 
-.. function:: ForwardDiff.JacobianConfig{N}(y, x)
+.. function:: ForwardDiff.JacobianConfig(f!, y, x, chunk::ForwardDiff.Chunk{N} = Chunk(x))
 
-    Construct a ``JacobianConfig`` instance based on the type and shape of the output vector
-    ``y`` and the input vector ``x``. The returned ``JacobianConfig`` instance contains all
-    the work buffers required by  ``ForwardDiff.jacobian``/``ForwardDiff.jacobian!`` with a
-    target function of the form ``f!(y, x)``.
+    Construct a ``JacobianConfig`` instance based on the type of ``f!``, and the
+    types/shapes of the output vector ``y`` and the input vector ``x``. The
+    returned ``JacobianConfig`` instance contains all the work buffers required
+    by ``ForwardDiff.jacobian``/``ForwardDiff.jacobian!`` when the target
+    function takes the form ``f!(y, x)``.
 
     This constructor does not store/modify ``y`` or ``x``.
 
-.. function:: ForwardDiff.HessianConfig{N}(x)
+.. function:: ForwardDiff.HessianConfig(f, x, chunk::ForwardDiff.Chunk{N} = Chunk(x))
 
-    Construct a ``HessianConfig`` instance based on the type and shape of the input vector
-    ``x``. The returned ``HessianConfig`` instance contains all the work buffers required
-    by ForwardDiff's Hessian methods. If using
-    ``ForwardDiff.hessian!(out::DiffBase.DiffResult, args...)``, use the constructor
-    ``ForwardDiff.HessianConfig{N}(out, x)`` instead.
+    Construct a ``HessianConfig`` instance based on the type of ``f`` and
+    type/shape of the input vector ``x``. The returned ``HessianConfig`` instance contains
+    all the work buffers required by ForwardDiff's Hessian methods. If using
+    ``ForwardDiff.hessian!(out::DiffBase.DiffResult, f, x)``, use the constructor
+    ``ForwardDiff.HessianConfig(f, out, x, chunk)`` instead.
 
     This constructor does not store/modify ``x``.
 
-.. function:: ForwardDiff.HessianConfig{N}(out::DiffBase.DiffResult, x)
+.. function:: ForwardDiff.HessianConfig(f, out::DiffBase.DiffResult, x, chunk::ForwardDiff.Chunk{N} = Chunk(x))
 
-    Construct an ``HessianConfig`` instance based on the type and shape of the storage in
-    ``out`` and the input vector ``x``. The returned ``HessianConfig`` instance contains
-    all the work buffers required by ``ForwardDiff.hessian!(out::DiffBase.DiffResult,
-    args...)``.
+    Construct a ``HessianConfig`` instance based on the type of ``f``, types/storage
+    in ``out``, and type/shape of the input vector ``x``. The returned ``HessianConfig``
+    instance contains all the work buffers required by
+    ``ForwardDiff.hessian!(out::DiffBase.DiffResult, args...)``.
 
     This constructor does not store/modify ``out`` or ``x``.
 
-.. function:: ForwardDiff.MultithreadConfig(cfg::AbstractConfig)
-
-    Wrap the given ``cfg`` in a ``MultithreadConfig`` instance, which can then be passed to
-    gradient or Hessian methods in order to enable experimental multithreading. Jacobian
-    methods do not yet support multithreading.
-
-    Note that multithreaded ForwardDiff API methods will attempt to use all available
-    threads. In the future, once Julia exposes more fine-grained threading primitives,
-    a ``MultithreadConfig`` constructor may be added which takes in a user-provided subset
-    of thread IDs instead of using all available threads.
+.. _`perturbation confusion`: https://github.com/JuliaDiff/ForwardDiff.jl/issues/83
diff --git a/docs/_rst/source/conf.py b/docs/_rst/source/conf.py
index 0f4ba663..6e45536b 100644
--- a/docs/_rst/source/conf.py
+++ b/docs/_rst/source/conf.py
@@ -57,9 +57,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '0.2'
+version = '0.5'
 # The full version, including alpha/beta/rc tags.
-release = '0.2.3'
+release = '0.5.0'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/docs/_rst/source/contributing.rst b/docs/_rst/source/contributing.rst
index cb669228..7de30bdc 100644
--- a/docs/_rst/source/contributing.rst
+++ b/docs/_rst/source/contributing.rst
@@ -40,9 +40,7 @@ To see a list of functions to pick from, look at ``ForwardDiff.AUTO_DEFINED_UNAR
      ⋮
 
 Some of these functions may have already been manually optimized. To see what functions have
-already been done, go to ``src/dual.jl``, scroll down to the ``Special Cases`` section, and
-look at the functions under ``Manually Optimized`` (further optimizations to these functions
-are always welcome, if you can come up with something clever).
+already been done, go to ``src/dual.jl`` and scroll down to the ``Special Cases`` section.
 
 The functions in ``ForwardDiff.AUTO_DEFINED_UNARY_FUNCS`` are automatically tested as part
 of ForwardDiff's test suite, so you don't need to write tests yourself. You can test your
diff --git a/docs/_rst/source/how_it_works.rst b/docs/_rst/source/how_it_works.rst
index 20a96e49..dd8114cb 100644
--- a/docs/_rst/source/how_it_works.rst
+++ b/docs/_rst/source/how_it_works.rst
@@ -9,25 +9,26 @@ Julia. There are two key components of this implementation: the ``Dual`` type, a
 Dual Number Implementation
 --------------------------
 
-Partial derivatives are stored in the ``Partials{N,T}`` type:
+Partial derivatives are stored in the ``Partials{N,V}`` type:
 
 .. code-block:: julia
 
-    immutable Partials{N,T}
-        values::NTuple{N,T}
+    struct Partials{N,V} <: AbstractVector{V}
+        values::NTuple{N,V}
     end
 
-Overtop of this container type, ForwardDiff implements the ``Dual{N,T}`` type:
+Overtop of this container type, ForwardDiff implements the ``Dual{T,V,N}`` type:
 
 .. code-block:: julia
 
-    immutable Dual{N,T<:Real} <: Real
-        value::T
-        partials::Partials{N,T}
+    struct Dual{T,V<:Real,N} <: Real
+        value::V
+        partials::Partials{N,V}
     end
 
-This type represents an ``N``-dimensional `dual number`_ with the following mathematical
-behavior:
+This type represents an ``N``-dimensional `dual number`_ coupled with a tag
+parameter ``T`` in order to prevent `perturbation confusion`_. This dual number
+type is implemented to have the following mathematical behavior:
 
 .. math::
 
@@ -44,22 +45,23 @@ can be overloaded on ``Dual`` like so:
 
 .. code-block:: julia
 
-    Base.sin(d::Dual) = Dual(sin(value(d)), cos(value(d)) * partials(d))
+    Base.sin(d::Dual{T}) where {T} = Dual{T}(sin(value(d)), cos(value(d)) * partials(d))
 
 If we assume that a general function ``f`` is composed of entirely of these elementary
 functions, then the chain rule enables our derivatives to compose as well. Thus, by
 overloading a plethora of elementary functions, we can differentiate generic functions
 composed of them by passing in a ``Dual`` number and looking at the output.
 
-We won't dicuss higher-order differentiation in detail, but the reader is encouraged to
+We won't discuss higher-order differentiation in detail, but the reader is encouraged to
 learn about `hyper-dual numbers`_, which extend dual numbers to higher orders by introducing
 extra :math:`\epsilon` terms that can cross-multiply. ForwardDiff's ``Dual`` number
 implementation naturally supports hyper-dual numbers without additional code by allowing
 instances of the ``Dual`` type to nest within each other. For example, a second-order
-hyper-dual number has the type ``Dual{N,Dual{N,T}}``, a third-order hyper-dual number has
-the type ``Dual{N,Dual{N,Dual{N,T}}}``, and so on.
+hyper-dual number has the type ``Dual{T,Dual{S,V,M},N}``, a third-order hyper-dual number has
+the type ``Dual{T,Dual{S,Dual{R,V,K},M},N}``, and so on.
 
 .. _`dual number`: https://en.wikipedia.org/wiki/Dual_number
+.. _`perturbation confusion`: https://github.com/JuliaDiff/ForwardDiff.jl/issues/83
 .. _`hyper-dual numbers`: https://adl.stanford.edu/hyperdual/Fike_AIAA-2011-886.pdf
 
 ForwardDiff's API
diff --git a/docs/_rst/source/install.rst b/docs/_rst/source/install.rst
index bb7668d6..4f3573b6 100644
--- a/docs/_rst/source/install.rst
+++ b/docs/_rst/source/install.rst
@@ -7,4 +7,4 @@ To install ForwardDiff, simply use Julia's package manager:
 
     julia> Pkg.add("ForwardDiff")
 
-The current version of ForwardDiff supports Julia v0.4 and v0.5.
+The current version of ForwardDiff supports Julia v0.6.
diff --git a/docs/_rst/source/limitations.rst b/docs/_rst/source/limitations.rst
index 9862d12b..65ecb75a 100644
--- a/docs/_rst/source/limitations.rst
+++ b/docs/_rst/source/limitations.rst
@@ -12,10 +12,7 @@ function being differentiated):
 
 - **The target function must be written generically enough to accept numbers of type ``T<:Real`` as input  (or arrays of these numbers).** The function doesn't require a specific type signature, as long as the type signature is generic enough to avoid breaking this rule. This also means that any storage assigned used within the function must be generic as well (see `this comment`_ for an example).
 
-- **Nested differentiation of closures is dangerous.** Differentiating closures is safe, and nested differentation is safe, but you might be vulnerable to a subtle bug if you try to do both. See `the relevant issue`_ for details.
-
 - **The types of array inputs must be subtypes of** ``AbstractArray`` **.** Non-``AbstractArray`` array-like types are not officially supported.
 
 .. _`this comment`: https://github.com/JuliaDiff/ForwardDiff.jl/issues/136#issuecomment-237941790
-.. _`the relevant issue`: https://github.com/JuliaDiff/ForwardDiff.jl/issues/83
 .. _`this file`: https://github.com/JuliaDiff/ForwardDiff.jl/blob/master/src/cache.jl
diff --git a/docs/_rst/source/upgrade.rst b/docs/_rst/source/upgrade.rst
index 5fc2ef82..a0afb6af 100644
--- a/docs/_rst/source/upgrade.rst
+++ b/docs/_rst/source/upgrade.rst
@@ -14,11 +14,11 @@ functions to reference them:
 
 .. code-block:: julia
 
-    # old v0.1 style
+    # ForwardDiff v0.1
     using ForwardDiff
     hessian(f, x)
 
-    # current v0.3 style (since v0.2)
+    # ForwardDiff v0.2 & above
     using ForwardDiff
     ForwardDiff.hessian(f, x)
 
@@ -27,26 +27,32 @@ Setting Chunk Size
 
 .. code-block:: julia
 
-    # old v0.1 style
+    # ForwardDiff v0.1
     ForwardDiff.gradient(f, x; chunk_size = 10)
 
-    # old v0.2 style
+    # ForwardDiff v0.2
     ForwardDiff.gradient(f, x, Chunk{10}())
 
-    # current v0.3 style
+    # ForwardDiff v0.3 & v0.4
     ForwardDiff.gradient(f, x, ForwardDiff.GradientConfig{10}(x))
 
+    # ForwardDiff v0.5 & above
+    ForwardDiff.gradient(f, x, ForwardDiff.GradientConfig(f, x, ForwardDiff.Chunk{10}()))
+
 Enabling Multithreading
 -----------------------
 
 .. code-block:: julia
 
-    # old v0.1/v0.2 style
+    # ForwardDiff v0.1 & v0.2
     ForwardDiff.gradient(f, x; multithread = true)
 
-    # current v0.3 style
+    # ForwardDiff v0.3 & v0.4
     ForwardDiff.gradient(f, x, ForwardDiff.MultithreadConfig(ForwardDiff.GradientConfig(x)))
 
+    # ForwardDiff v0.5 & above
+    error("ForwardDiff no longer supports internal multithreading.")
+
 Retrieving Lower-Order Results
 ------------------------------
 
@@ -55,20 +61,20 @@ For more detail, see our documentation on `retrieving lower-order results
 
 .. code-block:: julia
 
-    # old v0.1 style
+    # ForwardDiff v0.1
     answer, results = ForwardDiff.hessian(f, x, AllResults)
     v = ForwardDiff.value(results)
     g = ForwardDiff.gradient(results)
     h = ForwardDiff.hessian(results) # == answer
 
-    # old v0.2 style
+    # ForwardDiff v0.2
     out = HessianResult(x)
     ForwardDiff.hessian!(out, f, x)
     v = ForwardDiff.value(out)
     g = ForwardDiff.gradient(out)
     h = ForwardDiff.hessian(out)
 
-    # current v0.3 style
+    # ForwardDiff v0.3 & above
     using DiffBase
     out = DiffBase.HessianResult(x)
     ForwardDiff.hessian!(out, f, x)
@@ -86,10 +92,10 @@ derivatives by composing existing API functions. For example, here's how to reim
 
 .. code-block:: julia
 
-    # old v0.1 style
+    # ForwardDiff v0.1
     ForwardDiff.tensor(f, x)
 
-    # current v0.3 style (since v0.2)
+    # ForwardDiff v0.2 & above
     function tensor(f, x)
         n = length(x)
         out = ForwardDiff.jacobian(y -> ForwardDiff.hessian(f, y), x)
@@ -108,26 +114,26 @@ ForwardDiff's API functions, see `our API documentation <basic_api.html>`_.
 
 .. code-block:: julia
 
-    # old v0.1 style
+    # ForwardDiff v0.1
     df = ForwardDiff.derivative(f)
 
-    # current v0.3 style (since v0.2)
+    # ForwardDiff v0.2 & above
     df = x -> ForwardDiff.derivative(f, x)
 
 .. code-block:: julia
 
-    # old v0.1 style
+    # ForwardDiff v0.1
     # in-place gradient function of f
     gf! = ForwardDiff.gradient(f, mutates = true)
 
-    # current v0.3 style (since v0.2)
+    # ForwardDiff v0.2 & above
     gf! = (out, x) -> ForwardDiff.gradient!(out, f, x)
 
 .. code-block:: julia
 
-    # old v0.1 style
+    # ForwardDiff v0.1
     # in-place Jacobian function of f!(y, x):
     jf! = ForwardDiff.jacobian(f!, mutates = true, output_length = length(y))
 
-    # current v0.3 style (since v0.2)
+    # ForwardDiff v0.2 & above
     jf! = (out, y, x) -> ForwardDiff.jacobian!(out, f!, y, x)
diff --git a/docs/_sources/advanced_usage.txt b/docs/_sources/advanced_usage.txt
index 576cc4ee..df0c887d 100644
--- a/docs/_sources/advanced_usage.txt
+++ b/docs/_sources/advanced_usage.txt
@@ -9,7 +9,7 @@ Accessing Lower-Order Results
 
 Let's say you want to calculate the value, gradient, and Hessian of some function ``f`` at
 an input ``x``. You could execute ``f(x)``, ``ForwardDiff.gradient(f, x)`` and
-``ForwardDiff.hessian(f, x)``, but that would be a **horribly redundant way to  accomplish
+``ForwardDiff.hessian(f, x)``, but that would be a **horribly redundant way to accomplish
 this task!**
 
 In the course of calculating higher-order derivatives, ForwardDiff ends up calculating all
@@ -37,7 +37,7 @@ For example:
 
 .. code-block:: julia
 
-    julia> import ForwardDiff
+    julia> using ForwardDiff: GradientConfig, Chunk, gradient!
 
     # let's use a Rosenbrock function as our target function
     julia> function rosenbrock(x)
@@ -58,25 +58,25 @@ For example:
     julia> out = similar(x);
 
     # construct GradientConfig with chunk size of 1
-    julia> cfg1 = ForwardDiff.GradientConfig{1}(x);
+    julia> cfg1 = GradientConfig(rosenbrock, x, Chunk{1}());
 
     # construct GradientConfig with chunk size of 4
-    julia> cfg4 = ForwardDiff.GradientConfig{4}(x);
+    julia> cfg4 = GradientConfig(rosenbrock, x, Chunk{4}());
 
     # construct GradientConfig with chunk size of 10
-    julia> cfg10 = ForwardDiff.GradientConfig{10}(x);
+    julia> cfg10 = GradientConfig(rosenbrock, x, Chunk{10}());
 
     # (input length of 10000) / (chunk size of 1) = (10000 1-element chunks)
-    julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg1);
-      0.408305 seconds (4 allocations: 160 bytes)
+    julia> @time gradient!(out, rosenbrock, x, cfg1);
+      0.775139 seconds (4 allocations: 160 bytes)
 
     # (input length of 10000) / (chunk size of 4) = (2500 4-element chunks)
-    julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg4);
-      0.295764 seconds (4 allocations: 160 bytes)
+    julia> @time gradient!(out, rosenbrock, x, cfg4);
+      0.386459 seconds (4 allocations: 160 bytes)
 
     # (input length of 10000) / (chunk size of 10) = (1000 10-element chunks)
-    julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg10);
-      0.267396 seconds (4 allocations: 160 bytes)
+    julia> @time gradient!(out, rosenbrock, x, cfg10);
+      0.282529 seconds (4 allocations: 160 bytes)
 
 If you do not explicity provide a chunk size, ForwardDiff will try to guess one for you
 based on your input vector:
@@ -85,10 +85,10 @@ based on your input vector:
 
     # The GradientConfig constructor will automatically select a
     # chunk size in one is not explicitly provided
-    julia> cfg = ForwardDiff.GradientConfig(x);
+    julia> cfg = ForwardDiff.GradientConfig(rosenbrock, x);
 
     julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg);
-    0.266920 seconds (4 allocations: 160 bytes)
+      0.281853 seconds (4 allocations: 160 bytes)
 
 If your input dimension is a constant, you should explicitly select a chunk size rather than
 relying on ForwardDiff's heuristic. There are two reasons for this. The first is that
@@ -130,8 +130,8 @@ aren't sensitive to the input and thus cause ForwardDiff to incorrectly return `
 
     # the dual number's perturbation component is zero, so this
     # variable should not propagate derivative information
-    julia> log(ForwardDiff.Dual(0.0, 0.0))
-    Dual(-Inf,NaN) # oops, this NaN should be 0.0
+    julia> log(ForwardDiff.Dual{:tag}(0.0, 0.0))
+    Dual{:tag}(-Inf,NaN) # oops, this NaN should be 0.0
 
 Here, ForwardDiff computes the derivative of ``log(0.0)`` as ``NaN`` and then propagates
 this derivative by multiplying it by the perturbation component. Usually, ForwardDiff can
@@ -153,7 +153,6 @@ In the future, we plan on allowing users and downstream library authors to dynam
 enable ``NaN``-safe mode via the ``AbstractConfig`` API (see `the relevant issue
 <https://github.com/JuliaDiff/ForwardDiff.jl/issues/181>`_).
 
-
 Hessian of a vector-valued function
 -----------------------------------
 
@@ -163,17 +162,17 @@ For example:
 
 .. code-block:: julia
 
-    julia> ForwardDiff.jacobian(x -> ForwardDiff.jacobian(sin, x), [1,2,3])
-    9×3 Array{Float64,2}:
-     -0.841471   0.0        0.0
-     -0.0       -0.0       -0.0
-     -0.0       -0.0       -0.0
-     0.0        0.0        0.0
-     -0.0       -0.909297  -0.0
-     -0.0       -0.0       -0.0
-     0.0        0.0        0.0
-     -0.0       -0.0       -0.0
-     -0.0       -0.0       -0.14112
+    julia> ForwardDiff.jacobian(x -> ForwardDiff.jacobian(cumprod, x), [1,2,3])
+    9×3 Array{Int64,2}:
+     0  0  0
+     0  1  0
+     0  3  2
+     0  0  0
+     1  0  0
+     3  0  1
+     0  0  0
+     0  0  0
+     2  1  0
 
 Since this functionality is composed from ForwardDiff's existing API rather than built into
 it, you're free to construct a ``vector_hessian`` function which suits your needs. For
@@ -190,22 +189,22 @@ expensive operation):
        end
     vector_hessian (generic function with 1 method)
 
-    julia> vector_hessian(sin, [1, 2, 3])
-    3×3×3 Array{Float64,3}:
+    julia> vector_hessian(cumprod, [1, 2, 3])
+    3×3×3 Array{Int64,3}:
     [:, :, 1] =
-     -0.841471   0.0   0.0
-     -0.0       -0.0  -0.0
-     -0.0       -0.0  -0.0
+     0  0  0
+     0  1  0
+     0  3  2
 
     [:, :, 2] =
-      0.0   0.0        0.0
-     -0.0  -0.909297  -0.0
-     -0.0  -0.0       -0.0
+     0  0  0
+     1  0  0
+     3  0  1
 
     [:, :, 3] =
-      0.0   0.0   0.0
-     -0.0  -0.0  -0.0
-     -0.0  -0.0  -0.14112
+     0  0  0
+     0  0  0
+     2  1  0
 
 Likewise, you could write a version of ``vector_hessian`` which supports functions of the
 form ``f!(y, x)``, or perhaps an in-place Jacobian with ``ForwardDiff.jacobian!``.
@@ -232,10 +231,10 @@ SIMD instructions (i.e. not starting Julia with ``-O3``):
     julia> using ForwardDiff: Dual
 
     julia> a = Dual(1., 2., 3., 4.)
-    Dual(1.0,2.0,3.0,4.0)
+    Dual{Void}(1.0,2.0,3.0,4.0)
 
     julia> b = Dual(5., 6., 7., 8.)
-    Dual(5.0,6.0,7.0,8.0)
+    Dual{Void}(5.0,6.0,7.0,8.0)
 
     julia> @code_llvm a + b
 
diff --git a/docs/_sources/basic_api.txt b/docs/_sources/basic_api.txt
index 28eb6392..96115abd 100644
--- a/docs/_sources/basic_api.txt
+++ b/docs/_sources/basic_api.txt
@@ -4,22 +4,26 @@ Basic ForwardDiff API
 Derivatives of :math:`f(x) : \mathbb{R} \to \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k}`
 --------------------------------------------------------------------------------------------------
 
-Use ``ForwardDiff.derivative`` to differentiate functions of the form ``f(::Real)::Real`` and ``f(::Real)::AbstractArray``.
+Use ``ForwardDiff.derivative`` to differentiate functions of the form ``f(::Real...)::Real`` and ``f(::Real...)::AbstractArray``.
 
 .. function:: ForwardDiff.derivative!(out, f, x)
 
-    Compute :math:`f'(x)`, storing the output in ``out``.
+    Compute :math:`f'(x)`, storing the output in ``out``. If ``x`` is a ``Tuple``,
+    then ``f`` will be called as ``f(x...)`` and the derivatives with respect to
+    each element in ``x`` will be stored in the respective element of ``out`` (which
+    should also be a ``Tuple``).
 
 .. function:: ForwardDiff.derivative(f, x)
 
-    Compute and return :math:`f'(x)`.
+    Compute and return :math:`f'(x)`. If ``x`` is a ``Tuple``, ``f`` will be
+    called as ``f(x...)``, and a ``Tuple`` of derivatives will be returned.
 
 Gradients of :math:`f(x) : \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k} \to \mathbb{R}`
 ------------------------------------------------------------------------------------------------
 
 Use ``ForwardDiff.gradient`` to differentiate functions of the form ``f(::AbstractArray)::Real``.
 
-.. function:: ForwardDiff.gradient!(out, f, x, cfg = ForwardDiff.GradientConfig(x))
+.. function:: ForwardDiff.gradient!(out, f, x, cfg = ForwardDiff.GradientConfig(f, x))
 
     Compute :math:`\nabla f(\vec{x})`, storing the output in ``out``. It is highly advised
     to preallocate ``cfg`` yourself (see the `AbstractConfig
@@ -34,23 +38,23 @@ Jacobians of :math:`f(x) : \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k}
 
 Use ``ForwardDiff.jacobian`` to differentiate functions of the form ``f(::AbstractArray)::AbstractArray``.
 
-.. function:: ForwardDiff.jacobian!(out, f, x, cfg = ForwardDiff.JacobianConfig(x))
+.. function:: ForwardDiff.jacobian!(out, f, x, cfg = ForwardDiff.JacobianConfig(f, x))
 
     Compute :math:`\mathbf{J}(f)(\vec{x})`, storing the output in ``out``. It is highly
     advised to preallocate ``cfg`` yourself (see the `AbstractConfig
     <basic_api.html#the-abstractconfig-types>`_ section below).
 
-.. function:: ForwardDiff.jacobian!(out, f!, y, x, cfg = ForwardDiff.JacobianConfig(y, x))
+.. function:: ForwardDiff.jacobian!(out, f!, y, x, cfg = ForwardDiff.JacobianConfig(f!, y, x))
 
     Compute :math:`\mathbf{J}(f)(\vec{x})`, where :math:`f(\vec{x})` can be called as
     ``f!(y, x)`` such that the output of :math:`f(\vec{x})` is stored in ``y``. The output
     matrix is stored in ``out``.
 
-.. function:: ForwardDiff.jacobian(f, x, cfg = ForwardDiff.JacobianConfig(x))
+.. function:: ForwardDiff.jacobian(f, x, cfg = ForwardDiff.JacobianConfig(f, x))
 
     Compute and return :math:`\mathbf{J}(f)(\vec{x})`.
 
-.. function:: ForwardDiff.jacobian(f!, y, x, cfg = ForwardDiff.JacobianConfig(y, x))
+.. function:: ForwardDiff.jacobian(f!, y, x, cfg = ForwardDiff.JacobianConfig(f!, y, x))
 
     Compute and return :math:`\mathbf{J}(f)(\vec{x})`, where :math:`f(\vec{x})` can be
     called as ``f!(y, x)`` such that the output of :math:`f(\vec{x})` is stored in ``y``.
@@ -60,13 +64,13 @@ Hessians of :math:`f(x) : \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k}
 
 Use ``ForwardDiff.hessian`` to perform second-order differentiation on functions of the form ``f(::AbstractArray)::Real``.
 
-.. function:: ForwardDiff.hessian!(out, f, x, cfg = ForwardDiff.HessianConfig(x))
+.. function:: ForwardDiff.hessian!(out, f, x, cfg = ForwardDiff.HessianConfig(f, x))
 
     Compute :math:`\mathbf{H}(f)(\vec{x})`, storing the output in ``out``. It is highly
     advised to preallocate ``cfg`` yourself (see the `AbstractConfig
     <basic_api.html#the-abstractconfig-types>`_ section below).
 
-.. function:: ForwardDiff.hessian(f, x, cfg = ForwardDiff.HessianConfig(x))
+.. function:: ForwardDiff.hessian(f, x, cfg = ForwardDiff.HessianConfig(f, x))
 
     Compute and return :math:`\mathbf{H}(f)(\vec{x})`.
 
@@ -77,66 +81,63 @@ For the sake of convenience and performance, all "extra" information used by For
 API methods is bundled up in the ``ForwardDiff.AbstractConfig`` family of types. Theses
 types allow the user to easily feed several different parameters to ForwardDiff's  API
 methods, such as `chunk size <advanced_usage.html#configuring-chunk-size>`_, work buffers,
-multithreading configurations, and perturbation seed configurations.
+and perturbation seed configurations.
 
 ForwardDiff's basic API methods will allocate these types automatically by default,
 but you can drastically reduce memory usage if you preallocate them yourself.
 
-Note that for all constructors below, the chunk size ``N`` may be explictly provided as a
-type parameter, or omitted, in which case ForwardDiff will automatically select a chunk size
-for you. However, it is highly recomended to `specify the chunk size manually when possible
+Note that for all constructors below, the chunk size ``N`` may be explicitly provided,
+or omitted, in which case ForwardDiff will automatically select a chunk size for you.
+However, it is highly recommended to `specify the chunk size manually when possible
 <advanced_usage.html#configuring-chunk-size>`_.
 
-.. function:: ForwardDiff.GradientConfig{N}(x)
+Note also that configurations constructed for a specific function ``f`` cannot
+be reused to differentiate other functions (though they can be reused to differentiate
+``f`` at different values). To construct a configuration which can be reused to
+differentiate any function, you can pass ``nothing`` as the function argument.
+While this is more flexible, this decreases ForwardDiff's ability to catch
+and prevent `perturbation confusion`_.
 
-    Construct a ``GradientConfig`` instance based on the type and shape of the input vector
-    ``x``. The returned ``GradientConfig`` instance contains all the work buffers required
-    by ForwardDiff's gradient/Jacobian methods. If taking the Jacobian of a target function
-    with the form ``f!(y, x)``, use the constructor ``ForwardDiff.GradientConfig{N}(y, x)``
-    instead.
+.. function:: ForwardDiff.GradientConfig(f, x, chunk::ForwardDiff.Chunk{N} = Chunk(x))
+
+    Construct a ``GradientConfig`` instance based on the type of ``f`` and
+    type/shape of the input vector ``x``. The returned ``GradientConfig``
+    instance contains all the work buffers required by ForwardDiff's gradient
+    methods.
 
     This constructor does not store/modify ``x``.
 
-.. function:: ForwardDiff.JacobianConfig{N}(x)
+.. function:: ForwardDiff.JacobianConfig(f, x, chunk::ForwardDiff.Chunk{N} = Chunk(x))
 
-    Exactly like ``ForwardDiff.GradientConfig{N}(x)``, but returns a `JacobianConfig`
-    instead.
+    Exactly like the ``GradientConfig`` constructor, but returns a ``JacobianConfig`` instead.
 
-.. function:: ForwardDiff.JacobianConfig{N}(y, x)
+.. function:: ForwardDiff.JacobianConfig(f!, y, x, chunk::ForwardDiff.Chunk{N} = Chunk(x))
 
-    Construct a ``JacobianConfig`` instance based on the type and shape of the output vector
-    ``y`` and the input vector ``x``. The returned ``JacobianConfig`` instance contains all
-    the work buffers required by  ``ForwardDiff.jacobian``/``ForwardDiff.jacobian!`` with a
-    target function of the form ``f!(y, x)``.
+    Construct a ``JacobianConfig`` instance based on the type of ``f!``, and the
+    types/shapes of the output vector ``y`` and the input vector ``x``. The
+    returned ``JacobianConfig`` instance contains all the work buffers required
+    by ``ForwardDiff.jacobian``/``ForwardDiff.jacobian!`` when the target
+    function takes the form ``f!(y, x)``.
 
     This constructor does not store/modify ``y`` or ``x``.
 
-.. function:: ForwardDiff.HessianConfig{N}(x)
+.. function:: ForwardDiff.HessianConfig(f, x, chunk::ForwardDiff.Chunk{N} = Chunk(x))
 
-    Construct a ``HessianConfig`` instance based on the type and shape of the input vector
-    ``x``. The returned ``HessianConfig`` instance contains all the work buffers required
-    by ForwardDiff's Hessian methods. If using
-    ``ForwardDiff.hessian!(out::DiffBase.DiffResult, args...)``, use the constructor
-    ``ForwardDiff.HessianConfig{N}(out, x)`` instead.
+    Construct a ``HessianConfig`` instance based on the type of ``f`` and
+    type/shape of the input vector ``x``. The returned ``HessianConfig`` instance contains
+    all the work buffers required by ForwardDiff's Hessian methods. If using
+    ``ForwardDiff.hessian!(out::DiffBase.DiffResult, f, x)``, use the constructor
+    ``ForwardDiff.HessianConfig(f, out, x, chunk)`` instead.
 
     This constructor does not store/modify ``x``.
 
-.. function:: ForwardDiff.HessianConfig{N}(out::DiffBase.DiffResult, x)
+.. function:: ForwardDiff.HessianConfig(f, out::DiffBase.DiffResult, x, chunk::ForwardDiff.Chunk{N} = Chunk(x))
 
-    Construct an ``HessianConfig`` instance based on the type and shape of the storage in
-    ``out`` and the input vector ``x``. The returned ``HessianConfig`` instance contains
-    all the work buffers required by ``ForwardDiff.hessian!(out::DiffBase.DiffResult,
-    args...)``.
+    Construct a ``HessianConfig`` instance based on the type of ``f``, types/storage
+    in ``out``, and type/shape of the input vector ``x``. The returned ``HessianConfig``
+    instance contains all the work buffers required by
+    ``ForwardDiff.hessian!(out::DiffBase.DiffResult, args...)``.
 
     This constructor does not store/modify ``out`` or ``x``.
 
-.. function:: ForwardDiff.MultithreadConfig(cfg::AbstractConfig)
-
-    Wrap the given ``cfg`` in a ``MultithreadConfig`` instance, which can then be passed to
-    gradient or Hessian methods in order to enable experimental multithreading. Jacobian
-    methods do not yet support multithreading.
-
-    Note that multithreaded ForwardDiff API methods will attempt to use all available
-    threads. In the future, once Julia exposes more fine-grained threading primitives,
-    a ``MultithreadConfig`` constructor may be added which takes in a user-provided subset
-    of thread IDs instead of using all available threads.
+.. _`perturbation confusion`: https://github.com/JuliaDiff/ForwardDiff.jl/issues/83
diff --git a/docs/_sources/contributing.txt b/docs/_sources/contributing.txt
index cb669228..7de30bdc 100644
--- a/docs/_sources/contributing.txt
+++ b/docs/_sources/contributing.txt
@@ -40,9 +40,7 @@ To see a list of functions to pick from, look at ``ForwardDiff.AUTO_DEFINED_UNAR
      ⋮
 
 Some of these functions may have already been manually optimized. To see what functions have
-already been done, go to ``src/dual.jl``, scroll down to the ``Special Cases`` section, and
-look at the functions under ``Manually Optimized`` (further optimizations to these functions
-are always welcome, if you can come up with something clever).
+already been done, go to ``src/dual.jl`` and scroll down to the ``Special Cases`` section.
 
 The functions in ``ForwardDiff.AUTO_DEFINED_UNARY_FUNCS`` are automatically tested as part
 of ForwardDiff's test suite, so you don't need to write tests yourself. You can test your
diff --git a/docs/_sources/how_it_works.txt b/docs/_sources/how_it_works.txt
index 20a96e49..dd8114cb 100644
--- a/docs/_sources/how_it_works.txt
+++ b/docs/_sources/how_it_works.txt
@@ -9,25 +9,26 @@ Julia. There are two key components of this implementation: the ``Dual`` type, a
 Dual Number Implementation
 --------------------------
 
-Partial derivatives are stored in the ``Partials{N,T}`` type:
+Partial derivatives are stored in the ``Partials{N,V}`` type:
 
 .. code-block:: julia
 
-    immutable Partials{N,T}
-        values::NTuple{N,T}
+    struct Partials{N,V} <: AbstractVector{V}
+        values::NTuple{N,V}
     end
 
-Overtop of this container type, ForwardDiff implements the ``Dual{N,T}`` type:
+Overtop of this container type, ForwardDiff implements the ``Dual{T,V,N}`` type:
 
 .. code-block:: julia
 
-    immutable Dual{N,T<:Real} <: Real
-        value::T
-        partials::Partials{N,T}
+    struct Dual{T,V<:Real,N} <: Real
+        value::V
+        partials::Partials{N,V}
     end
 
-This type represents an ``N``-dimensional `dual number`_ with the following mathematical
-behavior:
+This type represents an ``N``-dimensional `dual number`_ coupled with a tag
+parameter ``T`` in order to prevent `perturbation confusion`_. This dual number
+type is implemented to have the following mathematical behavior:
 
 .. math::
 
@@ -44,22 +45,23 @@ can be overloaded on ``Dual`` like so:
 
 .. code-block:: julia
 
-    Base.sin(d::Dual) = Dual(sin(value(d)), cos(value(d)) * partials(d))
+    Base.sin(d::Dual{T}) where {T} = Dual{T}(sin(value(d)), cos(value(d)) * partials(d))
 
 If we assume that a general function ``f`` is composed of entirely of these elementary
 functions, then the chain rule enables our derivatives to compose as well. Thus, by
 overloading a plethora of elementary functions, we can differentiate generic functions
 composed of them by passing in a ``Dual`` number and looking at the output.
 
-We won't dicuss higher-order differentiation in detail, but the reader is encouraged to
+We won't discuss higher-order differentiation in detail, but the reader is encouraged to
 learn about `hyper-dual numbers`_, which extend dual numbers to higher orders by introducing
 extra :math:`\epsilon` terms that can cross-multiply. ForwardDiff's ``Dual`` number
 implementation naturally supports hyper-dual numbers without additional code by allowing
 instances of the ``Dual`` type to nest within each other. For example, a second-order
-hyper-dual number has the type ``Dual{N,Dual{N,T}}``, a third-order hyper-dual number has
-the type ``Dual{N,Dual{N,Dual{N,T}}}``, and so on.
+hyper-dual number has the type ``Dual{T,Dual{S,V,M},N}``, a third-order hyper-dual number has
+the type ``Dual{T,Dual{S,Dual{R,V,K},M},N}``, and so on.
 
 .. _`dual number`: https://en.wikipedia.org/wiki/Dual_number
+.. _`perturbation confusion`: https://github.com/JuliaDiff/ForwardDiff.jl/issues/83
 .. _`hyper-dual numbers`: https://adl.stanford.edu/hyperdual/Fike_AIAA-2011-886.pdf
 
 ForwardDiff's API
diff --git a/docs/_sources/install.txt b/docs/_sources/install.txt
index bb7668d6..4f3573b6 100644
--- a/docs/_sources/install.txt
+++ b/docs/_sources/install.txt
@@ -7,4 +7,4 @@ To install ForwardDiff, simply use Julia's package manager:
 
     julia> Pkg.add("ForwardDiff")
 
-The current version of ForwardDiff supports Julia v0.4 and v0.5.
+The current version of ForwardDiff supports Julia v0.6.
diff --git a/docs/_sources/limitations.txt b/docs/_sources/limitations.txt
index 9862d12b..65ecb75a 100644
--- a/docs/_sources/limitations.txt
+++ b/docs/_sources/limitations.txt
@@ -12,10 +12,7 @@ function being differentiated):
 
 - **The target function must be written generically enough to accept numbers of type ``T<:Real`` as input  (or arrays of these numbers).** The function doesn't require a specific type signature, as long as the type signature is generic enough to avoid breaking this rule. This also means that any storage assigned used within the function must be generic as well (see `this comment`_ for an example).
 
-- **Nested differentiation of closures is dangerous.** Differentiating closures is safe, and nested differentation is safe, but you might be vulnerable to a subtle bug if you try to do both. See `the relevant issue`_ for details.
-
 - **The types of array inputs must be subtypes of** ``AbstractArray`` **.** Non-``AbstractArray`` array-like types are not officially supported.
 
 .. _`this comment`: https://github.com/JuliaDiff/ForwardDiff.jl/issues/136#issuecomment-237941790
-.. _`the relevant issue`: https://github.com/JuliaDiff/ForwardDiff.jl/issues/83
 .. _`this file`: https://github.com/JuliaDiff/ForwardDiff.jl/blob/master/src/cache.jl
diff --git a/docs/_sources/upgrade.txt b/docs/_sources/upgrade.txt
index 5fc2ef82..a0afb6af 100644
--- a/docs/_sources/upgrade.txt
+++ b/docs/_sources/upgrade.txt
@@ -14,11 +14,11 @@ functions to reference them:
 
 .. code-block:: julia
 
-    # old v0.1 style
+    # ForwardDiff v0.1
     using ForwardDiff
     hessian(f, x)
 
-    # current v0.3 style (since v0.2)
+    # ForwardDiff v0.2 & above
     using ForwardDiff
     ForwardDiff.hessian(f, x)
 
@@ -27,26 +27,32 @@ Setting Chunk Size
 
 .. code-block:: julia
 
-    # old v0.1 style
+    # ForwardDiff v0.1
     ForwardDiff.gradient(f, x; chunk_size = 10)
 
-    # old v0.2 style
+    # ForwardDiff v0.2
     ForwardDiff.gradient(f, x, Chunk{10}())
 
-    # current v0.3 style
+    # ForwardDiff v0.3 & v0.4
     ForwardDiff.gradient(f, x, ForwardDiff.GradientConfig{10}(x))
 
+    # ForwardDiff v0.5 & above
+    ForwardDiff.gradient(f, x, ForwardDiff.GradientConfig(f, x, ForwardDiff.Chunk{10}()))
+
 Enabling Multithreading
 -----------------------
 
 .. code-block:: julia
 
-    # old v0.1/v0.2 style
+    # ForwardDiff v0.1 & v0.2
     ForwardDiff.gradient(f, x; multithread = true)
 
-    # current v0.3 style
+    # ForwardDiff v0.3 & v0.4
     ForwardDiff.gradient(f, x, ForwardDiff.MultithreadConfig(ForwardDiff.GradientConfig(x)))
 
+    # ForwardDiff v0.5 & above
+    error("ForwardDiff no longer supports internal multithreading.")
+
 Retrieving Lower-Order Results
 ------------------------------
 
@@ -55,20 +61,20 @@ For more detail, see our documentation on `retrieving lower-order results
 
 .. code-block:: julia
 
-    # old v0.1 style
+    # ForwardDiff v0.1
     answer, results = ForwardDiff.hessian(f, x, AllResults)
     v = ForwardDiff.value(results)
     g = ForwardDiff.gradient(results)
     h = ForwardDiff.hessian(results) # == answer
 
-    # old v0.2 style
+    # ForwardDiff v0.2
     out = HessianResult(x)
     ForwardDiff.hessian!(out, f, x)
     v = ForwardDiff.value(out)
     g = ForwardDiff.gradient(out)
     h = ForwardDiff.hessian(out)
 
-    # current v0.3 style
+    # ForwardDiff v0.3 & above
     using DiffBase
     out = DiffBase.HessianResult(x)
     ForwardDiff.hessian!(out, f, x)
@@ -86,10 +92,10 @@ derivatives by composing existing API functions. For example, here's how to reim
 
 .. code-block:: julia
 
-    # old v0.1 style
+    # ForwardDiff v0.1
     ForwardDiff.tensor(f, x)
 
-    # current v0.3 style (since v0.2)
+    # ForwardDiff v0.2 & above
     function tensor(f, x)
         n = length(x)
         out = ForwardDiff.jacobian(y -> ForwardDiff.hessian(f, y), x)
@@ -108,26 +114,26 @@ ForwardDiff's API functions, see `our API documentation <basic_api.html>`_.
 
 .. code-block:: julia
 
-    # old v0.1 style
+    # ForwardDiff v0.1
     df = ForwardDiff.derivative(f)
 
-    # current v0.3 style (since v0.2)
+    # ForwardDiff v0.2 & above
     df = x -> ForwardDiff.derivative(f, x)
 
 .. code-block:: julia
 
-    # old v0.1 style
+    # ForwardDiff v0.1
     # in-place gradient function of f
     gf! = ForwardDiff.gradient(f, mutates = true)
 
-    # current v0.3 style (since v0.2)
+    # ForwardDiff v0.2 & above
     gf! = (out, x) -> ForwardDiff.gradient!(out, f, x)
 
 .. code-block:: julia
 
-    # old v0.1 style
+    # ForwardDiff v0.1
     # in-place Jacobian function of f!(y, x):
     jf! = ForwardDiff.jacobian(f!, mutates = true, output_length = length(y))
 
-    # current v0.3 style (since v0.2)
+    # ForwardDiff v0.2 & above
     jf! = (out, y, x) -> ForwardDiff.jacobian!(out, f!, y, x)
diff --git a/docs/advanced_usage.html b/docs/advanced_usage.html
index 47bc8814..0d1e7da1 100644
--- a/docs/advanced_usage.html
+++ b/docs/advanced_usage.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Advanced Usage Guide &mdash; ForwardDiff.jl 0.2.3 documentation</title>
+  <title>Advanced Usage Guide &mdash; ForwardDiff.jl 0.5.0 documentation</title>
   
 
   
@@ -30,7 +30,7 @@
   
 
   
-    <link rel="top" title="ForwardDiff.jl 0.2.3 documentation" href="index.html"/>
+    <link rel="top" title="ForwardDiff.jl 0.5.0 documentation" href="index.html"/>
         <link rel="next" title="Upgrading from Older Versions of ForwardDiff" href="upgrade.html"/>
         <link rel="prev" title="Basic ForwardDiff API" href="basic_api.html"/> 
 
@@ -60,7 +60,7 @@
             
             
               <div class="version">
-                0.2
+                0.5
               </div>
             
           
@@ -152,7 +152,7 @@ <h1>Advanced Usage Guide<a class="headerlink" href="#advanced-usage-guide" title
 <h2>Accessing Lower-Order Results<a class="headerlink" href="#accessing-lower-order-results" title="Permalink to this headline">¶</a></h2>
 <p>Let&#8217;s say you want to calculate the value, gradient, and Hessian of some function <code class="docutils literal"><span class="pre">f</span></code> at
 an input <code class="docutils literal"><span class="pre">x</span></code>. You could execute <code class="docutils literal"><span class="pre">f(x)</span></code>, <code class="docutils literal"><span class="pre">ForwardDiff.gradient(f,</span> <span class="pre">x)</span></code> and
-<code class="docutils literal"><span class="pre">ForwardDiff.hessian(f,</span> <span class="pre">x)</span></code>, but that would be a <strong>horribly redundant way to  accomplish
+<code class="docutils literal"><span class="pre">ForwardDiff.hessian(f,</span> <span class="pre">x)</span></code>, but that would be a <strong>horribly redundant way to accomplish
 this task!</strong></p>
 <p>In the course of calculating higher-order derivatives, ForwardDiff ends up calculating all
 the lower-order derivatives and primal value <code class="docutils literal"><span class="pre">f(x)</span></code>. To retrieve these results in one fell
@@ -172,7 +172,7 @@ <h2>Configuring Chunk Size<a class="headerlink" href="#configuring-chunk-size" t
 a larger chunk size reduces calls to the target function at the cost of more memory
 bandwidth.</p>
 <p>For example:</p>
-<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="n">julia</span><span class="o">&gt;</span> <span class="k">import</span> <span class="n">ForwardDiff</span>
+<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="n">julia</span><span class="o">&gt;</span> <span class="k">using</span> <span class="n">ForwardDiff</span><span class="p">:</span> <span class="n">GradientConfig</span><span class="p">,</span> <span class="n">Chunk</span><span class="p">,</span> <span class="n">gradient!</span>
 
 <span class="c"># let&#39;s use a Rosenbrock function as our target function</span>
 <span class="n">julia</span><span class="o">&gt;</span> <span class="k">function</span><span class="nf"> rosenbrock</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
@@ -193,35 +193,35 @@ <h2>Configuring Chunk Size<a class="headerlink" href="#configuring-chunk-size" t
 <span class="n">julia</span><span class="o">&gt;</span> <span class="n">out</span> <span class="o">=</span> <span class="n">similar</span><span class="p">(</span><span class="n">x</span><span class="p">);</span>
 
 <span class="c"># construct GradientConfig with chunk size of 1</span>
-<span class="n">julia</span><span class="o">&gt;</span> <span class="n">cfg1</span> <span class="o">=</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">GradientConfig</span><span class="p">{</span><span class="mi">1</span><span class="p">}(</span><span class="n">x</span><span class="p">);</span>
+<span class="n">julia</span><span class="o">&gt;</span> <span class="n">cfg1</span> <span class="o">=</span> <span class="n">GradientConfig</span><span class="p">(</span><span class="n">rosenbrock</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">Chunk</span><span class="p">{</span><span class="mi">1</span><span class="p">}());</span>
 
 <span class="c"># construct GradientConfig with chunk size of 4</span>
-<span class="n">julia</span><span class="o">&gt;</span> <span class="n">cfg4</span> <span class="o">=</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">GradientConfig</span><span class="p">{</span><span class="mi">4</span><span class="p">}(</span><span class="n">x</span><span class="p">);</span>
+<span class="n">julia</span><span class="o">&gt;</span> <span class="n">cfg4</span> <span class="o">=</span> <span class="n">GradientConfig</span><span class="p">(</span><span class="n">rosenbrock</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">Chunk</span><span class="p">{</span><span class="mi">4</span><span class="p">}());</span>
 
 <span class="c"># construct GradientConfig with chunk size of 10</span>
-<span class="n">julia</span><span class="o">&gt;</span> <span class="n">cfg10</span> <span class="o">=</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">GradientConfig</span><span class="p">{</span><span class="mi">10</span><span class="p">}(</span><span class="n">x</span><span class="p">);</span>
+<span class="n">julia</span><span class="o">&gt;</span> <span class="n">cfg10</span> <span class="o">=</span> <span class="n">GradientConfig</span><span class="p">(</span><span class="n">rosenbrock</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">Chunk</span><span class="p">{</span><span class="mi">10</span><span class="p">}());</span>
 
 <span class="c"># (input length of 10000) / (chunk size of 1) = (10000 1-element chunks)</span>
-<span class="n">julia</span><span class="o">&gt;</span> <span class="p">@</span><span class="n">time</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">gradient!</span><span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="n">rosenbrock</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">cfg1</span><span class="p">);</span>
-  <span class="mf">0.408305</span> <span class="n">seconds</span> <span class="p">(</span><span class="mi">4</span> <span class="n">allocations</span><span class="p">:</span> <span class="mi">160</span> <span class="n">bytes</span><span class="p">)</span>
+<span class="n">julia</span><span class="o">&gt;</span> <span class="p">@</span><span class="n">time</span> <span class="n">gradient!</span><span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="n">rosenbrock</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">cfg1</span><span class="p">);</span>
+  <span class="mf">0.775139</span> <span class="n">seconds</span> <span class="p">(</span><span class="mi">4</span> <span class="n">allocations</span><span class="p">:</span> <span class="mi">160</span> <span class="n">bytes</span><span class="p">)</span>
 
 <span class="c"># (input length of 10000) / (chunk size of 4) = (2500 4-element chunks)</span>
-<span class="n">julia</span><span class="o">&gt;</span> <span class="p">@</span><span class="n">time</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">gradient!</span><span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="n">rosenbrock</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">cfg4</span><span class="p">);</span>
-  <span class="mf">0.295764</span> <span class="n">seconds</span> <span class="p">(</span><span class="mi">4</span> <span class="n">allocations</span><span class="p">:</span> <span class="mi">160</span> <span class="n">bytes</span><span class="p">)</span>
+<span class="n">julia</span><span class="o">&gt;</span> <span class="p">@</span><span class="n">time</span> <span class="n">gradient!</span><span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="n">rosenbrock</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">cfg4</span><span class="p">);</span>
+  <span class="mf">0.386459</span> <span class="n">seconds</span> <span class="p">(</span><span class="mi">4</span> <span class="n">allocations</span><span class="p">:</span> <span class="mi">160</span> <span class="n">bytes</span><span class="p">)</span>
 
 <span class="c"># (input length of 10000) / (chunk size of 10) = (1000 10-element chunks)</span>
-<span class="n">julia</span><span class="o">&gt;</span> <span class="p">@</span><span class="n">time</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">gradient!</span><span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="n">rosenbrock</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">cfg10</span><span class="p">);</span>
-  <span class="mf">0.267396</span> <span class="n">seconds</span> <span class="p">(</span><span class="mi">4</span> <span class="n">allocations</span><span class="p">:</span> <span class="mi">160</span> <span class="n">bytes</span><span class="p">)</span>
+<span class="n">julia</span><span class="o">&gt;</span> <span class="p">@</span><span class="n">time</span> <span class="n">gradient!</span><span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="n">rosenbrock</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">cfg10</span><span class="p">);</span>
+  <span class="mf">0.282529</span> <span class="n">seconds</span> <span class="p">(</span><span class="mi">4</span> <span class="n">allocations</span><span class="p">:</span> <span class="mi">160</span> <span class="n">bytes</span><span class="p">)</span>
 </pre></div>
 </div>
 <p>If you do not explicity provide a chunk size, ForwardDiff will try to guess one for you
 based on your input vector:</p>
 <div class="highlight-julia"><div class="highlight"><pre><span></span><span class="c"># The GradientConfig constructor will automatically select a</span>
 <span class="c"># chunk size in one is not explicitly provided</span>
-<span class="n">julia</span><span class="o">&gt;</span> <span class="n">cfg</span> <span class="o">=</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">GradientConfig</span><span class="p">(</span><span class="n">x</span><span class="p">);</span>
+<span class="n">julia</span><span class="o">&gt;</span> <span class="n">cfg</span> <span class="o">=</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">GradientConfig</span><span class="p">(</span><span class="n">rosenbrock</span><span class="p">,</span> <span class="n">x</span><span class="p">);</span>
 
 <span class="n">julia</span><span class="o">&gt;</span> <span class="p">@</span><span class="n">time</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">gradient!</span><span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="n">rosenbrock</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">cfg</span><span class="p">);</span>
-<span class="mf">0.266920</span> <span class="n">seconds</span> <span class="p">(</span><span class="mi">4</span> <span class="n">allocations</span><span class="p">:</span> <span class="mi">160</span> <span class="n">bytes</span><span class="p">)</span>
+  <span class="mf">0.281853</span> <span class="n">seconds</span> <span class="p">(</span><span class="mi">4</span> <span class="n">allocations</span><span class="p">:</span> <span class="mi">160</span> <span class="n">bytes</span><span class="p">)</span>
 </pre></div>
 </div>
 <p>If your input dimension is a constant, you should explicitly select a chunk size rather than
@@ -256,8 +256,8 @@ <h2>Fixing issues with NaN/Inf return values<a class="headerlink" href="#fixing-
 <code class="docutils literal"><span class="pre">Inf</span></code> derivatives. For example:</p>
 <div class="highlight-julia"><div class="highlight"><pre><span></span><span class="c"># the dual number&#39;s perturbation component is zero, so this</span>
 <span class="c"># variable should not propagate derivative information</span>
-<span class="n">julia</span><span class="o">&gt;</span> <span class="n">log</span><span class="p">(</span><span class="n">ForwardDiff</span><span class="o">.</span><span class="n">Dual</span><span class="p">(</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">))</span>
-<span class="n">Dual</span><span class="p">(</span><span class="o">-</span><span class="nb">Inf</span><span class="p">,</span><span class="n">NaN</span><span class="p">)</span> <span class="c"># oops, this NaN should be 0.0</span>
+<span class="n">julia</span><span class="o">&gt;</span> <span class="n">log</span><span class="p">(</span><span class="n">ForwardDiff</span><span class="o">.</span><span class="n">Dual</span><span class="p">{:</span><span class="n">tag</span><span class="p">}(</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">))</span>
+<span class="n">Dual</span><span class="p">{:</span><span class="n">tag</span><span class="p">}(</span><span class="o">-</span><span class="nb">Inf</span><span class="p">,</span><span class="n">NaN</span><span class="p">)</span> <span class="c"># oops, this NaN should be 0.0</span>
 </pre></div>
 </div>
 <p>Here, ForwardDiff computes the derivative of <code class="docutils literal"><span class="pre">log(0.0)</span></code> as <code class="docutils literal"><span class="pre">NaN</span></code> and then propagates
@@ -281,17 +281,17 @@ <h2>Hessian of a vector-valued function<a class="headerlink" href="#hessian-of-a
 <p>While ForwardDiff does not have a built-in function for taking Hessians of vector-valued
 functions, you can easily compose calls to <code class="docutils literal"><span class="pre">ForwardDiff.jacobian</span></code> to accomplish this.
 For example:</p>
-<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="n">julia</span><span class="o">&gt;</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">jacobian</span><span class="p">(</span><span class="n">x</span> <span class="o">-&gt;</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">jacobian</span><span class="p">(</span><span class="n">sin</span><span class="p">,</span> <span class="n">x</span><span class="p">),</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">])</span>
-<span class="mi">9</span><span class="n">×3</span> <span class="n">Array</span><span class="p">{</span><span class="kt">Float64</span><span class="p">,</span><span class="mi">2</span><span class="p">}:</span>
- <span class="o">-</span><span class="mf">0.841471</span>   <span class="mf">0.0</span>        <span class="mf">0.0</span>
- <span class="o">-</span><span class="mf">0.0</span>       <span class="o">-</span><span class="mf">0.0</span>       <span class="o">-</span><span class="mf">0.0</span>
- <span class="o">-</span><span class="mf">0.0</span>       <span class="o">-</span><span class="mf">0.0</span>       <span class="o">-</span><span class="mf">0.0</span>
- <span class="mf">0.0</span>        <span class="mf">0.0</span>        <span class="mf">0.0</span>
- <span class="o">-</span><span class="mf">0.0</span>       <span class="o">-</span><span class="mf">0.909297</span>  <span class="o">-</span><span class="mf">0.0</span>
- <span class="o">-</span><span class="mf">0.0</span>       <span class="o">-</span><span class="mf">0.0</span>       <span class="o">-</span><span class="mf">0.0</span>
- <span class="mf">0.0</span>        <span class="mf">0.0</span>        <span class="mf">0.0</span>
- <span class="o">-</span><span class="mf">0.0</span>       <span class="o">-</span><span class="mf">0.0</span>       <span class="o">-</span><span class="mf">0.0</span>
- <span class="o">-</span><span class="mf">0.0</span>       <span class="o">-</span><span class="mf">0.0</span>       <span class="o">-</span><span class="mf">0.14112</span>
+<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="n">julia</span><span class="o">&gt;</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">jacobian</span><span class="p">(</span><span class="n">x</span> <span class="o">-&gt;</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">jacobian</span><span class="p">(</span><span class="n">cumprod</span><span class="p">,</span> <span class="n">x</span><span class="p">),</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">])</span>
+<span class="mi">9</span><span class="n">×3</span> <span class="n">Array</span><span class="p">{</span><span class="kt">Int64</span><span class="p">,</span><span class="mi">2</span><span class="p">}:</span>
+ <span class="mi">0</span>  <span class="mi">0</span>  <span class="mi">0</span>
+ <span class="mi">0</span>  <span class="mi">1</span>  <span class="mi">0</span>
+ <span class="mi">0</span>  <span class="mi">3</span>  <span class="mi">2</span>
+ <span class="mi">0</span>  <span class="mi">0</span>  <span class="mi">0</span>
+ <span class="mi">1</span>  <span class="mi">0</span>  <span class="mi">0</span>
+ <span class="mi">3</span>  <span class="mi">0</span>  <span class="mi">1</span>
+ <span class="mi">0</span>  <span class="mi">0</span>  <span class="mi">0</span>
+ <span class="mi">0</span>  <span class="mi">0</span>  <span class="mi">0</span>
+ <span class="mi">2</span>  <span class="mi">1</span>  <span class="mi">0</span>
 </pre></div>
 </div>
 <p>Since this functionality is composed from ForwardDiff&#8217;s existing API rather than built into
@@ -306,22 +306,22 @@ <h2>Hessian of a vector-valued function<a class="headerlink" href="#hessian-of-a
    <span class="k">end</span>
 <span class="n">vector_hessian</span> <span class="p">(</span><span class="n">generic</span> <span class="k">function</span><span class="nf"> with</span> <span class="mi">1</span> <span class="n">method</span><span class="p">)</span>
 
-<span class="n">julia</span><span class="o">&gt;</span> <span class="n">vector_hessian</span><span class="p">(</span><span class="n">sin</span><span class="p">,</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])</span>
-<span class="mi">3</span><span class="n">×3×3</span> <span class="n">Array</span><span class="p">{</span><span class="kt">Float64</span><span class="p">,</span><span class="mi">3</span><span class="p">}:</span>
+<span class="n">julia</span><span class="o">&gt;</span> <span class="n">vector_hessian</span><span class="p">(</span><span class="n">cumprod</span><span class="p">,</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])</span>
+<span class="mi">3</span><span class="n">×3×3</span> <span class="n">Array</span><span class="p">{</span><span class="kt">Int64</span><span class="p">,</span><span class="mi">3</span><span class="p">}:</span>
 <span class="p">[:,</span> <span class="p">:,</span> <span class="mi">1</span><span class="p">]</span> <span class="o">=</span>
- <span class="o">-</span><span class="mf">0.841471</span>   <span class="mf">0.0</span>   <span class="mf">0.0</span>
- <span class="o">-</span><span class="mf">0.0</span>       <span class="o">-</span><span class="mf">0.0</span>  <span class="o">-</span><span class="mf">0.0</span>
- <span class="o">-</span><span class="mf">0.0</span>       <span class="o">-</span><span class="mf">0.0</span>  <span class="o">-</span><span class="mf">0.0</span>
+ <span class="mi">0</span>  <span class="mi">0</span>  <span class="mi">0</span>
+ <span class="mi">0</span>  <span class="mi">1</span>  <span class="mi">0</span>
+ <span class="mi">0</span>  <span class="mi">3</span>  <span class="mi">2</span>
 
 <span class="p">[:,</span> <span class="p">:,</span> <span class="mi">2</span><span class="p">]</span> <span class="o">=</span>
-  <span class="mf">0.0</span>   <span class="mf">0.0</span>        <span class="mf">0.0</span>
- <span class="o">-</span><span class="mf">0.0</span>  <span class="o">-</span><span class="mf">0.909297</span>  <span class="o">-</span><span class="mf">0.0</span>
- <span class="o">-</span><span class="mf">0.0</span>  <span class="o">-</span><span class="mf">0.0</span>       <span class="o">-</span><span class="mf">0.0</span>
+ <span class="mi">0</span>  <span class="mi">0</span>  <span class="mi">0</span>
+ <span class="mi">1</span>  <span class="mi">0</span>  <span class="mi">0</span>
+ <span class="mi">3</span>  <span class="mi">0</span>  <span class="mi">1</span>
 
 <span class="p">[:,</span> <span class="p">:,</span> <span class="mi">3</span><span class="p">]</span> <span class="o">=</span>
-  <span class="mf">0.0</span>   <span class="mf">0.0</span>   <span class="mf">0.0</span>
- <span class="o">-</span><span class="mf">0.0</span>  <span class="o">-</span><span class="mf">0.0</span>  <span class="o">-</span><span class="mf">0.0</span>
- <span class="o">-</span><span class="mf">0.0</span>  <span class="o">-</span><span class="mf">0.0</span>  <span class="o">-</span><span class="mf">0.14112</span>
+ <span class="mi">0</span>  <span class="mi">0</span>  <span class="mi">0</span>
+ <span class="mi">0</span>  <span class="mi">0</span>  <span class="mi">0</span>
+ <span class="mi">2</span>  <span class="mi">1</span>  <span class="mi">0</span>
 </pre></div>
 </div>
 <p>Likewise, you could write a version of <code class="docutils literal"><span class="pre">vector_hessian</span></code> which supports functions of the
@@ -340,10 +340,10 @@ <h2>SIMD Vectorization<a class="headerlink" href="#simd-vectorization" title="Pe
 <div class="highlight-julia"><div class="highlight"><pre><span></span><span class="n">julia</span><span class="o">&gt;</span> <span class="k">using</span> <span class="n">ForwardDiff</span><span class="p">:</span> <span class="n">Dual</span>
 
 <span class="n">julia</span><span class="o">&gt;</span> <span class="n">a</span> <span class="o">=</span> <span class="n">Dual</span><span class="p">(</span><span class="mf">1.</span><span class="p">,</span> <span class="mf">2.</span><span class="p">,</span> <span class="mf">3.</span><span class="p">,</span> <span class="mf">4.</span><span class="p">)</span>
-<span class="n">Dual</span><span class="p">(</span><span class="mf">1.0</span><span class="p">,</span><span class="mf">2.0</span><span class="p">,</span><span class="mf">3.0</span><span class="p">,</span><span class="mf">4.0</span><span class="p">)</span>
+<span class="n">Dual</span><span class="p">{</span><span class="n">Void</span><span class="p">}(</span><span class="mf">1.0</span><span class="p">,</span><span class="mf">2.0</span><span class="p">,</span><span class="mf">3.0</span><span class="p">,</span><span class="mf">4.0</span><span class="p">)</span>
 
 <span class="n">julia</span><span class="o">&gt;</span> <span class="n">b</span> <span class="o">=</span> <span class="n">Dual</span><span class="p">(</span><span class="mf">5.</span><span class="p">,</span> <span class="mf">6.</span><span class="p">,</span> <span class="mf">7.</span><span class="p">,</span> <span class="mf">8.</span><span class="p">)</span>
-<span class="n">Dual</span><span class="p">(</span><span class="mf">5.0</span><span class="p">,</span><span class="mf">6.0</span><span class="p">,</span><span class="mf">7.0</span><span class="p">,</span><span class="mf">8.0</span><span class="p">)</span>
+<span class="n">Dual</span><span class="p">{</span><span class="n">Void</span><span class="p">}(</span><span class="mf">5.0</span><span class="p">,</span><span class="mf">6.0</span><span class="p">,</span><span class="mf">7.0</span><span class="p">,</span><span class="mf">8.0</span><span class="p">)</span>
 
 <span class="n">julia</span><span class="o">&gt;</span> <span class="p">@</span><span class="n">code_llvm</span> <span class="n">a</span> <span class="o">+</span> <span class="n">b</span>
 
@@ -445,7 +445,7 @@ <h2>SIMD Vectorization<a class="headerlink" href="#simd-vectorization" title="Pe
     <script type="text/javascript">
         var DOCUMENTATION_OPTIONS = {
             URL_ROOT:'./',
-            VERSION:'0.2.3',
+            VERSION:'0.5.0',
             COLLAPSE_INDEX:false,
             FILE_SUFFIX:'.html',
             HAS_SOURCE:  true
diff --git a/docs/basic_api.html b/docs/basic_api.html
index 882019e8..862d63fb 100644
--- a/docs/basic_api.html
+++ b/docs/basic_api.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Basic ForwardDiff API &mdash; ForwardDiff.jl 0.2.3 documentation</title>
+  <title>Basic ForwardDiff API &mdash; ForwardDiff.jl 0.5.0 documentation</title>
   
 
   
@@ -30,7 +30,7 @@
   
 
   
-    <link rel="top" title="ForwardDiff.jl 0.2.3 documentation" href="index.html"/>
+    <link rel="top" title="ForwardDiff.jl 0.5.0 documentation" href="index.html"/>
         <link rel="next" title="Advanced Usage Guide" href="advanced_usage.html"/>
         <link rel="prev" title="Limitations of ForwardDiff" href="limitations.html"/> 
 
@@ -60,7 +60,7 @@
             
             
               <div class="version">
-                0.2
+                0.5
               </div>
             
           
@@ -148,17 +148,21 @@
 <h1>Basic ForwardDiff API<a class="headerlink" href="#basic-forwarddiff-api" title="Permalink to this headline">¶</a></h1>
 <div class="section" id="derivatives-of">
 <h2>Derivatives of <span class="math">\(f(x) : \mathbb{R} \to \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k}\)</span><a class="headerlink" href="#derivatives-of" title="Permalink to this headline">¶</a></h2>
-<p>Use <code class="docutils literal"><span class="pre">ForwardDiff.derivative</span></code> to differentiate functions of the form <code class="docutils literal"><span class="pre">f(::Real)::Real</span></code> and <code class="docutils literal"><span class="pre">f(::Real)::AbstractArray</span></code>.</p>
+<p>Use <code class="docutils literal"><span class="pre">ForwardDiff.derivative</span></code> to differentiate functions of the form <code class="docutils literal"><span class="pre">f(::Real...)::Real</span></code> and <code class="docutils literal"><span class="pre">f(::Real...)::AbstractArray</span></code>.</p>
 <dl class="function">
 <dt>
 <code class="descname">ForwardDiff.derivative!(out, f, x)</code></dt>
-<dd><p>Compute <span class="math">\(f'(x)\)</span>, storing the output in <code class="docutils literal"><span class="pre">out</span></code>.</p>
+<dd><p>Compute <span class="math">\(f'(x)\)</span>, storing the output in <code class="docutils literal"><span class="pre">out</span></code>. If <code class="docutils literal"><span class="pre">x</span></code> is a <code class="docutils literal"><span class="pre">Tuple</span></code>,
+then <code class="docutils literal"><span class="pre">f</span></code> will be called as <code class="docutils literal"><span class="pre">f(x...)</span></code> and the derivatives with respect to
+each element in <code class="docutils literal"><span class="pre">x</span></code> will be stored in the respective element of <code class="docutils literal"><span class="pre">out</span></code> (which
+should also be a <code class="docutils literal"><span class="pre">Tuple</span></code>).</p>
 </dd></dl>
 
 <dl class="function">
 <dt id="ForwardDiff.derivative">
 <code class="descclassname">ForwardDiff.</code><code class="descname">derivative</code><span class="sig-paren">(</span><em>f</em>, <em>x</em><span class="sig-paren">)</span><a class="headerlink" href="#ForwardDiff.derivative" title="Permalink to this definition">¶</a></dt>
-<dd><p>Compute and return <span class="math">\(f'(x)\)</span>.</p>
+<dd><p>Compute and return <span class="math">\(f'(x)\)</span>. If <code class="docutils literal"><span class="pre">x</span></code> is a <code class="docutils literal"><span class="pre">Tuple</span></code>, <code class="docutils literal"><span class="pre">f</span></code> will be
+called as <code class="docutils literal"><span class="pre">f(x...)</span></code>, and a <code class="docutils literal"><span class="pre">Tuple</span></code> of derivatives will be returned.</p>
 </dd></dl>
 
 </div>
@@ -167,7 +171,7 @@ <h2>Gradients of <span class="math">\(f(x) : \mathbb{R}^{n_1} \times \dots \time
 <p>Use <code class="docutils literal"><span class="pre">ForwardDiff.gradient</span></code> to differentiate functions of the form <code class="docutils literal"><span class="pre">f(::AbstractArray)::Real</span></code>.</p>
 <dl class="function">
 <dt>
-<code class="descname">ForwardDiff.gradient!(out, f, x, cfg = ForwardDiff.GradientConfig(x))</code></dt>
+<code class="descname">ForwardDiff.gradient!(out, f, x, cfg = ForwardDiff.GradientConfig(f, x))</code></dt>
 <dd><p>Compute <span class="math">\(\nabla f(\vec{x})\)</span>, storing the output in <code class="docutils literal"><span class="pre">out</span></code>. It is highly advised
 to preallocate <code class="docutils literal"><span class="pre">cfg</span></code> yourself (see the <a class="reference external" href="basic_api.html#the-abstractconfig-types">AbstractConfig</a> section below).</p>
 </dd></dl>
@@ -184,14 +188,14 @@ <h2>Jacobians of <span class="math">\(f(x) : \mathbb{R}^{n_1} \times \dots \time
 <p>Use <code class="docutils literal"><span class="pre">ForwardDiff.jacobian</span></code> to differentiate functions of the form <code class="docutils literal"><span class="pre">f(::AbstractArray)::AbstractArray</span></code>.</p>
 <dl class="function">
 <dt>
-<code class="descname">ForwardDiff.jacobian!(out, f, x, cfg = ForwardDiff.JacobianConfig(x))</code></dt>
+<code class="descname">ForwardDiff.jacobian!(out, f, x, cfg = ForwardDiff.JacobianConfig(f, x))</code></dt>
 <dd><p>Compute <span class="math">\(\mathbf{J}(f)(\vec{x})\)</span>, storing the output in <code class="docutils literal"><span class="pre">out</span></code>. It is highly
 advised to preallocate <code class="docutils literal"><span class="pre">cfg</span></code> yourself (see the <a class="reference external" href="basic_api.html#the-abstractconfig-types">AbstractConfig</a> section below).</p>
 </dd></dl>
 
 <dl class="function">
 <dt>
-<code class="descname">ForwardDiff.jacobian!(out, f!, y, x, cfg = ForwardDiff.JacobianConfig(y, x))</code></dt>
+<code class="descname">ForwardDiff.jacobian!(out, f!, y, x, cfg = ForwardDiff.JacobianConfig(f!, y, x))</code></dt>
 <dd><p>Compute <span class="math">\(\mathbf{J}(f)(\vec{x})\)</span>, where <span class="math">\(f(\vec{x})\)</span> can be called as
 <code class="docutils literal"><span class="pre">f!(y,</span> <span class="pre">x)</span></code> such that the output of <span class="math">\(f(\vec{x})\)</span> is stored in <code class="docutils literal"><span class="pre">y</span></code>. The output
 matrix is stored in <code class="docutils literal"><span class="pre">out</span></code>.</p>
@@ -199,13 +203,13 @@ <h2>Jacobians of <span class="math">\(f(x) : \mathbb{R}^{n_1} \times \dots \time
 
 <dl class="function">
 <dt id="ForwardDiff.jacobian">
-<code class="descclassname">ForwardDiff.</code><code class="descname">jacobian</code><span class="sig-paren">(</span><em>f</em>, <em>x</em>, <em>cfg = ForwardDiff.JacobianConfig(x)</em><span class="sig-paren">)</span><a class="headerlink" href="#ForwardDiff.jacobian" title="Permalink to this definition">¶</a></dt>
+<code class="descclassname">ForwardDiff.</code><code class="descname">jacobian</code><span class="sig-paren">(</span><em>f</em>, <em>x</em>, <em>cfg = ForwardDiff.JacobianConfig(f</em>, <em>x)</em><span class="sig-paren">)</span><a class="headerlink" href="#ForwardDiff.jacobian" title="Permalink to this definition">¶</a></dt>
 <dd><p>Compute and return <span class="math">\(\mathbf{J}(f)(\vec{x})\)</span>.</p>
 </dd></dl>
 
 <dl class="function">
 <dt>
-<code class="descclassname">ForwardDiff.</code><code class="descname">jacobian</code><span class="sig-paren">(</span><em>f!</em>, <em>y</em>, <em>x</em>, <em>cfg = ForwardDiff.JacobianConfig(y</em>, <em>x)</em><span class="sig-paren">)</span></dt>
+<code class="descclassname">ForwardDiff.</code><code class="descname">jacobian</code><span class="sig-paren">(</span><em>f!</em>, <em>y</em>, <em>x</em>, <em>cfg = ForwardDiff.JacobianConfig(f!</em>, <em>y</em>, <em>x)</em><span class="sig-paren">)</span></dt>
 <dd><p>Compute and return <span class="math">\(\mathbf{J}(f)(\vec{x})\)</span>, where <span class="math">\(f(\vec{x})\)</span> can be
 called as <code class="docutils literal"><span class="pre">f!(y,</span> <span class="pre">x)</span></code> such that the output of <span class="math">\(f(\vec{x})\)</span> is stored in <code class="docutils literal"><span class="pre">y</span></code>.</p>
 </dd></dl>
@@ -216,14 +220,14 @@ <h2>Hessians of <span class="math">\(f(x) : \mathbb{R}^{n_1} \times \dots \times
 <p>Use <code class="docutils literal"><span class="pre">ForwardDiff.hessian</span></code> to perform second-order differentiation on functions of the form <code class="docutils literal"><span class="pre">f(::AbstractArray)::Real</span></code>.</p>
 <dl class="function">
 <dt>
-<code class="descname">ForwardDiff.hessian!(out, f, x, cfg = ForwardDiff.HessianConfig(x))</code></dt>
+<code class="descname">ForwardDiff.hessian!(out, f, x, cfg = ForwardDiff.HessianConfig(f, x))</code></dt>
 <dd><p>Compute <span class="math">\(\mathbf{H}(f)(\vec{x})\)</span>, storing the output in <code class="docutils literal"><span class="pre">out</span></code>. It is highly
 advised to preallocate <code class="docutils literal"><span class="pre">cfg</span></code> yourself (see the <a class="reference external" href="basic_api.html#the-abstractconfig-types">AbstractConfig</a> section below).</p>
 </dd></dl>
 
 <dl class="function">
 <dt id="ForwardDiff.hessian">
-<code class="descclassname">ForwardDiff.</code><code class="descname">hessian</code><span class="sig-paren">(</span><em>f</em>, <em>x</em>, <em>cfg = ForwardDiff.HessianConfig(x)</em><span class="sig-paren">)</span><a class="headerlink" href="#ForwardDiff.hessian" title="Permalink to this definition">¶</a></dt>
+<code class="descclassname">ForwardDiff.</code><code class="descname">hessian</code><span class="sig-paren">(</span><em>f</em>, <em>x</em>, <em>cfg = ForwardDiff.HessianConfig(f</em>, <em>x)</em><span class="sig-paren">)</span><a class="headerlink" href="#ForwardDiff.hessian" title="Permalink to this definition">¶</a></dt>
 <dd><p>Compute and return <span class="math">\(\mathbf{H}(f)(\vec{x})\)</span>.</p>
 </dd></dl>
 
@@ -234,73 +238,66 @@ <h2>The <code class="docutils literal"><span class="pre">AbstractConfig</span></
 API methods is bundled up in the <code class="docutils literal"><span class="pre">ForwardDiff.AbstractConfig</span></code> family of types. Theses
 types allow the user to easily feed several different parameters to ForwardDiff&#8217;s  API
 methods, such as <a class="reference external" href="advanced_usage.html#configuring-chunk-size">chunk size</a>, work buffers,
-multithreading configurations, and perturbation seed configurations.</p>
+and perturbation seed configurations.</p>
 <p>ForwardDiff&#8217;s basic API methods will allocate these types automatically by default,
 but you can drastically reduce memory usage if you preallocate them yourself.</p>
-<p>Note that for all constructors below, the chunk size <code class="docutils literal"><span class="pre">N</span></code> may be explictly provided as a
-type parameter, or omitted, in which case ForwardDiff will automatically select a chunk size
-for you. However, it is highly recomended to <a class="reference external" href="advanced_usage.html#configuring-chunk-size">specify the chunk size manually when possible</a>.</p>
+<p>Note that for all constructors below, the chunk size <code class="docutils literal"><span class="pre">N</span></code> may be explicitly provided,
+or omitted, in which case ForwardDiff will automatically select a chunk size for you.
+However, it is highly recommended to <a class="reference external" href="advanced_usage.html#configuring-chunk-size">specify the chunk size manually when possible</a>.</p>
+<p>Note also that configurations constructed for a specific function <code class="docutils literal"><span class="pre">f</span></code> cannot
+be reused to differentiate other functions (though they can be reused to differentiate
+<code class="docutils literal"><span class="pre">f</span></code> at different values). To construct a configuration which can be reused to
+differentiate any function, you can pass <code class="docutils literal"><span class="pre">nothing</span></code> as the function argument.
+While this is more flexible, this decreases ForwardDiff&#8217;s ability to catch
+and prevent <a class="reference external" href="https://github.com/JuliaDiff/ForwardDiff.jl/issues/83">perturbation confusion</a>.</p>
 <dl class="function">
-<dt>
-<code class="descname">ForwardDiff.GradientConfig{N}(x)</code></dt>
-<dd><p>Construct a <code class="docutils literal"><span class="pre">GradientConfig</span></code> instance based on the type and shape of the input vector
-<code class="docutils literal"><span class="pre">x</span></code>. The returned <code class="docutils literal"><span class="pre">GradientConfig</span></code> instance contains all the work buffers required
-by ForwardDiff&#8217;s gradient/Jacobian methods. If taking the Jacobian of a target function
-with the form <code class="docutils literal"><span class="pre">f!(y,</span> <span class="pre">x)</span></code>, use the constructor <code class="docutils literal"><span class="pre">ForwardDiff.GradientConfig{N}(y,</span> <span class="pre">x)</span></code>
-instead.</p>
+<dt id="ForwardDiff.GradientConfig">
+<code class="descclassname">ForwardDiff.</code><code class="descname">GradientConfig</code><span class="sig-paren">(</span><em>f</em>, <em>x</em>, <em>chunk::ForwardDiff.Chunk{N} = Chunk(x)</em><span class="sig-paren">)</span><a class="headerlink" href="#ForwardDiff.GradientConfig" title="Permalink to this definition">¶</a></dt>
+<dd><p>Construct a <code class="docutils literal"><span class="pre">GradientConfig</span></code> instance based on the type of <code class="docutils literal"><span class="pre">f</span></code> and
+type/shape of the input vector <code class="docutils literal"><span class="pre">x</span></code>. The returned <code class="docutils literal"><span class="pre">GradientConfig</span></code>
+instance contains all the work buffers required by ForwardDiff&#8217;s gradient
+methods.</p>
 <p>This constructor does not store/modify <code class="docutils literal"><span class="pre">x</span></code>.</p>
 </dd></dl>
 
 <dl class="function">
-<dt>
-<code class="descname">ForwardDiff.JacobianConfig{N}(x)</code></dt>
-<dd><p>Exactly like <code class="docutils literal"><span class="pre">ForwardDiff.GradientConfig{N}(x)</span></code>, but returns a <cite>JacobianConfig</cite>
-instead.</p>
+<dt id="ForwardDiff.JacobianConfig">
+<code class="descclassname">ForwardDiff.</code><code class="descname">JacobianConfig</code><span class="sig-paren">(</span><em>f</em>, <em>x</em>, <em>chunk::ForwardDiff.Chunk{N} = Chunk(x)</em><span class="sig-paren">)</span><a class="headerlink" href="#ForwardDiff.JacobianConfig" title="Permalink to this definition">¶</a></dt>
+<dd><p>Exactly like the <code class="docutils literal"><span class="pre">GradientConfig</span></code> constructor, but returns a <code class="docutils literal"><span class="pre">JacobianConfig</span></code> instead.</p>
 </dd></dl>
 
 <dl class="function">
 <dt>
-<code class="descname">ForwardDiff.JacobianConfig{N}(y, x)</code></dt>
-<dd><p>Construct a <code class="docutils literal"><span class="pre">JacobianConfig</span></code> instance based on the type and shape of the output vector
-<code class="docutils literal"><span class="pre">y</span></code> and the input vector <code class="docutils literal"><span class="pre">x</span></code>. The returned <code class="docutils literal"><span class="pre">JacobianConfig</span></code> instance contains all
-the work buffers required by  <code class="docutils literal"><span class="pre">ForwardDiff.jacobian</span></code>/<code class="docutils literal"><span class="pre">ForwardDiff.jacobian!</span></code> with a
-target function of the form <code class="docutils literal"><span class="pre">f!(y,</span> <span class="pre">x)</span></code>.</p>
+<code class="descclassname">ForwardDiff.</code><code class="descname">JacobianConfig</code><span class="sig-paren">(</span><em>f!</em>, <em>y</em>, <em>x</em>, <em>chunk::ForwardDiff.Chunk{N} = Chunk(x)</em><span class="sig-paren">)</span></dt>
+<dd><p>Construct a <code class="docutils literal"><span class="pre">JacobianConfig</span></code> instance based on the type of <code class="docutils literal"><span class="pre">f!</span></code>, and the
+types/shapes of the output vector <code class="docutils literal"><span class="pre">y</span></code> and the input vector <code class="docutils literal"><span class="pre">x</span></code>. The
+returned <code class="docutils literal"><span class="pre">JacobianConfig</span></code> instance contains all the work buffers required
+by <code class="docutils literal"><span class="pre">ForwardDiff.jacobian</span></code>/<code class="docutils literal"><span class="pre">ForwardDiff.jacobian!</span></code> when the target
+function takes the form <code class="docutils literal"><span class="pre">f!(y,</span> <span class="pre">x)</span></code>.</p>
 <p>This constructor does not store/modify <code class="docutils literal"><span class="pre">y</span></code> or <code class="docutils literal"><span class="pre">x</span></code>.</p>
 </dd></dl>
 
 <dl class="function">
-<dt>
-<code class="descname">ForwardDiff.HessianConfig{N}(x)</code></dt>
-<dd><p>Construct a <code class="docutils literal"><span class="pre">HessianConfig</span></code> instance based on the type and shape of the input vector
-<code class="docutils literal"><span class="pre">x</span></code>. The returned <code class="docutils literal"><span class="pre">HessianConfig</span></code> instance contains all the work buffers required
-by ForwardDiff&#8217;s Hessian methods. If using
-<code class="docutils literal"><span class="pre">ForwardDiff.hessian!(out::DiffBase.DiffResult,</span> <span class="pre">args...)</span></code>, use the constructor
-<code class="docutils literal"><span class="pre">ForwardDiff.HessianConfig{N}(out,</span> <span class="pre">x)</span></code> instead.</p>
+<dt id="ForwardDiff.HessianConfig">
+<code class="descclassname">ForwardDiff.</code><code class="descname">HessianConfig</code><span class="sig-paren">(</span><em>f</em>, <em>x</em>, <em>chunk::ForwardDiff.Chunk{N} = Chunk(x)</em><span class="sig-paren">)</span><a class="headerlink" href="#ForwardDiff.HessianConfig" title="Permalink to this definition">¶</a></dt>
+<dd><p>Construct a <code class="docutils literal"><span class="pre">HessianConfig</span></code> instance based on the type of <code class="docutils literal"><span class="pre">f</span></code> and
+type/shape of the input vector <code class="docutils literal"><span class="pre">x</span></code>. The returned <code class="docutils literal"><span class="pre">HessianConfig</span></code> instance contains
+all the work buffers required by ForwardDiff&#8217;s Hessian methods. If using
+<code class="docutils literal"><span class="pre">ForwardDiff.hessian!(out::DiffBase.DiffResult,</span> <span class="pre">f,</span> <span class="pre">x)</span></code>, use the constructor
+<code class="docutils literal"><span class="pre">ForwardDiff.HessianConfig(f,</span> <span class="pre">out,</span> <span class="pre">x,</span> <span class="pre">chunk)</span></code> instead.</p>
 <p>This constructor does not store/modify <code class="docutils literal"><span class="pre">x</span></code>.</p>
 </dd></dl>
 
 <dl class="function">
 <dt>
-<code class="descname">ForwardDiff.HessianConfig{N}(out::DiffBase.DiffResult, x)</code></dt>
-<dd><p>Construct an <code class="docutils literal"><span class="pre">HessianConfig</span></code> instance based on the type and shape of the storage in
-<code class="docutils literal"><span class="pre">out</span></code> and the input vector <code class="docutils literal"><span class="pre">x</span></code>. The returned <code class="docutils literal"><span class="pre">HessianConfig</span></code> instance contains
-all the work buffers required by <code class="docutils literal"><span class="pre">ForwardDiff.hessian!(out::DiffBase.DiffResult,</span>
-<span class="pre">args...)</span></code>.</p>
+<code class="descclassname">ForwardDiff.</code><code class="descname">HessianConfig</code><span class="sig-paren">(</span><em>f</em>, <em>out::DiffBase.DiffResult</em>, <em>x</em>, <em>chunk::ForwardDiff.Chunk{N} = Chunk(x)</em><span class="sig-paren">)</span></dt>
+<dd><p>Construct a <code class="docutils literal"><span class="pre">HessianConfig</span></code> instance based on the type of <code class="docutils literal"><span class="pre">f</span></code>, types/storage
+in <code class="docutils literal"><span class="pre">out</span></code>, and type/shape of the input vector <code class="docutils literal"><span class="pre">x</span></code>. The returned <code class="docutils literal"><span class="pre">HessianConfig</span></code>
+instance contains all the work buffers required by
+<code class="docutils literal"><span class="pre">ForwardDiff.hessian!(out::DiffBase.DiffResult,</span> <span class="pre">args...)</span></code>.</p>
 <p>This constructor does not store/modify <code class="docutils literal"><span class="pre">out</span></code> or <code class="docutils literal"><span class="pre">x</span></code>.</p>
 </dd></dl>
 
-<dl class="function">
-<dt id="ForwardDiff.MultithreadConfig">
-<code class="descclassname">ForwardDiff.</code><code class="descname">MultithreadConfig</code><span class="sig-paren">(</span><em>cfg::AbstractConfig</em><span class="sig-paren">)</span><a class="headerlink" href="#ForwardDiff.MultithreadConfig" title="Permalink to this definition">¶</a></dt>
-<dd><p>Wrap the given <code class="docutils literal"><span class="pre">cfg</span></code> in a <code class="docutils literal"><span class="pre">MultithreadConfig</span></code> instance, which can then be passed to
-gradient or Hessian methods in order to enable experimental multithreading. Jacobian
-methods do not yet support multithreading.</p>
-<p>Note that multithreaded ForwardDiff API methods will attempt to use all available
-threads. In the future, once Julia exposes more fine-grained threading primitives,
-a <code class="docutils literal"><span class="pre">MultithreadConfig</span></code> constructor may be added which takes in a user-provided subset
-of thread IDs instead of using all available threads.</p>
-</dd></dl>
-
 </div>
 </div>
 
@@ -345,7 +342,7 @@ <h2>The <code class="docutils literal"><span class="pre">AbstractConfig</span></
     <script type="text/javascript">
         var DOCUMENTATION_OPTIONS = {
             URL_ROOT:'./',
-            VERSION:'0.2.3',
+            VERSION:'0.5.0',
             COLLAPSE_INDEX:false,
             FILE_SUFFIX:'.html',
             HAS_SOURCE:  true
diff --git a/docs/contributing.html b/docs/contributing.html
index a2102a06..5a1973b7 100644
--- a/docs/contributing.html
+++ b/docs/contributing.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>How to Contribute &mdash; ForwardDiff.jl 0.2.3 documentation</title>
+  <title>How to Contribute &mdash; ForwardDiff.jl 0.5.0 documentation</title>
   
 
   
@@ -30,7 +30,7 @@
   
 
   
-    <link rel="top" title="ForwardDiff.jl 0.2.3 documentation" href="index.html"/>
+    <link rel="top" title="ForwardDiff.jl 0.5.0 documentation" href="index.html"/>
         <link rel="prev" title="How ForwardDiff Works" href="how_it_works.html"/> 
 
   
@@ -59,7 +59,7 @@
             
             
               <div class="version">
-                0.2
+                0.5
               </div>
             
           
@@ -181,9 +181,7 @@ <h2>Manually Optimizing Unary Functions<a class="headerlink" href="#manually-opt
 </pre></div>
 </div>
 <p>Some of these functions may have already been manually optimized. To see what functions have
-already been done, go to <code class="docutils literal"><span class="pre">src/dual.jl</span></code>, scroll down to the <code class="docutils literal"><span class="pre">Special</span> <span class="pre">Cases</span></code> section, and
-look at the functions under <code class="docutils literal"><span class="pre">Manually</span> <span class="pre">Optimized</span></code> (further optimizations to these functions
-are always welcome, if you can come up with something clever).</p>
+already been done, go to <code class="docutils literal"><span class="pre">src/dual.jl</span></code> and scroll down to the <code class="docutils literal"><span class="pre">Special</span> <span class="pre">Cases</span></code> section.</p>
 <p>The functions in <code class="docutils literal"><span class="pre">ForwardDiff.AUTO_DEFINED_UNARY_FUNCS</span></code> are automatically tested as part
 of ForwardDiff&#8217;s test suite, so you don&#8217;t need to write tests yourself. You can test your
 changes by running <code class="docutils literal"><span class="pre">Pkg.test(&quot;ForwardDiff&quot;)</span></code>.</p>
@@ -254,7 +252,7 @@ <h3>Manually Adding Functions to ForwardDiff<a class="headerlink" href="#manuall
     <script type="text/javascript">
         var DOCUMENTATION_OPTIONS = {
             URL_ROOT:'./',
-            VERSION:'0.2.3',
+            VERSION:'0.5.0',
             COLLAPSE_INDEX:false,
             FILE_SUFFIX:'.html',
             HAS_SOURCE:  true
diff --git a/docs/genindex.html b/docs/genindex.html
index ee28917c..28e76d10 100644
--- a/docs/genindex.html
+++ b/docs/genindex.html
@@ -9,7 +9,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Index &mdash; ForwardDiff.jl 0.2.3 documentation</title>
+  <title>Index &mdash; ForwardDiff.jl 0.5.0 documentation</title>
   
 
   
@@ -31,7 +31,7 @@
   
 
   
-    <link rel="top" title="ForwardDiff.jl 0.2.3 documentation" href="index.html"/> 
+    <link rel="top" title="ForwardDiff.jl 0.5.0 documentation" href="index.html"/> 
 
   
   <script src="_static/js/modernizr.min.js"></script>
@@ -59,7 +59,7 @@
             
             
               <div class="version">
-                0.2
+                0.5
               </div>
             
           
@@ -153,17 +153,25 @@ <h2 id="F">F</h2>
   </dt>
 
       
+  <dt><a href="basic_api.html#ForwardDiff.GradientConfig">ForwardDiff.GradientConfig() (built-in function)</a>
+  </dt>
+
+      
   <dt><a href="basic_api.html#ForwardDiff.hessian">ForwardDiff.hessian() (built-in function)</a>
   </dt>
 
   </dl></td>
   <td style="width: 33%" valign="top"><dl>
       
+  <dt><a href="basic_api.html#ForwardDiff.HessianConfig">ForwardDiff.HessianConfig() (built-in function)</a>, <a href="basic_api.html#ForwardDiff.HessianConfig">[1]</a>
+  </dt>
+
+      
   <dt><a href="basic_api.html#ForwardDiff.jacobian">ForwardDiff.jacobian() (built-in function)</a>, <a href="basic_api.html#ForwardDiff.jacobian">[1]</a>
   </dt>
 
       
-  <dt><a href="basic_api.html#ForwardDiff.MultithreadConfig">ForwardDiff.MultithreadConfig() (built-in function)</a>
+  <dt><a href="basic_api.html#ForwardDiff.JacobianConfig">ForwardDiff.JacobianConfig() (built-in function)</a>, <a href="basic_api.html#ForwardDiff.JacobianConfig">[1]</a>
   </dt>
 
   </dl></td>
@@ -202,7 +210,7 @@ <h2 id="F">F</h2>
     <script type="text/javascript">
         var DOCUMENTATION_OPTIONS = {
             URL_ROOT:'./',
-            VERSION:'0.2.3',
+            VERSION:'0.5.0',
             COLLAPSE_INDEX:false,
             FILE_SUFFIX:'.html',
             HAS_SOURCE:  true
diff --git a/docs/how_it_works.html b/docs/how_it_works.html
index b662c2f1..f08795d4 100644
--- a/docs/how_it_works.html
+++ b/docs/how_it_works.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>How ForwardDiff Works &mdash; ForwardDiff.jl 0.2.3 documentation</title>
+  <title>How ForwardDiff Works &mdash; ForwardDiff.jl 0.5.0 documentation</title>
   
 
   
@@ -30,7 +30,7 @@
   
 
   
-    <link rel="top" title="ForwardDiff.jl 0.2.3 documentation" href="index.html"/>
+    <link rel="top" title="ForwardDiff.jl 0.5.0 documentation" href="index.html"/>
         <link rel="next" title="How to Contribute" href="contributing.html"/>
         <link rel="prev" title="Upgrading from Older Versions of ForwardDiff" href="upgrade.html"/> 
 
@@ -60,7 +60,7 @@
             
             
               <div class="version">
-                0.2
+                0.5
               </div>
             
           
@@ -147,21 +147,22 @@ <h1>How ForwardDiff Works<a class="headerlink" href="#how-forwarddiff-works" tit
 Julia. There are two key components of this implementation: the <code class="docutils literal"><span class="pre">Dual</span></code> type, and the API.</p>
 <div class="section" id="dual-number-implementation">
 <h2>Dual Number Implementation<a class="headerlink" href="#dual-number-implementation" title="Permalink to this headline">¶</a></h2>
-<p>Partial derivatives are stored in the <code class="docutils literal"><span class="pre">Partials{N,T}</span></code> type:</p>
-<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="k">immutable</span> <span class="n">Partials</span><span class="p">{</span><span class="n">N</span><span class="p">,</span><span class="n">T</span><span class="p">}</span>
-    <span class="n">values</span><span class="p">::</span><span class="n">NTuple</span><span class="p">{</span><span class="n">N</span><span class="p">,</span><span class="n">T</span><span class="p">}</span>
+<p>Partial derivatives are stored in the <code class="docutils literal"><span class="pre">Partials{N,V}</span></code> type:</p>
+<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="n">struct</span> <span class="n">Partials</span><span class="p">{</span><span class="n">N</span><span class="p">,</span><span class="n">V</span><span class="p">}</span> <span class="o">&lt;:</span> <span class="n">AbstractVector</span><span class="p">{</span><span class="n">V</span><span class="p">}</span>
+    <span class="n">values</span><span class="p">::</span><span class="n">NTuple</span><span class="p">{</span><span class="n">N</span><span class="p">,</span><span class="n">V</span><span class="p">}</span>
 <span class="k">end</span>
 </pre></div>
 </div>
-<p>Overtop of this container type, ForwardDiff implements the <code class="docutils literal"><span class="pre">Dual{N,T}</span></code> type:</p>
-<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="k">immutable</span> <span class="n">Dual</span><span class="p">{</span><span class="n">N</span><span class="p">,</span><span class="n">T</span><span class="o">&lt;:</span><span class="n">Real</span><span class="p">}</span> <span class="o">&lt;:</span> <span class="n">Real</span>
-    <span class="n">value</span><span class="p">::</span><span class="n">T</span>
-    <span class="n">partials</span><span class="p">::</span><span class="n">Partials</span><span class="p">{</span><span class="n">N</span><span class="p">,</span><span class="n">T</span><span class="p">}</span>
+<p>Overtop of this container type, ForwardDiff implements the <code class="docutils literal"><span class="pre">Dual{T,V,N}</span></code> type:</p>
+<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="n">struct</span> <span class="n">Dual</span><span class="p">{</span><span class="n">T</span><span class="p">,</span><span class="n">V</span><span class="o">&lt;:</span><span class="n">Real</span><span class="p">,</span><span class="n">N</span><span class="p">}</span> <span class="o">&lt;:</span> <span class="n">Real</span>
+    <span class="n">value</span><span class="p">::</span><span class="n">V</span>
+    <span class="n">partials</span><span class="p">::</span><span class="n">Partials</span><span class="p">{</span><span class="n">N</span><span class="p">,</span><span class="n">V</span><span class="p">}</span>
 <span class="k">end</span>
 </pre></div>
 </div>
-<p>This type represents an <code class="docutils literal"><span class="pre">N</span></code>-dimensional <a class="reference external" href="https://en.wikipedia.org/wiki/Dual_number">dual number</a> with the following mathematical
-behavior:</p>
+<p>This type represents an <code class="docutils literal"><span class="pre">N</span></code>-dimensional <a class="reference external" href="https://en.wikipedia.org/wiki/Dual_number">dual number</a> coupled with a tag
+parameter <code class="docutils literal"><span class="pre">T</span></code> in order to prevent <a class="reference external" href="https://github.com/JuliaDiff/ForwardDiff.jl/issues/83">perturbation confusion</a>. This dual number
+type is implemented to have the following mathematical behavior:</p>
 <div class="math">
 \[f(a + \sum_{i=1}^N b_i \epsilon_i) = f(a) + f'(a) \sum_{i=1}^N b_i \epsilon_i\]</div>
 <p>where the <span class="math">\(a\)</span> component is stored in the <code class="docutils literal"><span class="pre">value</span></code> field and the <span class="math">\(b\)</span>
@@ -171,20 +172,20 @@ <h2>Dual Number Implementation<a class="headerlink" href="#dual-number-implement
 number are overloaded to evaluate both the original function, <em>and</em> evaluate the derivative
 of the function, propogating the derivative via multiplication. For example, <code class="docutils literal"><span class="pre">Base.sin</span></code>
 can be overloaded on <code class="docutils literal"><span class="pre">Dual</span></code> like so:</p>
-<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="n">Base</span><span class="o">.</span><span class="n">sin</span><span class="p">(</span><span class="n">d</span><span class="p">::</span><span class="n">Dual</span><span class="p">)</span> <span class="o">=</span> <span class="n">Dual</span><span class="p">(</span><span class="n">sin</span><span class="p">(</span><span class="n">value</span><span class="p">(</span><span class="n">d</span><span class="p">)),</span> <span class="n">cos</span><span class="p">(</span><span class="n">value</span><span class="p">(</span><span class="n">d</span><span class="p">))</span> <span class="o">*</span> <span class="n">partials</span><span class="p">(</span><span class="n">d</span><span class="p">))</span>
+<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="n">Base</span><span class="o">.</span><span class="n">sin</span><span class="p">(</span><span class="n">d</span><span class="p">::</span><span class="n">Dual</span><span class="p">{</span><span class="n">T</span><span class="p">})</span> <span class="n">where</span> <span class="p">{</span><span class="n">T</span><span class="p">}</span> <span class="o">=</span> <span class="n">Dual</span><span class="p">{</span><span class="n">T</span><span class="p">}(</span><span class="n">sin</span><span class="p">(</span><span class="n">value</span><span class="p">(</span><span class="n">d</span><span class="p">)),</span> <span class="n">cos</span><span class="p">(</span><span class="n">value</span><span class="p">(</span><span class="n">d</span><span class="p">))</span> <span class="o">*</span> <span class="n">partials</span><span class="p">(</span><span class="n">d</span><span class="p">))</span>
 </pre></div>
 </div>
 <p>If we assume that a general function <code class="docutils literal"><span class="pre">f</span></code> is composed of entirely of these elementary
 functions, then the chain rule enables our derivatives to compose as well. Thus, by
 overloading a plethora of elementary functions, we can differentiate generic functions
 composed of them by passing in a <code class="docutils literal"><span class="pre">Dual</span></code> number and looking at the output.</p>
-<p>We won&#8217;t dicuss higher-order differentiation in detail, but the reader is encouraged to
+<p>We won&#8217;t discuss higher-order differentiation in detail, but the reader is encouraged to
 learn about <a class="reference external" href="https://adl.stanford.edu/hyperdual/Fike_AIAA-2011-886.pdf">hyper-dual numbers</a>, which extend dual numbers to higher orders by introducing
 extra <span class="math">\(\epsilon\)</span> terms that can cross-multiply. ForwardDiff&#8217;s <code class="docutils literal"><span class="pre">Dual</span></code> number
 implementation naturally supports hyper-dual numbers without additional code by allowing
 instances of the <code class="docutils literal"><span class="pre">Dual</span></code> type to nest within each other. For example, a second-order
-hyper-dual number has the type <code class="docutils literal"><span class="pre">Dual{N,Dual{N,T}}</span></code>, a third-order hyper-dual number has
-the type <code class="docutils literal"><span class="pre">Dual{N,Dual{N,Dual{N,T}}}</span></code>, and so on.</p>
+hyper-dual number has the type <code class="docutils literal"><span class="pre">Dual{T,Dual{S,V,M},N}</span></code>, a third-order hyper-dual number has
+the type <code class="docutils literal"><span class="pre">Dual{T,Dual{S,Dual{R,V,K},M},N}</span></code>, and so on.</p>
 </div>
 <div class="section" id="forwarddiff-s-api">
 <h2>ForwardDiff&#8217;s API<a class="headerlink" href="#forwarddiff-s-api" title="Permalink to this headline">¶</a></h2>
@@ -282,7 +283,7 @@ <h2>ForwardDiff&#8217;s API<a class="headerlink" href="#forwarddiff-s-api" title
     <script type="text/javascript">
         var DOCUMENTATION_OPTIONS = {
             URL_ROOT:'./',
-            VERSION:'0.2.3',
+            VERSION:'0.5.0',
             COLLAPSE_INDEX:false,
             FILE_SUFFIX:'.html',
             HAS_SOURCE:  true
diff --git a/docs/index.html b/docs/index.html
index 15dd0f4d..2adec17e 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>ForwardDiff.jl &mdash; ForwardDiff.jl 0.2.3 documentation</title>
+  <title>ForwardDiff.jl &mdash; ForwardDiff.jl 0.5.0 documentation</title>
   
 
   
@@ -30,7 +30,7 @@
   
 
   
-    <link rel="top" title="ForwardDiff.jl 0.2.3 documentation" href="#"/>
+    <link rel="top" title="ForwardDiff.jl 0.5.0 documentation" href="#"/>
         <link rel="next" title="Installation and Version Requirements" href="install.html"/> 
 
   
@@ -59,7 +59,7 @@
             
             
               <div class="version">
-                0.2
+                0.5
               </div>
             
           
@@ -243,7 +243,7 @@ <h2>Publications<a class="headerlink" href="#publications" title="Permalink to t
     <script type="text/javascript">
         var DOCUMENTATION_OPTIONS = {
             URL_ROOT:'./',
-            VERSION:'0.2.3',
+            VERSION:'0.5.0',
             COLLAPSE_INDEX:false,
             FILE_SUFFIX:'.html',
             HAS_SOURCE:  true
diff --git a/docs/install.html b/docs/install.html
index ef01aaa8..757faa3c 100644
--- a/docs/install.html
+++ b/docs/install.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Installation and Version Requirements &mdash; ForwardDiff.jl 0.2.3 documentation</title>
+  <title>Installation and Version Requirements &mdash; ForwardDiff.jl 0.5.0 documentation</title>
   
 
   
@@ -30,7 +30,7 @@
   
 
   
-    <link rel="top" title="ForwardDiff.jl 0.2.3 documentation" href="index.html"/>
+    <link rel="top" title="ForwardDiff.jl 0.5.0 documentation" href="index.html"/>
         <link rel="next" title="Limitations of ForwardDiff" href="limitations.html"/>
         <link rel="prev" title="ForwardDiff.jl" href="index.html"/> 
 
@@ -60,7 +60,7 @@
             
             
               <div class="version">
-                0.2
+                0.5
               </div>
             
           
@@ -143,7 +143,7 @@ <h1>Installation and Version Requirements<a class="headerlink" href="#installati
 <div class="highlight-julia"><div class="highlight"><pre><span></span><span class="n">julia</span><span class="o">&gt;</span> <span class="n">Pkg</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="s">&quot;ForwardDiff&quot;</span><span class="p">)</span>
 </pre></div>
 </div>
-<p>The current version of ForwardDiff supports Julia v0.4 and v0.5.</p>
+<p>The current version of ForwardDiff supports Julia v0.6.</p>
 </div>
 
 
@@ -187,7 +187,7 @@ <h1>Installation and Version Requirements<a class="headerlink" href="#installati
     <script type="text/javascript">
         var DOCUMENTATION_OPTIONS = {
             URL_ROOT:'./',
-            VERSION:'0.2.3',
+            VERSION:'0.5.0',
             COLLAPSE_INDEX:false,
             FILE_SUFFIX:'.html',
             HAS_SOURCE:  true
diff --git a/docs/limitations.html b/docs/limitations.html
index f994cac3..dc02b46e 100644
--- a/docs/limitations.html
+++ b/docs/limitations.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Limitations of ForwardDiff &mdash; ForwardDiff.jl 0.2.3 documentation</title>
+  <title>Limitations of ForwardDiff &mdash; ForwardDiff.jl 0.5.0 documentation</title>
   
 
   
@@ -30,7 +30,7 @@
   
 
   
-    <link rel="top" title="ForwardDiff.jl 0.2.3 documentation" href="index.html"/>
+    <link rel="top" title="ForwardDiff.jl 0.5.0 documentation" href="index.html"/>
         <link rel="next" title="Basic ForwardDiff API" href="basic_api.html"/>
         <link rel="prev" title="Installation and Version Requirements" href="install.html"/> 
 
@@ -60,7 +60,7 @@
             
             
               <div class="version">
-                0.2
+                0.5
               </div>
             
           
@@ -147,7 +147,6 @@ <h1>Limitations of ForwardDiff<a class="headerlink" href="#limitations-of-forwar
 <li><strong>The target function can only be composed of generic Julia functions.</strong> ForwardDiff cannot propagate derivative information through non-Julia code. Thus, your function may not work if it makes calls to external, non-Julia programs, e.g. uses explicit BLAS calls instead of <code class="docutils literal"><span class="pre">Ax_mul_Bx</span></code>-style functions.</li>
 <li><strong>The target function must be unary (i.e., only accept a single argument).</strong> There is an exception to this rule for ForwardDiff&#8217;s <code class="docutils literal"><span class="pre">jacobian</span></code> API; see <a class="reference external" href="basic_api.html">the API documentation</a> for details.</li>
 <li><strong>The target function must be written generically enough to accept numbers of type ``T&lt;:Real`` as input  (or arrays of these numbers).</strong> The function doesn&#8217;t require a specific type signature, as long as the type signature is generic enough to avoid breaking this rule. This also means that any storage assigned used within the function must be generic as well (see <a class="reference external" href="https://github.com/JuliaDiff/ForwardDiff.jl/issues/136#issuecomment-237941790">this comment</a> for an example).</li>
-<li><strong>Nested differentiation of closures is dangerous.</strong> Differentiating closures is safe, and nested differentation is safe, but you might be vulnerable to a subtle bug if you try to do both. See <a class="reference external" href="https://github.com/JuliaDiff/ForwardDiff.jl/issues/83">the relevant issue</a> for details.</li>
 <li><strong>The types of array inputs must be subtypes of</strong> <code class="docutils literal"><span class="pre">AbstractArray</span></code> <strong>.</strong> Non-<code class="docutils literal"><span class="pre">AbstractArray</span></code> array-like types are not officially supported.</li>
 </ul>
 </div>
@@ -193,7 +192,7 @@ <h1>Limitations of ForwardDiff<a class="headerlink" href="#limitations-of-forwar
     <script type="text/javascript">
         var DOCUMENTATION_OPTIONS = {
             URL_ROOT:'./',
-            VERSION:'0.2.3',
+            VERSION:'0.5.0',
             COLLAPSE_INDEX:false,
             FILE_SUFFIX:'.html',
             HAS_SOURCE:  true
diff --git a/docs/objects.inv b/docs/objects.inv
index 7396bc2e..fc5db8c9 100644
--- a/docs/objects.inv
+++ b/docs/objects.inv
@@ -1,7 +1,9 @@
 # Sphinx inventory version 2
 # Project: ForwardDiff.jl
-# Version: 0.2
+# Version: 0.5
 # The remainder of this file is compressed using zlib.
-xڕS�N�0��+V�k��r+��JTT�±r�M�������8N��-���l�7�t܈%����KG�6��B��*h?�J�
-GZ�%칥b�[��]#��a�n�@C�耧�*��r��j���:M�������W����B�N\	]���ʠ����v`�zȎ��:�
-���a�Y�B+gh��*�x������T@&��R�h����+R��(�)�1�?��ˣ�2�Y�jP��Z�OV�nGn�i�f�)��"��K(��kFarC�SeO��K��hjh8�d\	xFc��#�{2�Y&���2�7���}��e�k��c:��l*D��^�8'��u��X$?����,���Z�}�����6��<��QNa5����$P,���ڍ/FV��
\ No newline at end of file
+xڝS�N�0��+V�k��r+��"�p��x�.8v�O'ŦHPn���xg���fˍ�RY�].��hUR����p��Ê[*��������)��U��Ak���Hoy�W��
+4��6x����Zǐǉ^�x����pU�Xz�+�ą���!�t
+�,:��'�l��2!i��+�r�Vޑ�Rq�G��ނ�0
+�nP��"_�s�]�`#�
+e0"%�#���G9����8�Pu��p@cW����z�$��j�f�)��"��K(��kzarC�SeK��K��;hh�;�d\	xFc��#�{2�Y&���2�7���]��e����a:�n4�ý^b?'���\�H~��y�Y�7�d��C���amJ�kx����4�ퟷ$P�-����'�Ƕz
\ No newline at end of file
diff --git a/docs/search.html b/docs/search.html
index da0d1bf4..e5ef7f1b 100644
--- a/docs/search.html
+++ b/docs/search.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Search &mdash; ForwardDiff.jl 0.2.3 documentation</title>
+  <title>Search &mdash; ForwardDiff.jl 0.5.0 documentation</title>
   
 
   
@@ -30,7 +30,7 @@
   
 
   
-    <link rel="top" title="ForwardDiff.jl 0.2.3 documentation" href="index.html"/> 
+    <link rel="top" title="ForwardDiff.jl 0.5.0 documentation" href="index.html"/> 
 
   
   <script src="_static/js/modernizr.min.js"></script>
@@ -58,7 +58,7 @@
             
             
               <div class="version">
-                0.2
+                0.5
               </div>
             
           
@@ -176,7 +176,7 @@
     <script type="text/javascript">
         var DOCUMENTATION_OPTIONS = {
             URL_ROOT:'./',
-            VERSION:'0.2.3',
+            VERSION:'0.5.0',
             COLLAPSE_INDEX:false,
             FILE_SUFFIX:'.html',
             HAS_SOURCE:  true
diff --git a/docs/searchindex.js b/docs/searchindex.js
index 61ed8ea5..541ae921 100644
--- a/docs/searchindex.js
+++ b/docs/searchindex.js
@@ -1 +1 @@
-Search.setIndex({envversion:49,filenames:["advanced_usage","index"],objects:{},objnames:{},objtypes:{},terms:{"byte":0,"case":0,"default":0,"final":0,"import":0,"new":1,"true":0,"try":0,"void":0,"while":[0,1],_70842:0,_70852:0,abl:0,about:1,abstractconfig:[0,1],accomplish:0,accuraci:1,actual:0,add:0,addit:0,advantag:1,affect:0,algorithm:1,align:0,all:0,alloc:0,allow:0,almost:0,also:0,amen:0,ani:1,answer:0,api:[0,1],appropri:0,aren:0,arg:0,arithmet:0,arrai:0,articl:1,arxiv:1,assert:0,attempt:0,author:[0,1],automat:[0,1],bandwidth:0,base:0,basic:[0,1],befor:0,behavior:0,behvaior:0,benchmark:0,best:0,better:0,binari:0,bitcast:0,bitcod:0,block:0,both:1,bring:0,buffer:0,build:0,built:0,calcul:0,call:0,callabl:1,can:[0,1],cast:0,caus:0,certain:0,cfg10:0,cfg1:0,cfg4:0,cfg:0,check:0,cite:1,code:0,code_llvm:0,common:1,compil:0,complet:0,compon:0,compos:0,comput:0,conjunct:0,constant:0,construct:0,constructor:0,consult:0,contribut:1,copi:0,correct:0,cost:0,could:0,cours:0,creat:1,data:0,decreas:0,defin:0,depend:[0,1],deriv:[0,1],describ:0,develop:1,diffbas:0,differenc:1,different:0,differenti:1,diffresult:0,dimens:0,directori:0,disabl:0,divid:0,document:[0,1],doe:0,doesn:0,doubl:0,downstream:0,dual:[0,1],dure:0,dynam:0,each:0,easili:0,element:0,eltyp:0,emit:0,enabl:[0,1],end:0,entir:0,erron:0,error:0,essenti:0,evalu:[0,1],even:0,evenli:0,exampl:0,execut:0,exist:0,expens:0,explic:0,explicitli:0,extra:0,fadd:0,featur:0,fell:0,find:1,fine:0,finit:1,first:0,flag:0,float64:0,follow:1,form:0,forward:1,forwarddiff:0,free:0,from:[0,1],futur:0,gener:[0,1],getelementptr:0,github:1,gradient:[0,1],gradientconfig:0,guess:0,have:0,heavili:0,here:0,heurist:0,higher:[0,1],hold:0,horribl:0,how:[0,1],howev:0,http:1,i32:0,i64:0,ident:0,imag:0,implement:1,inbound:0,incorrectli:0,increas:0,inform:0,inher:0,input:0,insert:0,installat:1,instead:0,instruct:0,isinf:0,isnan:0,jacobian:[0,1],journal:1,julia:[0,1],julia_:0,kindli:1,know:0,larger:0,learn:[0,1],length:0,let:0,librari:0,likewis:0,limit:1,
llvm:0,load:0,local:0,locat:0,log:0,lubin:1,machin:0,major:0,make:0,mani:0,manual:[0,1],matrix:0,memori:0,method:[0,1],might:0,mirror:0,mode:[0,1],modul:0,more:0,multipli:0,multithread:1,mutat:0,nansafe_mode_enabled:0,nativ:1,need:0,noalia:0,non:1,note:0,notic:0,number:[0,1],object:1,older:1,one:0,onli:0,oop:0,oper:0,optim:0,optimiz:1,org:1,other:[0,1],otherwis:0,our:0,out:0,outperform:1,output:0,over:1,overcom:0,own:0,packag:0,page:1,papamark:1,paper:1,partial:0,perform:[0,1],perhap:0,perturb:0,pick:0,place:0,plan:0,pleas:0,poison:0,possibl:0,pre:0,preserv:0,prevent:0,primal:0,process:0,propag:0,proport:0,provid:0,question:0,rand:0,rather:0,realiti:0,realli:1,reason:0,rebuild:0,reduc:0,redund:0,relev:0,reli:0,remain:0,remaind:0,request:1,requir:[0,1],reshap:0,resourc:1,ret:0,retriev:[0,1],revel:1,revelslubinpapamarkou2016:1,rosenbrock:0,run:0,runtim:0,sacrif:0,safe:0,sai:0,scope:0,second:0,see:0,seen:0,select:0,sensit:0,set:[0,1],sever:0,shape:0,should:0,show:0,similar:0,sin:0,sinc:0,slpvectorizerpass:0,smaller:0,some:0,sourc:0,specif:0,speed:1,speedup:0,squar:0,src:0,sret:0,stabil:0,stabl:0,start:0,state:0,store:0,suit:0,support:0,swoop:0,system:0,take:[0,1],target:0,task:0,techniqu:[0,1],tensor:0,than:0,thi:[0,1],thing:0,thu:0,time:0,titl:1,top:0,tune:0,two:0,type:[0,1],unari:1,undefin:0,unexport:1,unstabl:0,upgrade:1,url:1,user:[0,1],usual:0,usualli:0,util:0,vari:1,variabl:0,vector_hessian:0,version:[0,1],via:0,wai:0,want:0,well:0,what:0,when:0,where:0,wherea:0,whether:0,which:0,wikipedia:1,without:0,word:0,work:[0,1],would:0,write:0,year:1,yield:0,you:[0,1],your:[0,1],zero:0},titles:["Advanced Usage Guide","ForwardDiff.jl"],titleterms:{"function":0,"public":1,"return":0,access:0,advanc:0,chunk:0,configur:0,fix:0,forwarddiff:1,guid:0,hessian:0,inf:0,issu:0,lower:0,nan:0,order:0,result:0,simd:0,size:0,usage:0,valu:0,vector:0}})
\ No newline at end of file
+Search.setIndex({envversion:49,filenames:["advanced_usage","basic_api","contributing","how_it_works","index","install","limitations","upgrade"],objects:{ForwardDiff:{GradientConfig:[1,0,1,""],HessianConfig:[1,0,1,""],JacobianConfig:[1,0,1,""],derivative:[1,0,1,""],gradient:[1,0,1,""],hessian:[1,0,1,""],jacobian:[1,0,1,""]}},objnames:{"0":["py","function","Python function"]},objtypes:{"0":"py:function"},terms:{"abstract":3,"break":6,"byte":0,"case":[0,1,2,3],"catch":1,"default":[0,1,7],"export":7,"final":0,"import":2,"long":6,"true":[0,7],"try":0,"void":0,"while":[0,1,4],_70842:0,_70852:0,abil:1,abl:0,about:[3,4,7],abov:[3,7],abs2:2,abstractarrai:[1,6],abstractconfig:0,abstractvector:3,accept:[2,6],accomplish:[0,2],accuraci:4,actual:[0,2],add:[0,2,5],addit:[0,2,3],advantag:4,advis:1,affect:0,after:2,algorithm:4,align:0,all:[0,1,2,6,7],alloc:[0,1],allow:[0,1,3],allresult:7,almost:0,alreadi:2,also:[0,1,6],amen:0,ani:[1,4,6],answer:[0,7],api:0,apply:2,appropri:[0,2],aren:[0,2],arg:[0,1],argument:[1,6],arithmet:0,around:2,arrai:[0,2,6],articl:4,arxiv:4,assert:0,assign:6,assum:3,atan2:2,attempt:0,author:[0,4],auto:2,auto_defined_unary_funcs:2,automat:[0,1,2,3,4,7],avoid:[6,7],awai:3,ax_mul_bx:6,bandwidth:0,base:[0,1,3],basic:0,been:2,befor:0,behavior:[0,3],behvaior:0,below:1,benchmark:0,best:0,better:0,between:7,binari:0,bitcast:0,bitcod:0,blas:6,block:0,both:[3,4],branch:2,brief:2,bring:0,buffer:[0,1],build:0,built:0,bundl:1,calcul:[0,3],call:[0,1,3,6],callabl:4,can:[0,1,2,3,4,6,7],cannot:[1,6],cast:0,caus:0,cbrt:2,central:3,certain:0,cfg10:0,cfg1:0,cfg4:0,cfg:[0,1],chain:3,chang:[2,7],check:[0,2],chunk_siz:7,cite:4,clearer:7,code:[0,2,3,6,7],code_llvm:0,collect:6,comment:6,common:4,compil:0,complet:0,compon:[0,3],compos:[0,3,6,7],comput:[0,1],conflict:7,confus:[1,3],conjunct:0,consid:2,constant:0,construct:[0,1],constructor:[0,1],consult:0,contain:[1,3],conveni:1,copi:0,correct:0,cost:0,could:0,coupl:3,cours:0,creat:4,cross:3,cumprod:0,current:[5,7],data:0,decreas:[0,1
],defin:[0,2,7],definit:2,depend:[0,4],deriv:0,describ:0,descript:2,detail:[3,6,7],develop:4,diffbas:[0,1,7],differ:1,differenc:4,different:0,differenti:[1,2,3,4,6],diffresult:[0,1],dimens:0,dimension:[3,7],directori:0,disabl:0,discuss:3,divid:0,document:[0,4,6,7],doe:[0,1,2,3],doesn:[0,6],don:[2,3],done:2,doubl:0,down:2,downstream:0,drastic:1,dual:[0,2],dualtest:2,dure:0,dynam:0,each:[0,1,3],easi:[2,3],easiest:2,easili:[0,1],element:[0,1,2],elementari:3,eltyp:0,emit:0,enabl:[0,3,4],encourag:3,end:[0,3,7],enough:6,entir:[0,3],erron:0,error:[0,7],essenti:[0,2],evalu:[0,3,4],even:0,evenli:0,everyth:2,exactli:1,examin:3,exampl:[0,2,3,6,7],except:6,execut:[0,3],exist:[0,2,7],exp2:2,exp:2,expens:0,explic:0,explicit:6,explicitli:[0,7],explictli:1,expm1:2,extend:3,extern:6,extra:[0,1,3],extract:3,fadd:0,fairli:2,famili:1,familiar:3,featur:[0,3,7],feed:1,fell:0,few:[2,7],field:3,find:4,fine:0,finit:4,first:0,flag:0,flexibl:[1,7],float64:0,follow:[3,4],fork:2,form:[0,1],forward:[3,4],forwarddiff:0,free:0,from:[0,2,3,4],fulli:7,futur:0,gener:[0,2,3,4,6,7],getelementptr:0,github:[2,4],gradient:0,gradientconfig:[0,1,7],guess:0,have:[0,2,3,7],heavili:0,help:7,here:[0,2,3,6,7],hessianconfig:1,hessianresult:7,heurist:0,higher:[0,3,4],highli:1,hold:0,horribl:0,how:0,howev:[0,1],http:4,hyper:3,i32:0,i64:0,ident:0,imag:0,improv:2,inbound:0,incorrectli:0,increas:0,inform:[0,1,3,6],inher:0,inject:6,input:[0,1,3,6],insert:0,instal:5,installat:4,instanc:[1,3],instead:[0,1,6,7],instruct:0,int64:0,intern:7,introduc:3,inv:2,isinf:0,isnan:0,jacobian:0,jacobianconfig:1,job:3,journal:4,julia:[0,2,3,4,5,6],julia_:0,kei:3,kindli:4,know:0,larger:0,lead:7,learn:[0,3,4,7],length:[0,7],less:7,let:[0,3],librari:0,like:[1,3,6],likewis:0,limit:4,link:2,list:[2,6],llvm:0,load:0,local:0,locat:0,log10:2,log1p:2,log2:2,log:[0,2],longer:7,look:[2,3],lubin:4,machin:0,magic:7,mai:[1,2,6],maintain:7,major:0,make:[0,2,3,6],manag:5,mani:0,manual:[0,1],master:2,mathemat:3,matrix:[0,1],mean:6,memori:[0,1],mention:
2,merg:2,method:[0,1,4],might:0,mirror:0,mode:[0,3,4],modifi:1,modul:0,more:[0,1,7],multipl:3,multipli:[0,3],multithread:4,multithreadconfig:7,must:[6,7],mutat:[0,7],name:2,namespac:7,nansafe_mode_enabled:0,nativ:4,natur:[3,6],need:[0,2],nest:3,newcom:2,noalia:0,non:[2,4,6],note:[0,1],noth:1,notic:0,now:7,ntupl:3,number:[0,2],numer:3,object:4,obvious:2,occur:7,offici:6,old:7,older:4,omit:1,once:2,one:0,onli:[0,6],oop:0,open:2,oper:0,optim:[0,2],org:4,origin:3,other:[0,1,3,4,7],otherwis:0,our:[0,3,7],out:[0,1,7],outlin:2,outperform:4,output:[0,1,3],output_length:7,over:4,overcom:0,overload:3,overtop:3,own:[0,7],packag:[0,2,3,5,7],page:4,papamark:4,paper:4,paramet:[1,3],pariti:7,part:2,partial:[0,3],pass:[1,2,3],perform:[0,1,4],performantli:3,perhap:0,perturb:[0,1,3],pick:[0,2],pkg:[2,5],place:[0,2,7],plan:0,pleas:0,plethora:3,poison:0,possibl:[0,1],pre:0,prealloc:1,preserv:0,prevent:[0,1,3],primal:0,process:[0,2,3],program:6,propag:[0,6],properti:3,propog:3,proport:0,provid:[0,1,2,3,7],qualifi:7,question:0,rand:0,rather:0,reader:3,real:[1,3,6],realiti:[0,3],realli:4,reason:0,rebuild:0,recomend:1,reduc:[0,1],redund:0,refer:[6,7],rehash:3,reimplement:7,relev:[0,2],reli:0,remain:0,remaind:0,replac:2,repositori:2,repres:3,request:4,requir:[0,1,2,4],reshap:[0,7],resolv:2,resourc:4,respect:1,ret:0,retriev:[0,4],reus:1,revel:4,revelslubinpapamarkou2016:4,revolv:2,roadblock:6,rosenbrock:0,rule:[2,3,6],run:[0,2,6],runtim:[0,6],sacrif:0,safe:0,sai:0,sake:1,same:2,scope:0,scroll:2,second:[0,1,3],section:[1,2],see:[0,1,2,6,7],seed:[1,3],seen:[0,6],select:[0,1],sensit:0,set:[0,4],sever:[0,1],shape:[0,1],share:2,should:[0,1,2],show:0,signatur:6,similar:[0,3],simpl:3,simpli:5,sin:[2,3],sinc:[0,7],singl:6,slpvectorizerpass:0,smaller:0,some:[0,2,6,7],sourc:0,special:2,specif:[0,1,6],specifi:1,speed:4,speedup:0,sqrt:2,squar:0,src:[0,2],sret:0,stabil:0,stabl:0,start:0,state:0,storag:[1,6],store:[0,1,3],struct:3,style:6,submit:2,substanti:2,subtyp:6,suit:[0,2],suitabl:2,support:[0,2,3,5
,6,7],sure:2,swoop:0,symbol:2,system:0,tag:[0,3],take:[0,1,3,4,7],tan:2,target:[0,1,3,6],task:0,techniqu:[0,4,6],tensor:[0,7],term:3,test:2,than:0,thei:2,them:[1,3,7],thi:[0,1,2,3,4,6,7],thing:0,third:3,though:1,through:[2,6],thu:[0,3,6,7],time:0,titl:[2,4],top:0,transform:7,tune:0,tupl:1,tutori:2,two:[0,2,3],type:0,undefin:0,understand:3,unexport:4,unstabl:0,upgrade:4,url:4,usag:1,use:1,user:[0,1,3,4,6,7],usual:0,usualli:0,util:0,vari:4,variabl:0,vector_hessian:0,version:[0,4],via:0,wai:[0,2,3],want:0,well:[0,3,6],what:[0,2],whatev:2,when:[0,1,2],where:[0,1,3],wherea:0,whether:0,which:[0,1,2,3],wikipedia:4,wish:2,within:[3,6],without:[0,2,3],won:3,word:0,work:[0,1,2],workflow:2,would:[0,3],write:[0,2],written:6,year:4,yield:0,you:[0,1,2,4,7],your:[0,2,4,6],yourself:[1,2],zero:0},titles:["Advanced Usage Guide","Basic ForwardDiff API","How to Contribute","How ForwardDiff Works","ForwardDiff.jl","Installation and Version Requirements","Limitations of ForwardDiff","Upgrading from Older Versions of ForwardDiff"],titleterms:{"function":[0,2,7],"new":2,"public":4,"return":0,abstractconfig:1,access:0,adding:2,advanc:0,api:[1,3,7],basic:1,calculu:2,chunk:[0,7],configur:0,contribut:2,creat:7,deriv:1,differenti:7,dual:3,enabl:7,fix:0,forwarddiff:[1,2,3,4,6,7],from:7,gradient:1,guid:0,hessian:[0,1],higher:7,how:[2,3],implement:[2,3],inf:0,installat:5,issu:0,jacobian:1,limit:6,lower:[0,7],manual:2,multithread:7,nan:0,number:3,older:7,optimiz:2,order:[0,7],requir:5,result:[0,7],retriev:7,set:7,simd:0,size:[0,7],type:1,unari:2,unexport:7,upgrade:7,usage:0,valu:0,vector:0,version:[5,7],via:2,work:3}})
\ No newline at end of file
diff --git a/docs/upgrade.html b/docs/upgrade.html
index eb86c1f4..3965ac72 100644
--- a/docs/upgrade.html
+++ b/docs/upgrade.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Upgrading from Older Versions of ForwardDiff &mdash; ForwardDiff.jl 0.2.3 documentation</title>
+  <title>Upgrading from Older Versions of ForwardDiff &mdash; ForwardDiff.jl 0.5.0 documentation</title>
   
 
   
@@ -30,7 +30,7 @@
   
 
   
-    <link rel="top" title="ForwardDiff.jl 0.2.3 documentation" href="index.html"/>
+    <link rel="top" title="ForwardDiff.jl 0.5.0 documentation" href="index.html"/>
         <link rel="next" title="How ForwardDiff Works" href="how_it_works.html"/>
         <link rel="prev" title="Advanced Usage Guide" href="advanced_usage.html"/> 
 
@@ -60,7 +60,7 @@
             
             
               <div class="version">
-                0.2
+                0.5
               </div>
             
           
@@ -154,11 +154,11 @@ <h1>Upgrading from Older Versions of ForwardDiff<a class="headerlink" href="#upg
 <h2>Unexported API Functions<a class="headerlink" href="#unexported-api-functions" title="Permalink to this headline">¶</a></h2>
 <p>In order to avoid namespace conflicts with other packages, <a class="reference external" href="basic_api.html">ForwardDiff&#8217;s API functions</a> are no longer exported by default. Thus, you must now fully qualify the
 functions to reference them:</p>
-<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="c"># old v0.1 style</span>
+<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="c"># ForwardDiff v0.1</span>
 <span class="k">using</span> <span class="n">ForwardDiff</span>
 <span class="n">hessian</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">x</span><span class="p">)</span>
 
-<span class="c"># current v0.3 style (since v0.2)</span>
+<span class="c"># ForwardDiff v0.2 &amp; above</span>
 <span class="k">using</span> <span class="n">ForwardDiff</span>
 <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">hessian</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">x</span><span class="p">)</span>
 </pre></div>
@@ -166,44 +166,50 @@ <h2>Unexported API Functions<a class="headerlink" href="#unexported-api-function
 </div>
 <div class="section" id="setting-chunk-size">
 <h2>Setting Chunk Size<a class="headerlink" href="#setting-chunk-size" title="Permalink to this headline">¶</a></h2>
-<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="c"># old v0.1 style</span>
+<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="c"># ForwardDiff v0.1</span>
 <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">gradient</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">x</span><span class="p">;</span> <span class="n">chunk_size</span> <span class="o">=</span> <span class="mi">10</span><span class="p">)</span>
 
-<span class="c"># old v0.2 style</span>
+<span class="c"># ForwardDiff v0.2</span>
 <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">gradient</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">Chunk</span><span class="p">{</span><span class="mi">10</span><span class="p">}())</span>
 
-<span class="c"># current v0.3 style</span>
+<span class="c"># ForwardDiff v0.3 &amp; v0.4</span>
 <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">gradient</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">GradientConfig</span><span class="p">{</span><span class="mi">10</span><span class="p">}(</span><span class="n">x</span><span class="p">))</span>
+
+<span class="c"># ForwardDiff v0.5 &amp; above</span>
+<span class="n">ForwardDiff</span><span class="o">.</span><span class="n">gradient</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">GradientConfig</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">Chunk</span><span class="p">{</span><span class="n">N</span><span class="p">}()))</span>
 </pre></div>
 </div>
 </div>
 <div class="section" id="enabling-multithreading">
 <h2>Enabling Multithreading<a class="headerlink" href="#enabling-multithreading" title="Permalink to this headline">¶</a></h2>
-<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="c"># old v0.1/v0.2 style</span>
+<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="c"># ForwardDiff v0.1 &amp; v0.2</span>
 <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">gradient</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">x</span><span class="p">;</span> <span class="n">multithread</span> <span class="o">=</span> <span class="n">true</span><span class="p">)</span>
 
-<span class="c"># current v0.3 style</span>
+<span class="c"># ForwardDiff v0.3 &amp; v0.4</span>
 <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">gradient</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">MultithreadConfig</span><span class="p">(</span><span class="n">ForwardDiff</span><span class="o">.</span><span class="n">GradientConfig</span><span class="p">(</span><span class="n">x</span><span class="p">)))</span>
+
+<span class="c"># ForwardDiff v0.5 &amp; above</span>
+<span class="nb">error</span><span class="p">(</span><span class="s">&quot;ForwardDiff no longer supports internal multithreading.&quot;</span><span class="p">)</span>
 </pre></div>
 </div>
 </div>
 <div class="section" id="retrieving-lower-order-results">
 <h2>Retrieving Lower-Order Results<a class="headerlink" href="#retrieving-lower-order-results" title="Permalink to this headline">¶</a></h2>
 <p>For more detail, see our documentation on <a class="reference external" href="advanced_usage.html#accessing-lower-order-results">retrieving lower-order results</a>.</p>
-<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="c"># old v0.1 style</span>
+<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="c"># ForwardDiff v0.1</span>
 <span class="n">answer</span><span class="p">,</span> <span class="n">results</span> <span class="o">=</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">hessian</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">AllResults</span><span class="p">)</span>
 <span class="n">v</span> <span class="o">=</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">value</span><span class="p">(</span><span class="n">results</span><span class="p">)</span>
 <span class="n">g</span> <span class="o">=</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">gradient</span><span class="p">(</span><span class="n">results</span><span class="p">)</span>
 <span class="n">h</span> <span class="o">=</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">hessian</span><span class="p">(</span><span class="n">results</span><span class="p">)</span> <span class="c"># == answer</span>
 
-<span class="c"># old v0.2 style</span>
+<span class="c"># ForwardDiff v0.2</span>
 <span class="n">out</span> <span class="o">=</span> <span class="n">HessianResult</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
 <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">hessian!</span><span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="n">f</span><span class="p">,</span> <span class="n">x</span><span class="p">)</span>
 <span class="n">v</span> <span class="o">=</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">value</span><span class="p">(</span><span class="n">out</span><span class="p">)</span>
 <span class="n">g</span> <span class="o">=</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">gradient</span><span class="p">(</span><span class="n">out</span><span class="p">)</span>
 <span class="n">h</span> <span class="o">=</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">hessian</span><span class="p">(</span><span class="n">out</span><span class="p">)</span>
 
-<span class="c"># current v0.3 style</span>
+<span class="c"># ForwardDiff v0.3 &amp; above</span>
 <span class="k">using</span> <span class="n">DiffBase</span>
 <span class="n">out</span> <span class="o">=</span> <span class="n">DiffBase</span><span class="o">.</span><span class="n">HessianResult</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
 <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">hessian!</span><span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="n">f</span><span class="p">,</span> <span class="n">x</span><span class="p">)</span>
@@ -219,10 +225,10 @@ <h2>Higher-Order Differentiation<a class="headerlink" href="#higher-order-differ
 provides the <code class="docutils literal"><span class="pre">tensor</span></code> function. Instead, users can take higher-order/higher-dimensional
 derivatives by composing existing API functions. For example, here&#8217;s how to reimplement
 <code class="docutils literal"><span class="pre">tensor</span></code>:</p>
-<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="c"># old v0.1 style</span>
+<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="c"># ForwardDiff v0.1</span>
 <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">tensor</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">x</span><span class="p">)</span>
 
-<span class="c"># current v0.3 style (since v0.2)</span>
+<span class="c"># ForwardDiff v0.2 &amp; above</span>
 <span class="k">function</span><span class="nf"> tensor</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">x</span><span class="p">)</span>
     <span class="n">n</span> <span class="o">=</span> <span class="n">length</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
     <span class="n">out</span> <span class="o">=</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">jacobian</span><span class="p">(</span><span class="n">y</span> <span class="o">-&gt;</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">hessian</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">y</span><span class="p">),</span> <span class="n">x</span><span class="p">)</span>
@@ -239,26 +245,26 @@ <h2>Creating Differentiation Functions<a class="headerlink" href="#creating-diff
 functions. Instead, users explicitly define their own functions using ForwardDiff&#8217;s API.
 This leads to clearer code, less &#8220;magic&#8221;, and more flexibility. To learn how about
 ForwardDiff&#8217;s API functions, see <a class="reference external" href="basic_api.html">our API documentation</a>.</p>
-<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="c"># old v0.1 style</span>
+<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="c"># ForwardDiff v0.1</span>
 <span class="n">df</span> <span class="o">=</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">derivative</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
 
-<span class="c"># current v0.3 style (since v0.2)</span>
+<span class="c"># ForwardDiff v0.2 &amp; above</span>
 <span class="n">df</span> <span class="o">=</span> <span class="n">x</span> <span class="o">-&gt;</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">derivative</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">x</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="c"># old v0.1 style</span>
+<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="c"># ForwardDiff v0.1</span>
 <span class="c"># in-place gradient function of f</span>
 <span class="n">gf!</span> <span class="o">=</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">gradient</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">mutates</span> <span class="o">=</span> <span class="n">true</span><span class="p">)</span>
 
-<span class="c"># current v0.3 style (since v0.2)</span>
+<span class="c"># ForwardDiff v0.2 &amp; above</span>
 <span class="n">gf!</span> <span class="o">=</span> <span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="n">x</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">gradient!</span><span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="n">f</span><span class="p">,</span> <span class="n">x</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="c"># old v0.1 style</span>
+<div class="highlight-julia"><div class="highlight"><pre><span></span><span class="c"># ForwardDiff v0.1</span>
 <span class="c"># in-place Jacobian function of f!(y, x):</span>
 <span class="n">jf!</span> <span class="o">=</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">jacobian</span><span class="p">(</span><span class="n">f!</span><span class="p">,</span> <span class="n">mutates</span> <span class="o">=</span> <span class="n">true</span><span class="p">,</span> <span class="n">output_length</span> <span class="o">=</span> <span class="n">length</span><span class="p">(</span><span class="n">y</span><span class="p">))</span>
 
-<span class="c"># current v0.3 style (since v0.2)</span>
+<span class="c"># ForwardDiff v0.2 &amp; above</span>
 <span class="n">jf!</span> <span class="o">=</span> <span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">x</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">ForwardDiff</span><span class="o">.</span><span class="n">jacobian!</span><span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="n">f!</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">x</span><span class="p">)</span>
 </pre></div>
 </div>
@@ -306,7 +312,7 @@ <h2>Creating Differentiation Functions<a class="headerlink" href="#creating-diff
     <script type="text/javascript">
         var DOCUMENTATION_OPTIONS = {
             URL_ROOT:'./',
-            VERSION:'0.2.3',
+            VERSION:'0.5.0',
             COLLAPSE_INDEX:false,
             FILE_SUFFIX:'.html',
             HAS_SOURCE:  true

From 200846c3df9191ed2613b1858e6e91f9bc4bd682 Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Thu, 13 Apr 2017 12:37:08 -0400
Subject: [PATCH 26/26] remove stack-allocated/low-dimensional
 gradient/Jacobian routines

---
 src/derivative.jl      | 49 -------------------------------
 test/DerivativeTest.jl | 66 ------------------------------------------
 2 files changed, 115 deletions(-)

diff --git a/src/derivative.jl b/src/derivative.jl
index abb4b7b3..4d8449a5 100644
--- a/src/derivative.jl
+++ b/src/derivative.jl
@@ -7,30 +7,12 @@
     return extract_derivative(f(Dual{T}(x, one(x))))
 end
 
-@generated function derivative(f::F, x::NTuple{N,Real}) where {F,N}
-    args = [:(Dual{T}(x[$i], Val{N}, Val{$i})) for i in 1:N]
-    return quote
-        $(Expr(:meta, :inline))
-        T = typeof(Tag(F, typeof(x)))
-        extract_derivative(f($(args...)), Chunk{N}())
-    end
-end
-
 @inline function derivative!(out, f::F, x::R) where {F,R<:Real}
     T = typeof(Tag(F, typeof(x)))
     extract_derivative!(out, f(Dual{T}(x, one(x))))
     return out
 end
 
-@generated function derivative!(out::NTuple{N,Any}, f::F, x::NTuple{N,Real}) where {F,N}
-    args = [:(Dual{T}(x[$i], Val{N}, Val{$i})) for i in 1:N]
-    return quote
-        $(Expr(:meta, :inline))
-        T = typeof(Tag(F, typeof(x)))
-        extract_derivative!(out, f($(args...)))
-    end
-end
-
 #####################
 # result extraction #
 #####################
@@ -42,44 +24,13 @@ end
 @inline extract_derivative(y::Real) = zero(y)
 @inline extract_derivative(y::AbstractArray) = extract_derivative!(similar(y, valtype(eltype(y))), y)
 
-@generated function extract_derivative(y::Dual{T,V,N}, ::Chunk{N}) where {T,V,N}
-    return quote
-        $(Expr(:meta, :inline))
-        $(Expr(:tuple, [:(partials(y, $i)) for i in 1:N]...))
-    end
-end
-
-@generated function extract_derivative(y::AbstractArray, ::Chunk{N}) where {N}
-    return quote
-        $(Expr(:meta, :inline))
-        V = valtype(eltype(y))
-        out = $(Expr(:tuple, [:(similar(y, V)) for i in 1:N]...))
-        return extract_derivative!(out, y)
-    end
-end
 # mutating #
 #----------#
 
-@generated function extract_derivative!(out::NTuple{N,Any}, y) where {N}
-    return quote
-        $(Expr(:meta, :inline))
-        $(Expr(:block, [:(extract_derivative!(out[$i], y, $i)) for i in 1:N]...))
-        return out
-    end
-end
-
 extract_derivative!(out::AbstractArray, y::AbstractArray) = map!(extract_derivative, out, y)
-extract_derivative!(out::AbstractArray, y::AbstractArray, p) = map!(x -> partials(x, p), out, y)
-extract_derivative!(out::Union{AbstractArray,Base.Ref}, y::Dual, p) = (out[] = partials(y, p); out)
 
 function extract_derivative!(out::DiffResult, y)
     DiffBase.value!(value, out, y)
     DiffBase.derivative!(extract_derivative, out, y)
     return out
 end
-
-function extract_derivative!(out::DiffResult, y, p)
-    DiffBase.value!(value, out, y)
-    DiffBase.derivative!(x -> partials(x, p), out, y)
-    return out
-end
diff --git a/test/DerivativeTest.jl b/test/DerivativeTest.jl
index 901a66cb..85300ba3 100644
--- a/test/DerivativeTest.jl
+++ b/test/DerivativeTest.jl
@@ -45,70 +45,4 @@ for f in DiffBase.NUMBER_TO_ARRAY_FUNCS
     @test isapprox(DiffBase.derivative(out), d)
 end
 
-##################
-# n-ary versions #
-##################
-
-# (::Real, ::Real) -> ::Real #
-#----------------------------#
-
-f(a, b) = sin(a) * tan(b)
-
-a, b = rand(2)
-
-valf = f(a, b)
-∇f = ForwardDiff.gradient(x -> f(x...), [a, b])
-
-@test collect(ForwardDiff.derivative(f, (a, b))) == ∇f
-
-out = (DiffBase.DiffResult(zero(a), zero(a)), DiffBase.DiffResult(zero(b), zero(b)))
-ForwardDiff.derivative!(out, f, (a, b))
-@test DiffBase.value(out[1]) == DiffBase.value(out[2]) == valf
-@test [DiffBase.derivative(out[1]), DiffBase.derivative(out[2])] == ∇f
-
-out = (Base.RefValue(zero(a)), DiffBase.DiffResult(zero(b), zero(b)))
-ForwardDiff.derivative!(out, f, (a, b))
-@test DiffBase.value(out[2]) == valf
-@test [out[1][], DiffBase.derivative(out[2])] == ∇f
-
-out = (DiffBase.DiffResult(zero(a), zero(a)), [zero(b)])
-ForwardDiff.derivative!(out, f, (a, b))
-@test DiffBase.value(out[1]) == valf
-@test [DiffBase.derivative(out[1]), out[2][]] == ∇f
-
-out = (Base.RefValue(zero(a)), [zero(b)])
-ForwardDiff.derivative!(out, f, (a, b))
-@test [out[1][], out[2][]] == ∇f
-
-# (::Real, ::Real) -> ::Vector #
-#------------------------------#
-
-g(a, b) = cos.([f(a, b), f(b, a)]) .+ b .- a
-
-a, b = rand(2)
-
-valg = g(a, b)
-Jg = ForwardDiff.jacobian(x -> g(x...), [a, b])
-
-@test hcat(ForwardDiff.derivative(g, (a, b))...) == Jg
-
-out = (DiffBase.DiffResult(similar(valg), similar(valg)), DiffBase.DiffResult(similar(valg), similar(valg)))
-ForwardDiff.derivative!(out, g, (a, b))
-@test DiffBase.value(out[1]) == DiffBase.value(out[2]) == valg
-@test hcat(DiffBase.derivative(out[1]), DiffBase.derivative(out[2])) == Jg
-
-out = (similar(valg), DiffBase.DiffResult(similar(valg), similar(valg)))
-ForwardDiff.derivative!(out, g, (a, b))
-@test DiffBase.value(out[2]) == valg
-@test hcat(out[1], DiffBase.derivative(out[2])) == Jg
-
-out = (DiffBase.DiffResult(similar(valg), similar(valg)), similar(valg))
-ForwardDiff.derivative!(out, g, (a, b))
-@test DiffBase.value(out[1]) == valg
-@test hcat(DiffBase.derivative(out[1]), out[2]) == Jg
-
-out = (similar(valg), similar(valg))
-ForwardDiff.derivative!(out, g, (a, b))
-@test hcat(out[1], out[2]) == Jg
-
 end # module