diff --git a/.travis.yml b/.travis.yml index 8bf16fe6..69b0bf6b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,5 @@ language: julia julia: - - 0.5 - 0.6 - nightly notifications: diff --git a/REQUIRE b/REQUIRE index 3809d461..7f2f5e18 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,6 +1,5 @@ -julia 0.5 +julia 0.6- DiffBase 0.0.3 -Compat 0.19.0 Calculus 0.2.0 NaNMath 0.2.2 SpecialFunctions 0.1.0 diff --git a/benchmark/ForwardDiffBenchmarks.jl b/benchmarks/benchmarks.jl similarity index 91% rename from benchmark/ForwardDiffBenchmarks.jl rename to benchmarks/benchmarks.jl index 88312f5c..75523594 100644 --- a/benchmark/ForwardDiffBenchmarks.jl +++ b/benchmarks/benchmarks.jl @@ -1,5 +1,3 @@ -module ForwardDiffBenchmarks - using ForwardDiff, DiffBase using BenchmarkTools @@ -39,11 +37,11 @@ for f in (DiffBase.VECTOR_TO_NUMBER_FUNCS..., DiffBase.MATRIX_TO_NUMBER_FUNCS... fval[length(x)] = @benchmarkable $(f)($x) gout = DiffBase.DiffResult(y, similar(x, typeof(y))) - gcfg = ForwardDiff.Config(x) + gcfg = ForwardDiff.GradientConfig(nothing, x) fgrad[length(x)] = @benchmarkable ForwardDiff.gradient!($gout, $f, $x, $gcfg) hout = DiffBase.DiffResult(y, similar(x, typeof(y)), similar(x, typeof(y), length(x), length(x))) - hcfg = ForwardDiff.HessianConfig(hout, x) + hcfg = ForwardDiff.HessianConfig(nothing, hout, x) fhess[length(x)] = @benchmarkable ForwardDiff.hessian!($hout, $f, $x, $hcfg) end end @@ -56,9 +54,7 @@ for f in DiffBase.ARRAY_TO_ARRAY_FUNCS fval[length(x)] = @benchmarkable $(f)($x) out = DiffBase.JacobianResult(y, x) - cfg = ForwardDiff.Config(x) - fjac[length(x)] = @benchmarkable ForwardDiff.jacobian!($out, $f, $x, $cfg) + cfg = ForwardDiff.JacobianConfig(nothing, y, x) + fjac[length(x)] = @benchmarkable ForwardDiff.jacobian!($out, $f, $y, $x, $cfg) end end - -end # module diff --git a/benchmark/cpp/.gitignore b/benchmarks/cpp/.gitignore similarity index 100% rename from benchmark/cpp/.gitignore rename to benchmarks/cpp/.gitignore diff --git a/benchmark/cpp/Makefile b/benchmarks/cpp/Makefile similarity index 100% rename from benchmark/cpp/Makefile rename to benchmarks/cpp/Makefile diff --git a/benchmark/cpp/benchmarks.cpp b/benchmarks/cpp/benchmarks.cpp similarity index 100% rename from benchmark/cpp/benchmarks.cpp rename to benchmarks/cpp/benchmarks.cpp diff --git a/benchmark/cpp/benchmarks.h b/benchmarks/cpp/benchmarks.h similarity index 100% rename from benchmark/cpp/benchmarks.h rename to benchmarks/cpp/benchmarks.h diff --git a/benchmark/cpp/dual1.cpp b/benchmarks/cpp/dual1.cpp similarity index 100% rename from benchmark/cpp/dual1.cpp rename to benchmarks/cpp/dual1.cpp diff --git a/benchmark/cpp/dual2.cpp b/benchmarks/cpp/dual2.cpp similarity index 100% rename from benchmark/cpp/dual2.cpp rename to benchmarks/cpp/dual2.cpp diff --git a/benchmark/cpp/dual3.cpp b/benchmarks/cpp/dual3.cpp similarity index 100% rename from benchmark/cpp/dual3.cpp rename to benchmarks/cpp/dual3.cpp diff --git a/benchmark/cpp/dual4.cpp b/benchmarks/cpp/dual4.cpp similarity index 100% rename from benchmark/cpp/dual4.cpp rename to benchmarks/cpp/dual4.cpp diff --git a/benchmark/cpp/dual5.cpp b/benchmarks/cpp/dual5.cpp similarity index 100% rename from benchmark/cpp/dual5.cpp rename to benchmarks/cpp/dual5.cpp diff --git a/benchmark/py/algopy_benchmarks.py b/benchmarks/py/algopy_benchmarks.py similarity index 100% rename from benchmark/py/algopy_benchmarks.py rename to benchmarks/py/algopy_benchmarks.py diff --git a/benchmark/py/autograd_benchmarks.py b/benchmarks/py/autograd_benchmarks.py similarity index 
100% rename from benchmark/py/autograd_benchmarks.py rename to benchmarks/py/autograd_benchmarks.py diff --git a/docs/_rst/source/advanced_usage.rst b/docs/_rst/source/advanced_usage.rst index 576cc4ee..df0c887d 100644 --- a/docs/_rst/source/advanced_usage.rst +++ b/docs/_rst/source/advanced_usage.rst @@ -9,7 +9,7 @@ Accessing Lower-Order Results Let's say you want to calculate the value, gradient, and Hessian of some function ``f`` at an input ``x``. You could execute ``f(x)``, ``ForwardDiff.gradient(f, x)`` and -``ForwardDiff.hessian(f, x)``, but that would be a **horribly redundant way to accomplish +``ForwardDiff.hessian(f, x)``, but that would be a **horribly redundant way to accomplish this task!** In the course of calculating higher-order derivatives, ForwardDiff ends up calculating all @@ -37,7 +37,7 @@ For example: .. code-block:: julia - julia> import ForwardDiff + julia> using ForwardDiff: GradientConfig, Chunk, gradient! # let's use a Rosenbrock function as our target function julia> function rosenbrock(x) @@ -58,25 +58,25 @@ For example: julia> out = similar(x); # construct GradientConfig with chunk size of 1 - julia> cfg1 = ForwardDiff.GradientConfig{1}(x); + julia> cfg1 = GradientConfig(rosenbrock, x, Chunk{1}()); # construct GradientConfig with chunk size of 4 - julia> cfg4 = ForwardDiff.GradientConfig{4}(x); + julia> cfg4 = GradientConfig(rosenbrock, x, Chunk{4}()); # construct GradientConfig with chunk size of 10 - julia> cfg10 = ForwardDiff.GradientConfig{10}(x); + julia> cfg10 = GradientConfig(rosenbrock, x, Chunk{10}()); # (input length of 10000) / (chunk size of 1) = (10000 1-element chunks) - julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg1); - 0.408305 seconds (4 allocations: 160 bytes) + julia> @time gradient!(out, rosenbrock, x, cfg1); + 0.775139 seconds (4 allocations: 160 bytes) # (input length of 10000) / (chunk size of 4) = (2500 4-element chunks) - julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg4); - 0.295764 seconds (4 allocations: 160 bytes) + julia> @time gradient!(out, rosenbrock, x, cfg4); + 0.386459 seconds (4 allocations: 160 bytes) # (input length of 10000) / (chunk size of 10) = (1000 10-element chunks) - julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg10); - 0.267396 seconds (4 allocations: 160 bytes) + julia> @time gradient!(out, rosenbrock, x, cfg10); + 0.282529 seconds (4 allocations: 160 bytes) If you do not explicitly provide a chunk size, ForwardDiff will try to guess one for you based on your input vector: @@ -85,10 +85,10 @@ based on your input vector: # The GradientConfig constructor will automatically select a # chunk size if one is not explicitly provided - julia> cfg = ForwardDiff.GradientConfig(x); + julia> cfg = ForwardDiff.GradientConfig(rosenbrock, x); julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg); - 0.266920 seconds (4 allocations: 160 bytes) + 0.281853 seconds (4 allocations: 160 bytes) If your input dimension is a constant, you should explicitly select a chunk size rather than relying on ForwardDiff's heuristic. There are two reasons for this. 
The first is that @@ -130,8 +130,8 @@ aren't sensitive to the input and thus cause ForwardDiff to incorrectly return ` # the dual number's perturbation component is zero, so this # variable should not propagate derivative information - julia> log(ForwardDiff.Dual(0.0, 0.0)) - Dual(-Inf,NaN) # oops, this NaN should be 0.0 + julia> log(ForwardDiff.Dual{:tag}(0.0, 0.0)) + Dual{:tag}(-Inf,NaN) # oops, this NaN should be 0.0 Here, ForwardDiff computes the derivative of ``log(0.0)`` as ``NaN`` and then propagates this derivative by multiplying it by the perturbation component. Usually, ForwardDiff can @@ -153,7 +153,6 @@ In the future, we plan on allowing users and downstream library authors to dynam enable ``NaN``-safe mode via the ``AbstractConfig`` API (see `the relevant issue `_). - Hessian of a vector-valued function ----------------------------------- @@ -163,17 +162,17 @@ For example: .. code-block:: julia - julia> ForwardDiff.jacobian(x -> ForwardDiff.jacobian(sin, x), [1,2,3]) - 9×3 Array{Float64,2}: - -0.841471 0.0 0.0 - -0.0 -0.0 -0.0 - -0.0 -0.0 -0.0 - 0.0 0.0 0.0 - -0.0 -0.909297 -0.0 - -0.0 -0.0 -0.0 - 0.0 0.0 0.0 - -0.0 -0.0 -0.0 - -0.0 -0.0 -0.14112 + julia> ForwardDiff.jacobian(x -> ForwardDiff.jacobian(cumprod, x), [1,2,3]) + 9×3 Array{Int64,2}: + 0 0 0 + 0 1 0 + 0 3 2 + 0 0 0 + 1 0 0 + 3 0 1 + 0 0 0 + 0 0 0 + 2 1 0 Since this functionality is composed from ForwardDiff's existing API rather than built into it, you're free to construct a ``vector_hessian`` function which suits your needs. For @@ -190,22 +189,22 @@ expensive operation): end vector_hessian (generic function with 1 method) - julia> vector_hessian(sin, [1, 2, 3]) - 3×3×3 Array{Float64,3}: + julia> vector_hessian(cumprod, [1, 2, 3]) + 3×3×3 Array{Int64,3}: [:, :, 1] = - -0.841471 0.0 0.0 - -0.0 -0.0 -0.0 - -0.0 -0.0 -0.0 + 0 0 0 + 0 1 0 + 0 3 2 [:, :, 2] = - 0.0 0.0 0.0 - -0.0 -0.909297 -0.0 - -0.0 -0.0 -0.0 + 0 0 0 + 1 0 0 + 3 0 1 [:, :, 3] = - 0.0 0.0 0.0 - -0.0 -0.0 -0.0 - -0.0 -0.0 -0.14112 + 0 0 0 + 0 0 0 + 2 1 0 Likewise, you could write a version of ``vector_hessian`` which supports functions of the form ``f!(y, x)``, or perhaps an in-place Jacobian with ``ForwardDiff.jacobian!``. @@ -232,10 +231,10 @@ SIMD instructions (i.e. not starting Julia with ``-O3``): julia> using ForwardDiff: Dual julia> a = Dual(1., 2., 3., 4.) - Dual(1.0,2.0,3.0,4.0) + Dual{Void}(1.0,2.0,3.0,4.0) julia> b = Dual(5., 6., 7., 8.) - Dual(5.0,6.0,7.0,8.0) + Dual{Void}(5.0,6.0,7.0,8.0) julia> @code_llvm a + b diff --git a/docs/_rst/source/basic_api.rst b/docs/_rst/source/basic_api.rst index 28eb6392..96115abd 100644 --- a/docs/_rst/source/basic_api.rst +++ b/docs/_rst/source/basic_api.rst @@ -4,22 +4,26 @@ Basic ForwardDiff API Derivatives of :math:`f(x) : \mathbb{R} \to \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k}` -------------------------------------------------------------------------------------------------- -Use ``ForwardDiff.derivative`` to differentiate functions of the form ``f(::Real)::Real`` and ``f(::Real)::AbstractArray``. +Use ``ForwardDiff.derivative`` to differentiate functions of the form ``f(::Real...)::Real`` and ``f(::Real...)::AbstractArray``. .. function:: ForwardDiff.derivative!(out, f, x) - Compute :math:`f'(x)`, storing the output in ``out``. + Compute :math:`f'(x)`, storing the output in ``out``. 
If ``x`` is a ``Tuple``, + then ``f`` will be called as ``f(x...)`` and the derivatives with respect to + each element in `x` will be stored in the respective element of ``out`` (which + should also be a ``Tuple``). .. function:: ForwardDiff.derivative(f, x) - Compute and return :math:`f'(x)`. + Compute and return :math:`f'(x)`. If ``x`` is a ``Tuple``, ``f`` will be + called as ``f(x...)``, and a ``Tuple`` of derivatives will be returned. Gradients of :math:`f(x) : \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k} \to \mathbb{R}` ------------------------------------------------------------------------------------------------ Use ``ForwardDiff.gradient`` to differentiate functions of the form ``f(::AbstractArray)::Real``. -.. function:: ForwardDiff.gradient!(out, f, x, cfg = ForwardDiff.GradientConfig(x)) +.. function:: ForwardDiff.gradient!(out, f, x, cfg = ForwardDiff.GradientConfig(f, x)) Compute :math:`\nabla f(\vec{x})`, storing the output in ``out``. It is highly advised to preallocate ``cfg`` yourself (see the `AbstractConfig @@ -34,23 +38,23 @@ Jacobians of :math:`f(x) : \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k} Use ``ForwardDiff.jacobian`` to differentiate functions of the form ``f(::AbstractArray)::AbstractArray``. -.. function:: ForwardDiff.jacobian!(out, f, x, cfg = ForwardDiff.JacobianConfig(x)) +.. function:: ForwardDiff.jacobian!(out, f, x, cfg = ForwardDiff.JacobianConfig(f, x)) Compute :math:`\mathbf{J}(f)(\vec{x})`, storing the output in ``out``. It is highly advised to preallocate ``cfg`` yourself (see the `AbstractConfig `_ section below). -.. function:: ForwardDiff.jacobian!(out, f!, y, x, cfg = ForwardDiff.JacobianConfig(y, x)) +.. function:: ForwardDiff.jacobian!(out, f!, y, x, cfg = ForwardDiff.JacobianConfig(f!, y, x)) Compute :math:`\mathbf{J}(f)(\vec{x})`, where :math:`f(\vec{x})` can be called as ``f!(y, x)`` such that the output of :math:`f(\vec{x})` is stored in ``y``. The output matrix is stored in ``out``. -.. function:: ForwardDiff.jacobian(f, x, cfg = ForwardDiff.JacobianConfig(x)) +.. function:: ForwardDiff.jacobian(f, x, cfg = ForwardDiff.JacobianConfig(f, x)) Compute and return :math:`\mathbf{J}(f)(\vec{x})`. -.. function:: ForwardDiff.jacobian(f!, y, x, cfg = ForwardDiff.JacobianConfig(y, x)) +.. function:: ForwardDiff.jacobian(f!, y, x, cfg = ForwardDiff.JacobianConfig(f!, y, x)) Compute and return :math:`\mathbf{J}(f)(\vec{x})`, where :math:`f(\vec{x})` can be called as ``f!(y, x)`` such that the output of :math:`f(\vec{x})` is stored in ``y``. @@ -60,13 +64,13 @@ Hessians of :math:`f(x) : \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k} Use ``ForwardDiff.hessian`` to perform second-order differentiation on functions of the form ``f(::AbstractArray)::Real``. -.. function:: ForwardDiff.hessian!(out, f, x, cfg = ForwardDiff.HessianConfig(x)) +.. function:: ForwardDiff.hessian!(out, f, x, cfg = ForwardDiff.HessianConfig(f, x)) Compute :math:`\mathbf{H}(f)(\vec{x})`, storing the output in ``out``. It is highly advised to preallocate ``cfg`` yourself (see the `AbstractConfig `_ section below). -.. function:: ForwardDiff.hessian(f, x, cfg = ForwardDiff.HessianConfig(x)) +.. function:: ForwardDiff.hessian(f, x, cfg = ForwardDiff.HessianConfig(f, x)) Compute and return :math:`\mathbf{H}(f)(\vec{x})`. @@ -77,66 +81,63 @@ For the sake of convenience and performance, all "extra" information used by For API methods is bundled up in the ``ForwardDiff.AbstractConfig`` family of types. 
These types allow the user to easily feed several different parameters to ForwardDiff's API methods, such as `chunk size `_, work buffers, -multithreading configurations, and perturbation seed configurations. +and perturbation seed configurations. ForwardDiff's basic API methods will allocate these types automatically by default, but you can drastically reduce memory usage if you preallocate them yourself. -Note that for all constructors below, the chunk size ``N`` may be explictly provided as a -type parameter, or omitted, in which case ForwardDiff will automatically select a chunk size -for you. However, it is highly recomended to `specify the chunk size manually when possible +Note that for all constructors below, the chunk size ``N`` may be explicitly provided, +or omitted, in which case ForwardDiff will automatically select a chunk size for you. +However, it is highly recommended to `specify the chunk size manually when possible `_. -.. function:: ForwardDiff.GradientConfig{N}(x) +Note also that configurations constructed for a specific function ``f`` cannot +be reused to differentiate other functions (though they can be reused to differentiate +``f`` at different values). To construct a configuration which can be reused to +differentiate any function, you can pass ``nothing`` as the function argument. +While this is more flexible, it decreases ForwardDiff's ability to catch +and prevent `perturbation confusion`_. - Construct a ``GradientConfig`` instance based on the type and shape of the input vector - ``x``. The returned ``GradientConfig`` instance contains all the work buffers required - by ForwardDiff's gradient/Jacobian methods. If taking the Jacobian of a target function - with the form ``f!(y, x)``, use the constructor ``ForwardDiff.GradientConfig{N}(y, x)`` - instead. +.. function:: ForwardDiff.GradientConfig(f, x, chunk::ForwardDiff.Chunk{N} = Chunk(x)) + + Construct a ``GradientConfig`` instance based on the type of ``f`` and + type/shape of the input vector ``x``. The returned ``GradientConfig`` + instance contains all the work buffers required by ForwardDiff's gradient + methods. This constructor does not store/modify ``x``. -.. function:: ForwardDiff.JacobianConfig{N}(x) +.. function:: ForwardDiff.JacobianConfig(f, x, chunk::ForwardDiff.Chunk{N} = Chunk(x)) - Exactly like ``ForwardDiff.GradientConfig{N}(x)``, but returns a `JacobianConfig` - instead. + Exactly like the ``GradientConfig`` constructor, but returns a ``JacobianConfig`` instead. -.. function:: ForwardDiff.JacobianConfig{N}(y, x) +.. function:: ForwardDiff.JacobianConfig(f!, y, x, chunk::ForwardDiff.Chunk{N} = Chunk(x)) - Construct a ``JacobianConfig`` instance based on the type and shape of the output vector - ``y`` and the input vector ``x``. The returned ``JacobianConfig`` instance contains all - the work buffers required by ``ForwardDiff.jacobian``/``ForwardDiff.jacobian!`` with a - target function of the form ``f!(y, x)``. + Construct a ``JacobianConfig`` instance based on the type of ``f!``, and the + types/shapes of the output vector ``y`` and the input vector ``x``. The + returned ``JacobianConfig`` instance contains all the work buffers required + by ``ForwardDiff.jacobian``/``ForwardDiff.jacobian!`` when the target + function takes the form ``f!(y, x)``. This constructor does not store/modify ``y`` or ``x``. -.. function:: ForwardDiff.HessianConfig{N}(x) +.. function:: ForwardDiff.HessianConfig(f, x, chunk::ForwardDiff.Chunk{N} = Chunk(x)) - Construct a ``HessianConfig`` instance based on the type and shape of the input vector - ``x``. The returned ``HessianConfig`` instance contains all the work buffers required - by ForwardDiff's Hessian methods. If using - ``ForwardDiff.hessian!(out::DiffBase.DiffResult, args...)``, use the constructor - ``ForwardDiff.HessianConfig{N}(out, x)`` instead. + Construct a ``HessianConfig`` instance based on the type of ``f`` and + type/shape of the input vector ``x``. The returned ``HessianConfig`` instance contains + all the work buffers required by ForwardDiff's Hessian methods. If using + ``ForwardDiff.hessian!(out::DiffBase.DiffResult, f, x)``, use the constructor + ``ForwardDiff.HessianConfig(f, out, x, chunk)`` instead. This constructor does not store/modify ``x``. -.. function:: ForwardDiff.HessianConfig{N}(out::DiffBase.DiffResult, x) +.. 
function:: ForwardDiff.HessianConfig(f, x, chunk::ForwardDiff.Chunk{N} = Chunk(x)) - Construct a ``HessianConfig`` instance based on the type and shape of the input vector - ``x``. The returned ``HessianConfig`` instance contains all the work buffers required - by ForwardDiff's Hessian methods. If using - ``ForwardDiff.hessian!(out::DiffBase.DiffResult, args...)``, use the constructor - ``ForwardDiff.HessianConfig{N}(out, x)`` instead. + Construct a ``HessianConfig`` instance based on the type of ``f`` and + type/shape of the input vector ``x``. The returned ``HessianConfig`` instance contains + all the work buffers required by ForwardDiff's Hessian methods. If using + ``ForwardDiff.hessian!(out::DiffBase.DiffResult, f, x)``, use the constructor + ``ForwardDiff.HessianConfig(f, out, x, chunk)`` instead. This constructor does not store/modify ``x``. -.. function:: ForwardDiff.HessianConfig{N}(out::DiffBase.DiffResult, x) +.. function:: ForwardDiff.HessianConfig(f, out::DiffBase.DiffResult, x, chunk::ForwardDiff.Chunk{N} = Chunk(x)) - Construct an ``HessianConfig`` instance based on the type and shape of the storage in - ``out`` and the input vector ``x``. The returned ``HessianConfig`` instance contains - all the work buffers required by ``ForwardDiff.hessian!(out::DiffBase.DiffResult, - args...)``. + Construct an ``HessianConfig`` instance based on the type of ``f``, types/storage + in ``out``, and type/shape of the input vector ``x``. The returned ``HessianConfig`` + instance contains all the work buffers required by + ``ForwardDiff.hessian!(out::DiffBase.DiffResult, args...)``. This constructor does not store/modify ``out`` or ``x``. -.. function:: ForwardDiff.MultithreadConfig(cfg::AbstractConfig) - - Wrap the given ``cfg`` in a ``MultithreadConfig`` instance, which can then be passed to - gradient or Hessian methods in order to enable experimental multithreading. Jacobian - methods do not yet support multithreading. - - Note that multithreaded ForwardDiff API methods will attempt to use all available - threads. In the future, once Julia exposes more fine-grained threading primitives, - a ``MultithreadConfig`` constructor may be added which takes in a user-provided subset - of thread IDs instead of using all available threads. +.. _`perturbation confusion`: https://github.com/JuliaDiff/ForwardDiff.jl/issues/83 diff --git a/docs/_rst/source/conf.py b/docs/_rst/source/conf.py index 0f4ba663..6e45536b 100644 --- a/docs/_rst/source/conf.py +++ b/docs/_rst/source/conf.py @@ -57,9 +57,9 @@ # built documents. # # The short X.Y version. -version = '0.2' +version = '0.5' # The full version, including alpha/beta/rc tags. -release = '0.2.3' +release = '0.5.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/_rst/source/contributing.rst b/docs/_rst/source/contributing.rst index cb669228..7de30bdc 100644 --- a/docs/_rst/source/contributing.rst +++ b/docs/_rst/source/contributing.rst @@ -40,9 +40,7 @@ To see a list of functions to pick from, look at ``ForwardDiff.AUTO_DEFINED_UNAR ⋮ Some of these functions may have already been manually optimized. To see what functions have -already been done, go to ``src/dual.jl``, scroll down to the ``Special Cases`` section, and -look at the functions under ``Manually Optimized`` (further optimizations to these functions -are always welcome, if you can come up with something clever). +already been done, go to ``src/dual.jl`` and scroll down to the ``Special Cases`` section. 
The functions in ``ForwardDiff.AUTO_DEFINED_UNARY_FUNCS`` are automatically tested as part of ForwardDiff's test suite, so you don't need to write tests yourself. You can test your diff --git a/docs/_rst/source/how_it_works.rst b/docs/_rst/source/how_it_works.rst index 20a96e49..dd8114cb 100644 --- a/docs/_rst/source/how_it_works.rst +++ b/docs/_rst/source/how_it_works.rst @@ -9,25 +9,26 @@ Julia. There are two key components of this implementation: the ``Dual`` type, a Dual Number Implementation -------------------------- -Partial derivatives are stored in the ``Partials{N,T}`` type: +Partial derivatives are stored in the ``Partials{N,V}`` type: .. code-block:: julia - immutable Partials{N,T} - values::NTuple{N,T} + struct Partials{N,V} <: AbstractVector{V} + values::NTuple{N,V} end -Overtop of this container type, ForwardDiff implements the ``Dual{N,T}`` type: +On top of this container type, ForwardDiff implements the ``Dual{T,V,N}`` type: .. code-block:: julia - immutable Dual{N,T<:Real} <: Real - value::T - partials::Partials{N,T} + struct Dual{T,V<:Real,N} <: Real + value::V + partials::Partials{N,V} end -This type represents an ``N``-dimensional `dual number`_ with the following mathematical -behavior: +This type represents an ``N``-dimensional `dual number`_ coupled with a tag +parameter ``T`` in order to prevent `perturbation confusion`_. This dual number +type is implemented to have the following mathematical behavior: .. math:: @@ -44,22 +45,23 @@ can be overloaded on ``Dual`` like so: .. code-block:: julia - Base.sin(d::Dual) = Dual(sin(value(d)), cos(value(d)) * partials(d)) + Base.sin(d::Dual{T}) where {T} = Dual{T}(sin(value(d)), cos(value(d)) * partials(d)) If we assume that a general function ``f`` is composed entirely of these elementary functions, then the chain rule enables our derivatives to compose as well. Thus, by overloading a plethora of elementary functions, we can differentiate generic functions composed of them by passing in a ``Dual`` number and looking at the output. -We won't dicuss higher-order differentiation in detail, but the reader is encouraged to +We won't discuss higher-order differentiation in detail, but the reader is encouraged to learn about `hyper-dual numbers`_, which extend dual numbers to higher orders by introducing extra :math:`\epsilon` terms that can cross-multiply. ForwardDiff's ``Dual`` number implementation naturally supports hyper-dual numbers without additional code by allowing instances of the ``Dual`` type to nest within each other. For example, a second-order -hyper-dual number has the type ``Dual{N,Dual{N,T}}``, a third-order hyper-dual number has -the type ``Dual{N,Dual{N,Dual{N,T}}}``, and so on. +hyper-dual number has the type ``Dual{T,Dual{S,V,M},N}``, a third-order hyper-dual number has +the type ``Dual{T,Dual{S,Dual{R,V,K},M},N}``, and so on. .. _`dual number`: https://en.wikipedia.org/wiki/Dual_number +.. _`perturbation confusion`: https://github.com/JuliaDiff/ForwardDiff.jl/issues/83 .. _`hyper-dual numbers`: https://adl.stanford.edu/hyperdual/Fike_AIAA-2011-886.pdf ForwardDiff's API diff --git a/docs/_rst/source/install.rst b/docs/_rst/source/install.rst index bb7668d6..4f3573b6 100644 --- a/docs/_rst/source/install.rst +++ b/docs/_rst/source/install.rst @@ -7,4 +7,4 @@ To install ForwardDiff, simply use Julia's package manager: julia> Pkg.add("ForwardDiff") -The current version of ForwardDiff supports Julia v0.4 and v0.5. +The current version of ForwardDiff supports Julia v0.6. 
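Not part of the diff itself, but useful for review: a minimal REPL sketch of the tagged ``Dual`` arithmetic documented in the ``how_it_works.rst`` hunk above. It assumes the v0.5 constructors shown in this diff; ``value`` and ``partials`` are the accessor functions used in the ``Base.sin`` overload, and the expected outputs in the comments are approximate:

.. code-block:: julia

    using ForwardDiff: Dual, value, partials

    # 1.0 + 1.0ϵ; omitting the tag gives the default, which prints as
    # Dual{Void}(1.0,1.0) just like the SIMD example in advanced_usage.rst
    d = Dual(1.0, 1.0)

    # sin(a + bϵ) = sin(a) + cos(a)*b*ϵ, per the Base.sin overload above
    s = sin(d)
    value(s)        # sin(1.0) ≈ 0.841471
    partials(s, 1)  # cos(1.0) ≈ 0.540302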
diff --git a/docs/_rst/source/limitations.rst b/docs/_rst/source/limitations.rst index 9862d12b..65ecb75a 100644 --- a/docs/_rst/source/limitations.rst +++ b/docs/_rst/source/limitations.rst @@ -12,10 +12,7 @@ function being differentiated): - **The target function must be written generically enough to accept numbers of type ``T<:Real`` as input (or arrays of these numbers).** The function doesn't require a specific type signature, as long as the type signature is generic enough to avoid breaking this rule. This also means that any storage assigned or used within the function must be generic as well (see `this comment`_ for an example). -- **Nested differentiation of closures is dangerous.** Differentiating closures is safe, and nested differentation is safe, but you might be vulnerable to a subtle bug if you try to do both. See `the relevant issue`_ for details. - - **The types of array inputs must be subtypes of** ``AbstractArray`` **.** Non-``AbstractArray`` array-like types are not officially supported. .. _`this comment`: https://github.com/JuliaDiff/ForwardDiff.jl/issues/136#issuecomment-237941790 -.. _`the relevant issue`: https://github.com/JuliaDiff/ForwardDiff.jl/issues/83 .. _`this file`: https://github.com/JuliaDiff/ForwardDiff.jl/blob/master/src/cache.jl diff --git a/docs/_rst/source/upgrade.rst b/docs/_rst/source/upgrade.rst index 5fc2ef82..a0afb6af 100644 --- a/docs/_rst/source/upgrade.rst +++ b/docs/_rst/source/upgrade.rst @@ -14,11 +14,11 @@ functions to reference them: .. code-block:: julia - # old v0.1 style + # ForwardDiff v0.1 using ForwardDiff hessian(f, x) - # current v0.3 style (since v0.2) + # ForwardDiff v0.2 & above using ForwardDiff ForwardDiff.hessian(f, x) @@ -27,26 +27,32 @@ Setting Chunk Size .. code-block:: julia - # old v0.1 style + # ForwardDiff v0.1 ForwardDiff.gradient(f, x; chunk_size = 10) - # old v0.2 style + # ForwardDiff v0.2 ForwardDiff.gradient(f, x, Chunk{10}()) - # current v0.3 style + # ForwardDiff v0.3 & v0.4 ForwardDiff.gradient(f, x, ForwardDiff.GradientConfig{10}(x)) + # ForwardDiff v0.5 & above + ForwardDiff.gradient(f, x, ForwardDiff.GradientConfig(f, x, ForwardDiff.Chunk{N}())) + Enabling Multithreading ----------------------- .. code-block:: julia - # old v0.1/v0.2 style + # ForwardDiff v0.1 & v0.2 ForwardDiff.gradient(f, x; multithread = true) - # current v0.3 style + # ForwardDiff v0.3 & v0.4 ForwardDiff.gradient(f, x, ForwardDiff.MultithreadConfig(ForwardDiff.GradientConfig(x))) + # ForwardDiff v0.5 & above + error("ForwardDiff no longer supports internal multithreading.") + Retrieving Lower-Order Results ------------------------------ @@ -55,20 +61,20 @@ For more detail, see our documentation on `retrieving lower-order results .. code-block:: julia - # old v0.1 style + # ForwardDiff v0.1 answer, results = ForwardDiff.hessian(f, x, AllResults) v = ForwardDiff.value(results) g = ForwardDiff.gradient(results) h = ForwardDiff.hessian(results) # == answer - # old v0.2 style + # ForwardDiff v0.2 out = HessianResult(x) ForwardDiff.hessian!(out, f, x) v = ForwardDiff.value(out) g = ForwardDiff.gradient(out) h = ForwardDiff.hessian(out) - # current v0.3 style + # ForwardDiff v0.3 & above using DiffBase out = DiffBase.HessianResult(x) ForwardDiff.hessian!(out, f, x) @@ -86,10 +92,10 @@ derivatives by composing existing API functions. For example, here's how to reim .. 
code-block:: julia - # old v0.1 style + # ForwardDiff v0.1 ForwardDiff.tensor(f, x) - # current v0.3 style (since v0.2) + # ForwardDiff v0.2 & above function tensor(f, x) n = length(x) out = ForwardDiff.jacobian(y -> ForwardDiff.hessian(f, y), x) @@ -108,26 +114,26 @@ ForwardDiff's API functions, see `our API documentation `_. .. code-block:: julia - # old v0.1 style + # ForwardDiff v0.1 df = ForwardDiff.derivative(f) - # current v0.3 style (since v0.2) + # ForwardDiff v0.2 & above df = x -> ForwardDiff.derivative(f, x) .. code-block:: julia - # old v0.1 style + # ForwardDiff v0.1 # in-place gradient function of f gf! = ForwardDiff.gradient(f, mutates = true) - # current v0.3 style (since v0.2) + # ForwardDiff v0.2 & above gf! = (out, x) -> ForwardDiff.gradient!(out, f, x) .. code-block:: julia - # old v0.1 style + # ForwardDiff v0.1 # in-place Jacobian function of f!(y, x): jf! = ForwardDiff.jacobian(f!, mutates = true, output_length = length(y)) - # current v0.3 style (since v0.2) + # ForwardDiff v0.2 & above jf! = (out, y, x) -> ForwardDiff.jacobian!(out, f!, y, x)
[The remaining hunks of this diff make the same changes to the generated documentation: the ``docs/_sources/*.txt`` copies of the ``docs/_rst/source/*.rst`` files above, and the rebuilt ``docs/*.html`` pages, whose version strings move from 0.2.3 to 0.5.0. Those hunks duplicate the source edits verbatim and are omitted here.]
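To make the new constructor conventions concrete, here is a minimal usage sketch of the v0.5 API documented in the ``basic_api.rst`` hunk above. ``rosenbrock`` is the same target function used in ``advanced_usage.rst``, and the chunk size of 2 is an arbitrary illustrative choice:

.. code-block:: julia

    using ForwardDiff

    function rosenbrock(x)
        a = one(eltype(x))
        b = 100 * a
        result = zero(eltype(x))
        for i in 1:length(x)-1
            result += (a - x[i])^2 + b * (x[i+1] - x[i]^2)^2
        end
        return result
    end

    x = rand(10)
    out = similar(x)

    # function-specific config: tagged with rosenbrock, so perturbation
    # confusion can be caught, but only valid for this function
    cfg = ForwardDiff.GradientConfig(rosenbrock, x, ForwardDiff.Chunk{2}())
    ForwardDiff.gradient!(out, rosenbrock, x, cfg)

    # function-agnostic config: passing `nothing` makes it reusable with
    # any function, at the cost of the tag-based safety check
    cfg_any = ForwardDiff.GradientConfig(nothing, x)
    ForwardDiff.gradient!(out, rosenbrock, x, cfg_any)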
# let's use a Rosenbrock function as our target function julia> function rosenbrock(x) @@ -58,25 +58,25 @@ For example: julia> out = similar(x); # construct GradientConfig with chunk size of 1 - julia> cfg1 = ForwardDiff.GradientConfig{1}(x); + julia> cfg1 = GradientConfig(rosenbrock, x, Chunk{1}()); # construct GradientConfig with chunk size of 4 - julia> cfg4 = ForwardDiff.GradientConfig{4}(x); + julia> cfg4 = GradientConfig(rosenbrock, x, Chunk{4}()); # construct GradientConfig with chunk size of 10 - julia> cfg10 = ForwardDiff.GradientConfig{10}(x); + julia> cfg10 = GradientConfig(rosenbrock, x, Chunk{10}()); # (input length of 10000) / (chunk size of 1) = (10000 1-element chunks) - julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg1); - 0.408305 seconds (4 allocations: 160 bytes) + julia> @time gradient!(out, rosenbrock, x, cfg1); + 0.775139 seconds (4 allocations: 160 bytes) # (input length of 10000) / (chunk size of 4) = (2500 4-element chunks) - julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg4); - 0.295764 seconds (4 allocations: 160 bytes) + julia> @time gradient!(out, rosenbrock, x, cfg4); + 0.386459 seconds (4 allocations: 160 bytes) # (input length of 10000) / (chunk size of 10) = (1000 10-element chunks) - julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg10); - 0.267396 seconds (4 allocations: 160 bytes) + julia> @time gradient!(out, rosenbrock, x, cfg10); + 0.282529 seconds (4 allocations: 160 bytes) If you do not explicity provide a chunk size, ForwardDiff will try to guess one for you based on your input vector: @@ -85,10 +85,10 @@ based on your input vector: # The GradientConfig constructor will automatically select a # chunk size in one is not explicitly provided - julia> cfg = ForwardDiff.GradientConfig(x); + julia> cfg = ForwardDiff.GradientConfig(rosenbrock, x); julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg); - 0.266920 seconds (4 allocations: 160 bytes) + 0.281853 seconds (4 allocations: 160 bytes) If your input dimension is a constant, you should explicitly select a chunk size rather than relying on ForwardDiff's heuristic. There are two reasons for this. The first is that @@ -130,8 +130,8 @@ aren't sensitive to the input and thus cause ForwardDiff to incorrectly return ` # the dual number's perturbation component is zero, so this # variable should not propagate derivative information - julia> log(ForwardDiff.Dual(0.0, 0.0)) - Dual(-Inf,NaN) # oops, this NaN should be 0.0 + julia> log(ForwardDiff.Dual{:tag}(0.0, 0.0)) + Dual{:tag}(-Inf,NaN) # oops, this NaN should be 0.0 Here, ForwardDiff computes the derivative of ``log(0.0)`` as ``NaN`` and then propagates this derivative by multiplying it by the perturbation component. Usually, ForwardDiff can @@ -153,7 +153,6 @@ In the future, we plan on allowing users and downstream library authors to dynam enable ``NaN``-safe mode via the ``AbstractConfig`` API (see `the relevant issue `_). - Hessian of a vector-valued function ----------------------------------- @@ -163,17 +162,17 @@ For example: .. 
code-block:: julia - julia> ForwardDiff.jacobian(x -> ForwardDiff.jacobian(sin, x), [1,2,3]) - 9×3 Array{Float64,2}: - -0.841471 0.0 0.0 - -0.0 -0.0 -0.0 - -0.0 -0.0 -0.0 - 0.0 0.0 0.0 - -0.0 -0.909297 -0.0 - -0.0 -0.0 -0.0 - 0.0 0.0 0.0 - -0.0 -0.0 -0.0 - -0.0 -0.0 -0.14112 + julia> ForwardDiff.jacobian(x -> ForwardDiff.jacobian(cumprod, x), [1,2,3]) + 9×3 Array{Int64,2}: + 0 0 0 + 0 1 0 + 0 3 2 + 0 0 0 + 1 0 0 + 3 0 1 + 0 0 0 + 0 0 0 + 2 1 0 Since this functionality is composed from ForwardDiff's existing API rather than built into it, you're free to construct a ``vector_hessian`` function which suits your needs. For @@ -190,22 +189,22 @@ expensive operation): end vector_hessian (generic function with 1 method) - julia> vector_hessian(sin, [1, 2, 3]) - 3×3×3 Array{Float64,3}: + julia> vector_hessian(cumprod, [1, 2, 3]) + 3×3×3 Array{Int64,3}: [:, :, 1] = - -0.841471 0.0 0.0 - -0.0 -0.0 -0.0 - -0.0 -0.0 -0.0 + 0 0 0 + 0 1 0 + 0 3 2 [:, :, 2] = - 0.0 0.0 0.0 - -0.0 -0.909297 -0.0 - -0.0 -0.0 -0.0 + 0 0 0 + 1 0 0 + 3 0 1 [:, :, 3] = - 0.0 0.0 0.0 - -0.0 -0.0 -0.0 - -0.0 -0.0 -0.14112 + 0 0 0 + 0 0 0 + 2 1 0 Likewise, you could write a version of ``vector_hessian`` which supports functions of the form ``f!(y, x)``, or perhaps an in-place Jacobian with ``ForwardDiff.jacobian!``. @@ -232,10 +231,10 @@ SIMD instructions (i.e. not starting Julia with ``-O3``): julia> using ForwardDiff: Dual julia> a = Dual(1., 2., 3., 4.) - Dual(1.0,2.0,3.0,4.0) + Dual{Void}(1.0,2.0,3.0,4.0) julia> b = Dual(5., 6., 7., 8.) - Dual(5.0,6.0,7.0,8.0) + Dual{Void}(5.0,6.0,7.0,8.0) julia> @code_llvm a + b diff --git a/docs/_sources/basic_api.txt b/docs/_sources/basic_api.txt index 28eb6392..96115abd 100644 --- a/docs/_sources/basic_api.txt +++ b/docs/_sources/basic_api.txt @@ -4,22 +4,26 @@ Basic ForwardDiff API Derivatives of :math:`f(x) : \mathbb{R} \to \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k}` -------------------------------------------------------------------------------------------------- -Use ``ForwardDiff.derivative`` to differentiate functions of the form ``f(::Real)::Real`` and ``f(::Real)::AbstractArray``. +Use ``ForwardDiff.derivative`` to differentiate functions of the form ``f(::Real...)::Real`` and ``f(::Real...)::AbstractArray``. .. function:: ForwardDiff.derivative!(out, f, x) - Compute :math:`f'(x)`, storing the output in ``out``. + Compute :math:`f'(x)`, storing the output in ``out``. If ``x`` is a ``Tuple``, + then ``f`` will be called as ``f(x...)`` and the derivatives with respect to + each element in `x` will be stored in the respective element of ``out`` (which + should also be a ``Tuple``). .. function:: ForwardDiff.derivative(f, x) - Compute and return :math:`f'(x)`. + Compute and return :math:`f'(x)`. If ``x`` is a ``Tuple``, ``f`` will be + called as ``f(x...)``, and a ``Tuple`` of derivatives will be returned. Gradients of :math:`f(x) : \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k} \to \mathbb{R}` ------------------------------------------------------------------------------------------------ Use ``ForwardDiff.gradient`` to differentiate functions of the form ``f(::AbstractArray)::Real``. -.. function:: ForwardDiff.gradient!(out, f, x, cfg = ForwardDiff.GradientConfig(x)) +.. function:: ForwardDiff.gradient!(out, f, x, cfg = ForwardDiff.GradientConfig(f, x)) Compute :math:`\nabla f(\vec{x})`, storing the output in ``out``. 
It is highly advised to preallocate ``cfg`` yourself (see the `AbstractConfig @@ -34,23 +38,23 @@ Jacobians of :math:`f(x) : \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k} Use ``ForwardDiff.jacobian`` to differentiate functions of the form ``f(::AbstractArray)::AbstractArray``. -.. function:: ForwardDiff.jacobian!(out, f, x, cfg = ForwardDiff.JacobianConfig(x)) +.. function:: ForwardDiff.jacobian!(out, f, x, cfg = ForwardDiff.JacobianConfig(f, x)) Compute :math:`\mathbf{J}(f)(\vec{x})`, storing the output in ``out``. It is highly advised to preallocate ``cfg`` yourself (see the `AbstractConfig `_ section below). -.. function:: ForwardDiff.jacobian!(out, f!, y, x, cfg = ForwardDiff.JacobianConfig(y, x)) +.. function:: ForwardDiff.jacobian!(out, f!, y, x, cfg = ForwardDiff.JacobianConfig(f!, y, x)) Compute :math:`\mathbf{J}(f)(\vec{x})`, where :math:`f(\vec{x})` can be called as ``f!(y, x)`` such that the output of :math:`f(\vec{x})` is stored in ``y``. The output matrix is stored in ``out``. -.. function:: ForwardDiff.jacobian(f, x, cfg = ForwardDiff.JacobianConfig(x)) +.. function:: ForwardDiff.jacobian(f, x, cfg = ForwardDiff.JacobianConfig(f, x)) Compute and return :math:`\mathbf{J}(f)(\vec{x})`. -.. function:: ForwardDiff.jacobian(f!, y, x, cfg = ForwardDiff.JacobianConfig(y, x)) +.. function:: ForwardDiff.jacobian(f!, y, x, cfg = ForwardDiff.JacobianConfig(f!, y, x)) Compute and return :math:`\mathbf{J}(f)(\vec{x})`, where :math:`f(\vec{x})` can be called as ``f!(y, x)`` such that the output of :math:`f(\vec{x})` is stored in ``y``. @@ -60,13 +64,13 @@ Hessians of :math:`f(x) : \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k} Use ``ForwardDiff.hessian`` to perform second-order differentiation on functions of the form ``f(::AbstractArray)::Real``. -.. function:: ForwardDiff.hessian!(out, f, x, cfg = ForwardDiff.HessianConfig(x)) +.. function:: ForwardDiff.hessian!(out, f, x, cfg = ForwardDiff.HessianConfig(f, x)) Compute :math:`\mathbf{H}(f)(\vec{x})`, storing the output in ``out``. It is highly advised to preallocate ``cfg`` yourself (see the `AbstractConfig `_ section below). -.. function:: ForwardDiff.hessian(f, x, cfg = ForwardDiff.HessianConfig(x)) +.. function:: ForwardDiff.hessian(f, x, cfg = ForwardDiff.HessianConfig(f, x)) Compute and return :math:`\mathbf{H}(f)(\vec{x})`. @@ -77,66 +81,63 @@ For the sake of convenience and performance, all "extra" information used by For API methods is bundled up in the ``ForwardDiff.AbstractConfig`` family of types. Theses types allow the user to easily feed several different parameters to ForwardDiff's API methods, such as `chunk size `_, work buffers, -multithreading configurations, and perturbation seed configurations. +and perturbation seed configurations. ForwardDiff's basic API methods will allocate these types automatically by default, but you can drastically reduce memory usage if you preallocate them yourself. -Note that for all constructors below, the chunk size ``N`` may be explictly provided as a -type parameter, or omitted, in which case ForwardDiff will automatically select a chunk size -for you. However, it is highly recomended to `specify the chunk size manually when possible +Note that for all constructors below, the chunk size ``N`` may be explictly provided, +or omitted, in which case ForwardDiff will automatically select a chunk size for you. +However, it is highly recomended to `specify the chunk size manually when possible `_. -.. 
function:: ForwardDiff.GradientConfig{N}(x) +Note also that configurations constructed for a specific function ``f`` cannot +be reused to differentiate other functions (though can be reused to differentiate +``f`` at different values). To construct a configuration which can be reused to +differentiate any function, you can pass ``nothing`` as the function argument. +While this is more flexible, this decreases ForwardDiff's ability to catch +and prevent `perturbation confusion`_. - Construct a ``GradientConfig`` instance based on the type and shape of the input vector - ``x``. The returned ``GradientConfig`` instance contains all the work buffers required - by ForwardDiff's gradient/Jacobian methods. If taking the Jacobian of a target function - with the form ``f!(y, x)``, use the constructor ``ForwardDiff.GradientConfig{N}(y, x)`` - instead. +.. function:: ForwardDiff.GradientConfig(f, x, chunk::ForwardDiff.Chunk{N} = Chunk(x)) + + Construct a ``GradientConfig`` instance based on the type of ``f`` and + type/shape of the input vector ``x``. The returned ``GradientConfig`` + instance contains all the work buffers required by ForwardDiff's gradient + methods. This constructor does not store/modify ``x``. -.. function:: ForwardDiff.JacobianConfig{N}(x) +.. function:: ForwardDiff.JacobianConfig(f, x, chunk::ForwardDiff.Chunk{N} = Chunk(x)) - Exactly like ``ForwardDiff.GradientConfig{N}(x)``, but returns a `JacobianConfig` - instead. + Exactly like the ``GradientConfig`` constructor, but returns a ``JacobianConfig`` instead. -.. function:: ForwardDiff.JacobianConfig{N}(y, x) +.. function:: ForwardDiff.JacobianConfig(f!, y, x, chunk::ForwardDiff.Chunk{N} = Chunk(x)) - Construct a ``JacobianConfig`` instance based on the type and shape of the output vector - ``y`` and the input vector ``x``. The returned ``JacobianConfig`` instance contains all - the work buffers required by ``ForwardDiff.jacobian``/``ForwardDiff.jacobian!`` with a - target function of the form ``f!(y, x)``. + Construct a ``JacobianConfig`` instance based on the type of ``f!``, and the + types/shapes of the output vector ``y`` and the input vector ``x``. The + returned ``JacobianConfig`` instance contains all the work buffers required + by ``ForwardDiff.jacobian``/``ForwardDiff.jacobian!`` when the target + function takes the form ``f!(y, x)``. This constructor does not store/modify ``y`` or ``x``. -.. function:: ForwardDiff.HessianConfig{N}(x) +.. function:: ForwardDiff.HessianConfig(f, x, chunk::ForwardDiff.Chunk{N} = Chunk(x)) - Construct a ``HessianConfig`` instance based on the type and shape of the input vector - ``x``. The returned ``HessianConfig`` instance contains all the work buffers required - by ForwardDiff's Hessian methods. If using - ``ForwardDiff.hessian!(out::DiffBase.DiffResult, args...)``, use the constructor - ``ForwardDiff.HessianConfig{N}(out, x)`` instead. + Construct a ``HessianConfig`` instance based on the type of ``f`` and + type/shape of the input vector ``x``. The returned ``HessianConfig`` instance contains + all the work buffers required by ForwardDiff's Hessian methods. If using + ``ForwardDiff.hessian!(out::DiffBase.DiffResult, f, x)``, use the constructor + ``ForwardDiff.HessianConfig(f, out, x, chunk)`` instead. This constructor does not store/modify ``x``. -.. function:: ForwardDiff.HessianConfig{N}(out::DiffBase.DiffResult, x) +.. 
function:: ForwardDiff.HessianConfig(f, out::DiffBase.DiffResult, x, chunk::ForwardDiff.Chunk{N} = Chunk(x)) - Construct an ``HessianConfig`` instance based on the type and shape of the storage in - ``out`` and the input vector ``x``. The returned ``HessianConfig`` instance contains - all the work buffers required by ``ForwardDiff.hessian!(out::DiffBase.DiffResult, - args...)``. + Construct an ``HessianConfig`` instance based on the type of ``f``, types/storage + in ``out``, and type/shape of the input vector ``x``. The returned ``HessianConfig`` + instance contains all the work buffers required by + ``ForwardDiff.hessian!(out::DiffBase.DiffResult, args...)``. This constructor does not store/modify ``out`` or ``x``. -.. function:: ForwardDiff.MultithreadConfig(cfg::AbstractConfig) - - Wrap the given ``cfg`` in a ``MultithreadConfig`` instance, which can then be passed to - gradient or Hessian methods in order to enable experimental multithreading. Jacobian - methods do not yet support multithreading. - - Note that multithreaded ForwardDiff API methods will attempt to use all available - threads. In the future, once Julia exposes more fine-grained threading primitives, - a ``MultithreadConfig`` constructor may be added which takes in a user-provided subset - of thread IDs instead of using all available threads. +.. _`perturbation confusion`: https://github.com/JuliaDiff/ForwardDiff.jl/issues/83 diff --git a/docs/_sources/contributing.txt b/docs/_sources/contributing.txt index cb669228..7de30bdc 100644 --- a/docs/_sources/contributing.txt +++ b/docs/_sources/contributing.txt @@ -40,9 +40,7 @@ To see a list of functions to pick from, look at ``ForwardDiff.AUTO_DEFINED_UNAR ⋮ Some of these functions may have already been manually optimized. To see what functions have -already been done, go to ``src/dual.jl``, scroll down to the ``Special Cases`` section, and -look at the functions under ``Manually Optimized`` (further optimizations to these functions -are always welcome, if you can come up with something clever). +already been done, go to ``src/dual.jl`` and scroll down to the ``Special Cases`` section. The functions in ``ForwardDiff.AUTO_DEFINED_UNARY_FUNCS`` are automatically tested as part of ForwardDiff's test suite, so you don't need to write tests yourself. You can test your diff --git a/docs/_sources/how_it_works.txt b/docs/_sources/how_it_works.txt index 20a96e49..dd8114cb 100644 --- a/docs/_sources/how_it_works.txt +++ b/docs/_sources/how_it_works.txt @@ -9,25 +9,26 @@ Julia. There are two key components of this implementation: the ``Dual`` type, a Dual Number Implementation -------------------------- -Partial derivatives are stored in the ``Partials{N,T}`` type: +Partial derivatives are stored in the ``Partials{N,V}`` type: .. code-block:: julia - immutable Partials{N,T} - values::NTuple{N,T} + struct Partials{N,V} <: AbstractVector{V} + values::NTuple{N,V} end -Overtop of this container type, ForwardDiff implements the ``Dual{N,T}`` type: +Overtop of this container type, ForwardDiff implements the ``Dual{T,V,N}`` type: .. code-block:: julia - immutable Dual{N,T<:Real} <: Real - value::T - partials::Partials{N,T} + struct Dual{T,V<:Real,N} <: Real + value::V + partials::Partials{N,V} end -This type represents an ``N``-dimensional `dual number`_ with the following mathematical -behavior: +This type represents an ``N``-dimensional `dual number`_ coupled with a tag +parameter `T` in order to prevent `perturbation confusion`_. 
This dual number +type is implemented to have the following mathematical behavior: .. math:: @@ -44,22 +45,23 @@ can be overloaded on ``Dual`` like so: .. code-block:: julia - Base.sin(d::Dual) = Dual(sin(value(d)), cos(value(d)) * partials(d)) + Base.sin(d::Dual{T}) where {T} = Dual{T}(sin(value(d)), cos(value(d)) * partials(d)) If we assume that a general function ``f`` is composed of entirely of these elementary functions, then the chain rule enables our derivatives to compose as well. Thus, by overloading a plethora of elementary functions, we can differentiate generic functions composed of them by passing in a ``Dual`` number and looking at the output. -We won't dicuss higher-order differentiation in detail, but the reader is encouraged to +We won't discuss higher-order differentiation in detail, but the reader is encouraged to learn about `hyper-dual numbers`_, which extend dual numbers to higher orders by introducing extra :math:`\epsilon` terms that can cross-multiply. ForwardDiff's ``Dual`` number implementation naturally supports hyper-dual numbers without additional code by allowing instances of the ``Dual`` type to nest within each other. For example, a second-order -hyper-dual number has the type ``Dual{N,Dual{N,T}}``, a third-order hyper-dual number has -the type ``Dual{N,Dual{N,Dual{N,T}}}``, and so on. +hyper-dual number has the type ``Dual{T,Dual{S,V,M},N}``, a third-order hyper-dual number has +the type ``Dual{T,Dual{S,Dual{R,V,K},M},N}``, and so on. .. _`dual number`: https://en.wikipedia.org/wiki/Dual_number +.. _`perturbation confusion`: https://github.com/JuliaDiff/ForwardDiff.jl/issues/83 .. _`hyper-dual numbers`: https://adl.stanford.edu/hyperdual/Fike_AIAA-2011-886.pdf ForwardDiff's API diff --git a/docs/_sources/install.txt b/docs/_sources/install.txt index bb7668d6..4f3573b6 100644 --- a/docs/_sources/install.txt +++ b/docs/_sources/install.txt @@ -7,4 +7,4 @@ To install ForwardDiff, simply use Julia's package manager: julia> Pkg.add("ForwardDiff") -The current version of ForwardDiff supports Julia v0.4 and v0.5. +The current version of ForwardDiff supports Julia v0.6. diff --git a/docs/_sources/limitations.txt b/docs/_sources/limitations.txt index 9862d12b..65ecb75a 100644 --- a/docs/_sources/limitations.txt +++ b/docs/_sources/limitations.txt @@ -12,10 +12,7 @@ function being differentiated): - **The target function must be written generically enough to accept numbers of type ``T<:Real`` as input (or arrays of these numbers).** The function doesn't require a specific type signature, as long as the type signature is generic enough to avoid breaking this rule. This also means that any storage assigned used within the function must be generic as well (see `this comment`_ for an example). -- **Nested differentiation of closures is dangerous.** Differentiating closures is safe, and nested differentation is safe, but you might be vulnerable to a subtle bug if you try to do both. See `the relevant issue`_ for details. - - **The types of array inputs must be subtypes of** ``AbstractArray`` **.** Non-``AbstractArray`` array-like types are not officially supported. .. _`this comment`: https://github.com/JuliaDiff/ForwardDiff.jl/issues/136#issuecomment-237941790 -.. _`the relevant issue`: https://github.com/JuliaDiff/ForwardDiff.jl/issues/83 .. 
_`this file`: https://github.com/JuliaDiff/ForwardDiff.jl/blob/master/src/cache.jl diff --git a/docs/_sources/upgrade.txt b/docs/_sources/upgrade.txt index 5fc2ef82..a0afb6af 100644 --- a/docs/_sources/upgrade.txt +++ b/docs/_sources/upgrade.txt @@ -14,11 +14,11 @@ functions to reference them: .. code-block:: julia - # old v0.1 style + # ForwardDiff v0.1 using ForwardDiff hessian(f, x) - # current v0.3 style (since v0.2) + # ForwardDiff v0.2 & above using ForwardDiff ForwardDiff.hessian(f, x) @@ -27,26 +27,32 @@ Setting Chunk Size .. code-block:: julia - # old v0.1 style + # ForwardDiff v0.1 ForwardDiff.gradient(f, x; chunk_size = 10) - # old v0.2 style + # ForwardDiff v0.2 ForwardDiff.gradient(f, x, Chunk{10}()) - # current v0.3 style + # ForwardDiff v0.3 & v0.4 ForwardDiff.gradient(f, x, ForwardDiff.GradientConfig{10}(x)) + # ForwardDiff v0.5 & above + ForwardDiff.gradient(f, x, ForwardDiff.GradientConfig(f, x ForwardDiff.Chunk{N}())) + Enabling Multithreading ----------------------- .. code-block:: julia - # old v0.1/v0.2 style + # ForwardDiff v0.1 & v0.2 ForwardDiff.gradient(f, x; multithread = true) - # current v0.3 style + # ForwardDiff v0.3 & v0.4 ForwardDiff.gradient(f, x, ForwardDiff.MultithreadConfig(ForwardDiff.GradientConfig(x))) + # ForwardDiff v0.5 & above + error("ForwardDiff no longer supports internal multithreading.") + Retrieving Lower-Order Results ------------------------------ @@ -55,20 +61,20 @@ For more detail, see our documentation on `retrieving lower-order results .. code-block:: julia - # old v0.1 style + # ForwardDiff v0.1 answer, results = ForwardDiff.hessian(f, x, AllResults) v = ForwardDiff.value(results) g = ForwardDiff.gradient(results) h = ForwardDiff.hessian(results) # == answer - # old v0.2 style + # ForwardDiff v0.2 out = HessianResult(x) ForwardDiff.hessian!(out, f, x) v = ForwardDiff.value(out) g = ForwardDiff.gradient(out) h = ForwardDiff.hessian(out) - # current v0.3 style + # ForwardDiff v0.3 & above using DiffBase out = DiffBase.HessianResult(x) ForwardDiff.hessian!(out, f, x) @@ -86,10 +92,10 @@ derivatives by composing existing API functions. For example, here's how to reim .. code-block:: julia - # old v0.1 style + # ForwardDiff v0.1 ForwardDiff.tensor(f, x) - # current v0.3 style (since v0.2) + # ForwardDiff v0.2 & above function tensor(f, x) n = length(x) out = ForwardDiff.jacobian(y -> ForwardDiff.hessian(f, y), x) @@ -108,26 +114,26 @@ ForwardDiff's API functions, see `our API documentation `_. .. code-block:: julia - # old v0.1 style + # ForwardDiff v0.1 df = ForwardDiff.derivative(f) - # current v0.3 style (since v0.2) + # ForwardDiff v0.2 & above df = x -> ForwardDiff.derivative(f, x) .. code-block:: julia - # old v0.1 style + # ForwardDiff v0.1 # in-place gradient function of f gf! = ForwardDiff.gradient(f, mutates = true) - # current v0.3 style (since v0.2) + # ForwardDiff v0.2 & above gf! = (out, x) -> ForwardDiff.gradient!(out, f, x) .. code-block:: julia - # old v0.1 style + # ForwardDiff v0.1 # in-place Jacobian function of f!(y, x): jf! = ForwardDiff.jacobian(f!, mutates = true, output_length = length(y)) - # current v0.3 style (since v0.2) + # ForwardDiff v0.2 & above jf! 
= (out, y, x) -> ForwardDiff.jacobian!(out, f!, y, x) diff --git a/docs/advanced_usage.html b/docs/advanced_usage.html index 47bc8814..0d1e7da1 100644 --- a/docs/advanced_usage.html +++ b/docs/advanced_usage.html @@ -8,7 +8,7 @@ - Advanced Usage Guide — ForwardDiff.jl 0.2.3 documentation + Advanced Usage Guide — ForwardDiff.jl 0.5.0 documentation @@ -30,7 +30,7 @@ - + @@ -60,7 +60,7 @@
- 0.2 + 0.5
@@ -152,7 +152,7 @@

Advanced Usage GuideAccessing Lower-Order Results

Let’s say you want to calculate the value, gradient, and Hessian of some function f at an input x. You could execute f(x), ForwardDiff.gradient(f, x) and -ForwardDiff.hessian(f, x), but that would be a horribly redundant way to accomplish +ForwardDiff.hessian(f, x), but that would be a horribly redundant way to accomplish this task!

In the course of calculating higher-order derivatives, ForwardDiff ends up calculating all the lower-order derivatives and primal value f(x). To retrieve these results in one fell @@ -172,7 +172,7 @@

Configuring Chunk Size

For example:

-
julia> import ForwardDiff
+
julia> using ForwardDiff: GradientConfig, Chunk, gradient!
 
 # let's use a Rosenbrock function as our target function
 julia> function rosenbrock(x)
@@ -193,35 +193,35 @@ 

Configuring Chunk Sizejulia> out = similar(x); # construct GradientConfig with chunk size of 1 -julia> cfg1 = ForwardDiff.GradientConfig{1}(x); +julia> cfg1 = GradientConfig(rosenbrock, x, Chunk{1}()); # construct GradientConfig with chunk size of 4 -julia> cfg4 = ForwardDiff.GradientConfig{4}(x); +julia> cfg4 = GradientConfig(rosenbrock, x, Chunk{4}()); # construct GradientConfig with chunk size of 10 -julia> cfg10 = ForwardDiff.GradientConfig{10}(x); +julia> cfg10 = GradientConfig(rosenbrock, x, Chunk{10}()); # (input length of 10000) / (chunk size of 1) = (10000 1-element chunks) -julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg1); - 0.408305 seconds (4 allocations: 160 bytes) +julia> @time gradient!(out, rosenbrock, x, cfg1); + 0.775139 seconds (4 allocations: 160 bytes) # (input length of 10000) / (chunk size of 4) = (2500 4-element chunks) -julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg4); - 0.295764 seconds (4 allocations: 160 bytes) +julia> @time gradient!(out, rosenbrock, x, cfg4); + 0.386459 seconds (4 allocations: 160 bytes) # (input length of 10000) / (chunk size of 10) = (1000 10-element chunks) -julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg10); - 0.267396 seconds (4 allocations: 160 bytes) +julia> @time gradient!(out, rosenbrock, x, cfg10); + 0.282529 seconds (4 allocations: 160 bytes)

If you do not explicity provide a chunk size, ForwardDiff will try to guess one for you based on your input vector:

# The GradientConfig constructor will automatically select a
 # chunk size in one is not explicitly provided
-julia> cfg = ForwardDiff.GradientConfig(x);
+julia> cfg = ForwardDiff.GradientConfig(rosenbrock, x);
 
 julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg);
-0.266920 seconds (4 allocations: 160 bytes)
+  0.281853 seconds (4 allocations: 160 bytes)
 

If your input dimension is a constant, you should explicitly select a chunk size rather than @@ -256,8 +256,8 @@

Fixing issues with NaN/Inf return valuesInf derivatives. For example:

# the dual number's perturbation component is zero, so this
 # variable should not propagate derivative information
-julia> log(ForwardDiff.Dual(0.0, 0.0))
-Dual(-Inf,NaN) # oops, this NaN should be 0.0
+julia> log(ForwardDiff.Dual{:tag}(0.0, 0.0))
+Dual{:tag}(-Inf,NaN) # oops, this NaN should be 0.0
 

Here, ForwardDiff computes the derivative of log(0.0) as NaN and then propagates @@ -281,17 +281,17 @@

Hessian of a vector-valued functionForwardDiff.jacobian to accomplish this. For example:

-

Likewise, you could write a version of vector_hessian which supports functions of the @@ -340,10 +340,10 @@

SIMD Vectorization
julia> using ForwardDiff: Dual
 
 julia> a = Dual(1., 2., 3., 4.)
-Dual(1.0,2.0,3.0,4.0)
+Dual{Void}(1.0,2.0,3.0,4.0)
 
 julia> b = Dual(5., 6., 7., 8.)
-Dual(5.0,6.0,7.0,8.0)
+Dual{Void}(5.0,6.0,7.0,8.0)
 
 julia> @code_llvm a + b
 
@@ -445,7 +445,7 @@ 

SIMD Vectorization var DOCUMENTATION_OPTIONS = { URL_ROOT:'./', - VERSION:'0.2.3', + VERSION:'0.5.0', COLLAPSE_INDEX:false, FILE_SUFFIX:'.html', HAS_SOURCE: true diff --git a/docs/basic_api.html b/docs/basic_api.html index 882019e8..862d63fb 100644 --- a/docs/basic_api.html +++ b/docs/basic_api.html @@ -8,7 +8,7 @@ - Basic ForwardDiff API — ForwardDiff.jl 0.2.3 documentation + Basic ForwardDiff API — ForwardDiff.jl 0.5.0 documentation @@ -30,7 +30,7 @@ - + @@ -60,7 +60,7 @@
-    0.2
+    0.5
@@ -148,17 +148,21 @@

Basic ForwardDiff API

Derivatives of \(f(x) : \mathbb{R} \to \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k}\)

-Use ForwardDiff.derivative to differentiate functions of the form f(::Real)::Real and f(::Real)::AbstractArray.
+Use ForwardDiff.derivative to differentiate functions of the form f(::Real...)::Real and f(::Real...)::AbstractArray.

ForwardDiff.derivative!(out, f, x)
-Compute \(f'(x)\), storing the output in out.
+Compute \(f'(x)\), storing the output in out. If x is a Tuple,
+then f will be called as f(x...) and the derivatives with respect to
+each element in x will be stored in the respective element of out (which
+should also be a Tuple).

ForwardDiff.derivative(f, x)
-Compute and return \(f'(x)\).
+Compute and return \(f'(x)\). If x is a Tuple, f will be
+called as f(x...), and a Tuple of derivatives will be returned.
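As a quick illustration of the scalar form and the newly documented Tuple form, a hedged sketch (the hypot example and its inputs are ours, and assume the Tuple method behaves exactly as the entry above describes):

julia> import ForwardDiff

julia> ForwardDiff.derivative(sin, 1.0)        # f(::Real)::Real
0.5403023058681398

# hypothetical Tuple usage: hypot(3, 4) = 5, so the partials are 3/5 and 4/5
julia> ForwardDiff.derivative(hypot, (3.0, 4.0))
(0.6, 0.8)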

@@ -167,7 +171,7 @@

Gradients of \(f(x) : \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k} \to \mathbb{R}\)

Use ForwardDiff.gradient to differentiate functions of the form f(::AbstractArray)::Real.

-ForwardDiff.gradient!(out, f, x, cfg = ForwardDiff.GradientConfig(x))
+ForwardDiff.gradient!(out, f, x, cfg = ForwardDiff.GradientConfig(f, x))

Compute \(\nabla f(\vec{x})\), storing the output in out. It is highly advised to preallocate cfg yourself (see the AbstractConfig section below).
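To make the preallocation advice concrete, here is a minimal sketch (the target function f and the input sizes are ours):

julia> import ForwardDiff

julia> f(x) = sum(abs2, x);

julia> x = rand(5); out = similar(x);

julia> cfg = ForwardDiff.GradientConfig(f, x);  # allocate work buffers once

julia> ForwardDiff.gradient!(out, f, x, cfg);   # reuse cfg on every call; out == 2 .* x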

@@ -184,14 +188,14 @@

Jacobians of \(f(x) : \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k} \to \mathbb{R}^{m_1} \times \dots \times \mathbb{R}^{m_k}\)

Use ForwardDiff.jacobian to differentiate functions of the form f(::AbstractArray)::AbstractArray.

-ForwardDiff.jacobian!(out, f, x, cfg = ForwardDiff.JacobianConfig(x))
+ForwardDiff.jacobian!(out, f, x, cfg = ForwardDiff.JacobianConfig(f, x))

Compute \(\mathbf{J}(f)(\vec{x})\), storing the output in out. It is highly advised to preallocate cfg yourself (see the AbstractConfig section below).

-ForwardDiff.jacobian!(out, f!, y, x, cfg = ForwardDiff.JacobianConfig(y, x))
+ForwardDiff.jacobian!(out, f!, y, x, cfg = ForwardDiff.JacobianConfig(f!, y, x))

Compute \(\mathbf{J}(f)(\vec{x})\), where \(f(\vec{x})\) can be called as f!(y, x) such that the output of \(f(\vec{x})\) is stored in y. The output matrix is stored in out.

@@ -199,13 +203,13 @@

Jacobians of \(f(x) : \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k} \to \mathbb{R}^{m_1} \times \dots \times \mathbb{R}^{m_k}\)

-ForwardDiff.jacobian(f, x, cfg = ForwardDiff.JacobianConfig(x))
+ForwardDiff.jacobian(f, x, cfg = ForwardDiff.JacobianConfig(f, x))

Compute and return \(\mathbf{J}(f)(\vec{x})\).

-ForwardDiff.jacobian(f!, y, x, cfg = ForwardDiff.JacobianConfig(y, x))
+ForwardDiff.jacobian(f!, y, x, cfg = ForwardDiff.JacobianConfig(f!, y, x))

Compute and return \(\mathbf{J}(f)(\vec{x})\), where \(f(\vec{x})\) can be called as f!(y, x) such that the output of \(f(\vec{x})\) is stored in y.
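To make the f!(y, x) calling convention concrete, a hedged sketch (the two-output function and its inputs are ours):

julia> import ForwardDiff

julia> function f!(y, x)
           y[1] = x[1] * x[2]   # output is written into y in-place
           y[2] = x[1] + x[2]
           return y
       end;

julia> y = zeros(2); x = [2.0, 3.0];

julia> cfg = ForwardDiff.JacobianConfig(f!, y, x);

julia> ForwardDiff.jacobian(f!, y, x, cfg)
2×2 Array{Float64,2}:
 3.0  2.0
 1.0  1.0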

@@ -216,14 +220,14 @@

Hessians of \(f(x) : \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k} \to \mathbb{R}\)

Use ForwardDiff.hessian to perform second-order differentiation on functions of the form f(::AbstractArray)::Real.

-ForwardDiff.hessian!(out, f, x, cfg = ForwardDiff.HessianConfig(x))
+ForwardDiff.hessian!(out, f, x, cfg = ForwardDiff.HessianConfig(f, x))

Compute \(\mathbf{H}(f)(\vec{x})\), storing the output in out. It is highly advised to preallocate cfg yourself (see the AbstractConfig section below).

-ForwardDiff.hessian(f, x, cfg = ForwardDiff.HessianConfig(x))
+ForwardDiff.hessian(f, x, cfg = ForwardDiff.HessianConfig(f, x))

Compute and return \(\mathbf{H}(f)(\vec{x})\).
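A hedged usage sketch (the quadratic test function is ours; its Hessian is the constant matrix 2I):

julia> import ForwardDiff

julia> f(x) = sum(abs2, x);

julia> x = rand(3);

julia> cfg = ForwardDiff.HessianConfig(f, x);

julia> ForwardDiff.hessian(f, x, cfg)
3×3 Array{Float64,2}:
 2.0  0.0  0.0
 0.0  2.0  0.0
 0.0  0.0  2.0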

@@ -234,73 +238,66 @@

The AbstractConfig

For the sake of convenience and performance, all "extra" information used by
ForwardDiff’s API methods is bundled up in the ForwardDiff.AbstractConfig
family of types. These types allow the user to easily feed several different
parameters to ForwardDiff’s API methods, such as chunk size, work buffers,
-multithreading configurations, and perturbation seed configurations.
+and perturbation seed configurations.

ForwardDiff’s basic API methods will allocate these types automatically by default, but you can drastically reduce memory usage if you preallocate them yourself.

-Note that for all constructors below, the chunk size N may be explicitly provided as a
-type parameter, or omitted, in which case ForwardDiff will automatically select a chunk size
-for you. However, it is highly recommended to specify the chunk size manually when possible.
+Note that for all constructors below, the chunk size N may be explicitly provided,
+or omitted, in which case ForwardDiff will automatically select a chunk size for you.
+However, it is highly recommended to specify the chunk size manually when possible.

+Note also that configurations constructed for a specific function f cannot
+be reused to differentiate other functions (though they can be reused to
+differentiate f at different values). To construct a configuration which can
+be reused to differentiate any function, you can pass nothing as the function
+argument. While this is more flexible, it decreases ForwardDiff’s ability to
+catch and prevent perturbation confusion.
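A short sketch of the trade-off just described (the function names are ours):

julia> import ForwardDiff

julia> f(x) = sum(abs2, x); g(x) = prod(x); x = rand(4);

# tied to f: enables tag-based perturbation-confusion checks
julia> cfg_f = ForwardDiff.GradientConfig(f, x);

# function-agnostic: reusable with any target function
julia> cfg_any = ForwardDiff.GradientConfig(nothing, x);

julia> ForwardDiff.gradient(f, x, cfg_any);

julia> ForwardDiff.gradient(g, x, cfg_any);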

-ForwardDiff.GradientConfig{N}(x)
-
-Construct a GradientConfig instance based on the type and shape of the input vector
-x. The returned GradientConfig instance contains all the work buffers required
-by ForwardDiff’s gradient/Jacobian methods. If taking the Jacobian of a target function
-with the form f!(y, x), use the constructor ForwardDiff.GradientConfig{N}(y, x)
-instead.
+ForwardDiff.GradientConfig(f, x, chunk::ForwardDiff.Chunk{N} = Chunk(x))
+
+Construct a GradientConfig instance based on the type of f and
+type/shape of the input vector x. The returned GradientConfig
+instance contains all the work buffers required by ForwardDiff’s gradient
+methods.

This constructor does not store/modify x.

-ForwardDiff.JacobianConfig{N}(x)
-
-Exactly like ForwardDiff.GradientConfig{N}(x), but returns a JacobianConfig
-instead.
+ForwardDiff.JacobianConfig(f, x, chunk::ForwardDiff.Chunk{N} = Chunk(x))
+
+Exactly like the GradientConfig constructor, but returns a JacobianConfig instead.

-ForwardDiff.JacobianConfig{N}(y, x)
-
-Construct a JacobianConfig instance based on the type and shape of the output vector
-y and the input vector x. The returned JacobianConfig instance contains all
-the work buffers required by ForwardDiff.jacobian/ForwardDiff.jacobian! with a
-target function of the form f!(y, x).
+ForwardDiff.JacobianConfig(f!, y, x, chunk::ForwardDiff.Chunk{N} = Chunk(x))
+
+Construct a JacobianConfig instance based on the type of f!, and the
+types/shapes of the output vector y and the input vector x. The
+returned JacobianConfig instance contains all the work buffers required
+by ForwardDiff.jacobian/ForwardDiff.jacobian! when the target
+function takes the form f!(y, x).

This constructor does not store/modify y or x.

-ForwardDiff.HessianConfig{N}(x)
-
-Construct a HessianConfig instance based on the type and shape of the input vector
-x. The returned HessianConfig instance contains all the work buffers required
-by ForwardDiff’s Hessian methods. If using
-ForwardDiff.hessian!(out::DiffBase.DiffResult, args...), use the constructor
-ForwardDiff.HessianConfig{N}(out, x) instead.
+ForwardDiff.HessianConfig(f, x, chunk::ForwardDiff.Chunk{N} = Chunk(x))
+
+Construct a HessianConfig instance based on the type of f and
+type/shape of the input vector x. The returned HessianConfig instance contains
+all the work buffers required by ForwardDiff’s Hessian methods. If using
+ForwardDiff.hessian!(out::DiffBase.DiffResult, f, x), use the constructor
+ForwardDiff.HessianConfig(f, out, x, chunk) instead.

This constructor does not store/modify x.

-ForwardDiff.HessianConfig{N}(out::DiffBase.DiffResult, x)
-
-Construct a HessianConfig instance based on the type and shape of the storage in
-out and the input vector x. The returned HessianConfig instance contains
-all the work buffers required by ForwardDiff.hessian!(out::DiffBase.DiffResult,
-args...).
+ForwardDiff.HessianConfig(f, out::DiffBase.DiffResult, x, chunk::ForwardDiff.Chunk{N} = Chunk(x))
+
+Construct a HessianConfig instance based on the type of f, types/storage
+in out, and type/shape of the input vector x. The returned HessianConfig
+instance contains all the work buffers required by
+ForwardDiff.hessian!(out::DiffBase.DiffResult, args...).

This constructor does not store/modify out or x.
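A hedged sketch of the DiffResult workflow this constructor supports (assuming DiffBase's HessianResult helper and accessors, which these docs reference elsewhere):

julia> using ForwardDiff, DiffBase

julia> f(x) = sum(abs2, x); x = rand(4);

julia> out = DiffBase.HessianResult(x);   # holds value, gradient, and Hessian

julia> cfg = ForwardDiff.HessianConfig(f, out, x);

julia> ForwardDiff.hessian!(out, f, x, cfg);  # one pass fills all three results

julia> DiffBase.value(out); DiffBase.gradient(out); DiffBase.hessian(out);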

-
-ForwardDiff.MultithreadConfig(cfg::AbstractConfig)
-
-Wrap the given cfg in a MultithreadConfig instance, which can then be passed to
-gradient or Hessian methods in order to enable experimental multithreading. Jacobian
-methods do not yet support multithreading.
-
-Note that multithreaded ForwardDiff API methods will attempt to use all available
-threads. In the future, once Julia exposes more fine-grained threading primitives,
-a MultithreadConfig constructor may be added which takes in a user-provided subset
-of thread IDs instead of using all available threads.

@@ -345,7 +342,7 @@

The AbstractConfig

 var DOCUMENTATION_OPTIONS = {
     URL_ROOT:'./',
-    VERSION:'0.2.3',
+    VERSION:'0.5.0',
     COLLAPSE_INDEX:false,
     FILE_SUFFIX:'.html',
     HAS_SOURCE: true

diff --git a/docs/contributing.html b/docs/contributing.html
index a2102a06..5a1973b7 100644
--- a/docs/contributing.html
+++ b/docs/contributing.html
@@ -8,7 +8,7 @@
-    How to Contribute — ForwardDiff.jl 0.2.3 documentation
+    How to Contribute — ForwardDiff.jl 0.5.0 documentation
@@ -30,7 +30,7 @@
@@ -59,7 +59,7 @@
-    0.2
+    0.5
@@ -181,9 +181,7 @@

Manually Optimizing Unary Functions

-go to src/dual.jl, scroll down to the Special Cases section, and
-look at the functions under Manually Optimized (further optimizations to these functions
-are always welcome, if you can come up with something clever).
+already been done, go to src/dual.jl and scroll down to the Special Cases section.

The functions in ForwardDiff.AUTO_DEFINED_UNARY_FUNCS are automatically tested as part of ForwardDiff’s test suite, so you don’t need to write tests yourself. You can test your changes by running Pkg.test("ForwardDiff").
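For orientation, a manually optimized unary rule in src/dual.jl takes roughly the following shape. This is purely an illustrative sketch, not the package's actual source; the real definitions differ in detail:

# schematic: d/dx exp(x) == exp(x), so the value can be reused as the
# derivative factor instead of making a second elementary call
@inline function Base.exp(d::ForwardDiff.Dual{T}) where {T}
    v = exp(ForwardDiff.value(d))
    return ForwardDiff.Dual{T}(v, v * ForwardDiff.partials(d))
end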

@@ -254,7 +252,7 @@

Manually Adding Functions to ForwardDiff

 var DOCUMENTATION_OPTIONS = {
     URL_ROOT:'./',
-    VERSION:'0.2.3',
+    VERSION:'0.5.0',
     COLLAPSE_INDEX:false,
     FILE_SUFFIX:'.html',
     HAS_SOURCE: true

diff --git a/docs/genindex.html b/docs/genindex.html
index ee28917c..28e76d10 100644
--- a/docs/genindex.html
+++ b/docs/genindex.html
@@ -9,7 +9,7 @@
-    Index — ForwardDiff.jl 0.2.3 documentation
+    Index — ForwardDiff.jl 0.5.0 documentation
@@ -31,7 +31,7 @@
@@ -59,7 +59,7 @@
-    0.2
+    0.5
@@ -153,17 +153,25 @@

F

+ForwardDiff.GradientConfig() (built-in function)

 ForwardDiff.hessian() (built-in function)

+ForwardDiff.HessianConfig() (built-in function), [1]

 ForwardDiff.jacobian() (built-in function), [1]

-ForwardDiff.MultithreadConfig() (built-in function)
+ForwardDiff.JacobianConfig() (built-in function), [1]
@@ -202,7 +210,7 @@

F

@@ -58,7 +58,7 @@
-    0.2
+    0.5
@@ -176,7 +176,7 @@