From 2d6fee8405d3b3d68abb25495fe318f3fa9be06e Mon Sep 17 00:00:00 2001 From: Chris Fonnesbeck Date: Thu, 17 Nov 2016 13:43:51 -0500 Subject: [PATCH 1/6] Added mode argument to several step methods and advi to allow mode setting (e.g. nanguardmode) for Theano functions --- pymc3/step_methods/metropolis.py | 7 +++++-- pymc3/step_methods/nuts.py | 11 ++++++++--- pymc3/variational/advi.py | 9 ++++++--- pymc3/variational/advi_minibatch.py | 7 +++++-- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index 6775eeb2f3..3645542f48 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -66,7 +66,8 @@ class Metropolis(ArrayStepShared): The frequency of tuning. Defaults to 100 iterations. model : PyMC Model Optional model for sampling step. Defaults to None (taken from context). - + mode : string or `Mode` instance. + compilation mode passed to Theano functions """ default_blocked = False @@ -93,6 +94,8 @@ def __init__(self, vars=None, S=None, proposal_dist=NormalProposal, scaling=1., [[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in vars]) self.any_discrete = self.discrete.any() self.all_discrete = self.discrete.all() + + self.mode = mode shared = pm.make_shared_replacements(vars, model) self.delta_logp = delta_logp(model.logpt, vars, shared) @@ -430,6 +433,6 @@ def delta_logp(logp, vars, shared): logp1 = pm.CallableTensor(logp0)(inarray1) - f = theano.function([inarray1, inarray0], logp1 - logp0) + f = theano.function([inarray1, inarray0], logp1 - logp0, mode=self.mode) f.trust_input = True return f diff --git a/pymc3/step_methods/nuts.py b/pymc3/step_methods/nuts.py index b72813156d..17404fb1e4 100644 --- a/pymc3/step_methods/nuts.py +++ b/pymc3/step_methods/nuts.py @@ -32,7 +32,8 @@ def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, state k=0.75, t0=10, model=None, - profile=False, **kwargs): + profile=False, + mode=None, **kwargs): """ Parameters ---------- @@ -57,6 +58,8 @@ def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, state model : Model profile : bool or ProfileStats sets the functions to be profiled + mode : string or `Mode` instance. 
+ compilation mode passed to Theano functions """ model = modelcontext(model) @@ -90,6 +93,7 @@ def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, state self.Hbar = 0 self.u = log(self.step_size * 10) self.m = 1 + self.mode = mode shared = make_shared_replacements(vars, model) @@ -106,7 +110,7 @@ def create_energy_func(q): p = tt.dvector('p') p.tag.test_value = q.tag.test_value E0 = energy(self.H, q, p) - E0_func = theano.function([q, p], E0) + E0_func = theano.function([q, p], E0, mode=self.mode) E0_func.trust_input = True return E0_func @@ -227,6 +231,7 @@ def leapfrog1_dE(H, q, profile): dE = E - E0 - f = theano.function([q, p, e, E0], [q1, p1, dE], profile=profile) + f = theano.function([q, p, e, E0], [q1, p1, dE], + profile=profile, mode=self.mode) f.trust_input = True return f diff --git a/pymc3/variational/advi.py b/pymc3/variational/advi.py index 67889529b0..a89195e085 100644 --- a/pymc3/variational/advi.py +++ b/pymc3/variational/advi.py @@ -36,7 +36,8 @@ def gen_random_state(): def advi(vars=None, start=None, model=None, n=5000, accurate_elbo=False, - optimizer=None, learning_rate=.001, epsilon=.1, random_seed=None): + optimizer=None, learning_rate=.001, epsilon=.1, mode=None, + random_seed=None): """Perform automatic differentiation variational inference (ADVI). This function implements the meanfield ADVI, where the variational @@ -87,7 +88,9 @@ def advi(vars=None, start=None, model=None, n=5000, accurate_elbo=False, This parameter is ignored when optimizer is given. random_seed : int or None Seed to initialize random state. None uses current seed. - + mode : string or `Mode` instance. + Compilation mode passed to Theano functions + Returns ------- ADVIFit @@ -137,7 +140,7 @@ def advi(vars=None, start=None, model=None, n=5000, accurate_elbo=False, uw_shared = theano.shared(uw, 'uw_shared') elbo = pm.CallableTensor(elbo)(uw_shared) updates = optimizer(loss=-1 * elbo, param=[uw_shared]) - f = theano.function([], [uw_shared, elbo], updates=updates) + f = theano.function([], [uw_shared, elbo], updates=updates, mode=mode) # Optimization loop elbos = np.empty(n) diff --git a/pymc3/variational/advi_minibatch.py b/pymc3/variational/advi_minibatch.py index 79ed5a0638..097ff16825 100644 --- a/pymc3/variational/advi_minibatch.py +++ b/pymc3/variational/advi_minibatch.py @@ -207,7 +207,8 @@ def advi_minibatch(vars=None, start=None, model=None, n=5000, n_mcsamples=1, minibatch_RVs=None, minibatch_tensors=None, minibatches=None, local_RVs=None, observed_RVs=None, encoder_params=[], total_size=None, optimizer=None, - learning_rate=.001, epsilon=.1, random_seed=None): + learning_rate=.001, epsilon=.1, random_seed=None, + mode=None): """Perform mini-batch ADVI. This function implements a mini-batch ADVI with the meanfield @@ -297,6 +298,8 @@ def advi_minibatch(vars=None, start=None, model=None, n=5000, n_mcsamples=1, This parameter is ignored when an optimizer is given. random_seed : int Seed to initialize random state. + mode : string or `Mode` instance. 
+ compilation mode passed to Theano functions Returns ------- @@ -382,7 +385,7 @@ def is_shared(t): if 0 < len(global_RVs): params += [uw_global_shared] updates = OrderedDict(optimizer(loss=-1 * elbo, param=params)) - f = theano.function(tensors, elbo, updates=updates) + f = theano.function(tensors, elbo, updates=updates, mode=mode) # Optimization loop elbos = np.empty(n) From 4e5b9c205de139c9ac6226eeb976377162414ba6 Mon Sep 17 00:00:00 2001 From: Chris Fonnesbeck Date: Thu, 17 Nov 2016 14:25:18 -0500 Subject: [PATCH 2/6] Fixed namespace bugs in mode attribute --- pymc3/step_methods/metropolis.py | 2 +- pymc3/step_methods/nuts.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index 3645542f48..67b7873bb5 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -72,7 +72,7 @@ class Metropolis(ArrayStepShared): default_blocked = False def __init__(self, vars=None, S=None, proposal_dist=NormalProposal, scaling=1., - tune=True, tune_interval=100, model=None, **kwargs): + tune=True, tune_interval=100, model=None, mode=None, **kwargs): model = pm.modelcontext(model) diff --git a/pymc3/step_methods/nuts.py b/pymc3/step_methods/nuts.py index 17404fb1e4..84bb3a90ba 100644 --- a/pymc3/step_methods/nuts.py +++ b/pymc3/step_methods/nuts.py @@ -118,7 +118,8 @@ def create_energy_func(q): self.H, q = create_hamiltonian(vars, shared, model) self.compute_energy = create_energy_func(q) - self.leapfrog1_dE = leapfrog1_dE(self.H, q, profile=profile) + self.leapfrog1_dE = leapfrog1_dE(self.H, q, profile=profile, + mode=self.mode) super(NUTS, self).__init__(vars, shared, **kwargs) @@ -204,7 +205,7 @@ def buildtree(leapfrog1_dE, q, p, u, v, j, e, Emax, E0): return -def leapfrog1_dE(H, q, profile): +def leapfrog1_dE(H, q, profile, mode): """Computes a theano function that computes one leapfrog step and the energy difference between the beginning and end of the trajectory. 
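
    The `mode` argument is handed to `theano.function` when this graph is
    compiled, so any Theano compilation mode can be supplied. A hypothetical
    example of hunting NaNs during sampling (`NanGuardMode` ships with
    Theano):

        from theano.compile.nanguardmode import NanGuardMode
        step = pm.NUTS(mode=NanGuardMode(nan_is_error=True,
                                         inf_is_error=True,
                                         big_is_error=False))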
Parameters
----------

@@ -232,6 +233,6 @@ def leapfrog1_dE(H, q, profile, mode):
     dE = E - E0

     f = theano.function([q, p, e, E0], [q1, p1, dE],
-                        profile=profile, mode=self.mode)
+                        profile=profile, mode=mode)
     f.trust_input = True
     return f

From 0ebaacdee2fe34fb6451aede880d9dd5d69f0ccc Mon Sep 17 00:00:00 2001
From: Chris Fonnesbeck
Date: Thu, 17 Nov 2016 16:40:37 -0500
Subject: [PATCH 3/6] Reverted function in delta_logp to not accept mode
 argument

---
 pymc3/step_methods/metropolis.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py
index 67b7873bb5..74f64a27d0 100644
--- a/pymc3/step_methods/metropolis.py
+++ b/pymc3/step_methods/metropolis.py
@@ -433,6 +433,6 @@ def delta_logp(logp, vars, shared):

     logp1 = pm.CallableTensor(logp0)(inarray1)

-    f = theano.function([inarray1, inarray0], logp1 - logp0, mode=self.mode)
+    f = theano.function([inarray1, inarray0], logp1 - logp0)
     f.trust_input = True
     return f

From 55c8ce644ff762f591dd1df7b44c825005ecc066 Mon Sep 17 00:00:00 2001
From: Maxim
Date: Mon, 28 Nov 2016 07:38:18 +0300
Subject: [PATCH 4/6] ENH User model (#1525)

* Started to write Base class for pymc3.models
* moved `add_var` to public api
* Added some docstrings
* Added some docstrings
* added getitem and fixed a typo
* added assertion check
* added resolve var method
* decided not to add resolve method
* Added linear component
* Docs fix
* patsy's intercept is inited properly now
* refactored code
* updated docs
* added possibility to init coefficients with random variables
* added glm
* refactored api, fixed formula init
* refactored linear model, extended acceptable types
* moved useful matrix and labels creation to utils file
* code style
* removed redundant evaluation of shape
* refactored resolver for constructing matrix and labels
* changed error message
* changed signature of init
* simplified utils any_to_tensor_and_labels code
* tests for `any_to_tensor_and_labels`
* added docstring for `any_to_tensor_and_labels` util
* forgot to document return type in `any_to_tensor_and_labels`
* refactored code for dict
* dict tests fix (do not check labels there)
* added access to random vars of model
* added a shortcut for all variables so there is a unified way to get them
* added default priors for linear model
* update docs for linear
* refactored UserModel api, made it more similar to pm.Model class
* Lots of refactoring, tests for base class, more plain api design
* deleted unused module variable
* fixed some typos in docstring
* Refactored pm.Model class, now it is ready for inheritance
* Added documentation for Model class
* Small typo in docstring
* nested contains for treedict (needed for add_random_variable)
* More accurate duplicate implementation of treedict/treelist
* refactored treedict/treelist
* changed `__imul__` of treelist
* added `root` property and `isroot` indicator for base model
* protect `parent` and `model` attributes from violation
* travis' python2 did not fail on bad syntax (maybe it's too new), fixed
* decided not to use functools wrapper

Unfortunately the functools wrapper fails when decorating built-in methods,
so I need to fix that improper behaviour (a rough sketch of the failure is
below).
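A hypothetical reproduction (`wrap` is illustrative, not code from this PR):

    import functools

    def wrap(meth):
        # functools.wraps copies attributes (e.g. __module__) that
        # built-in methods such as list.append do not expose on
        # Python 2, so the decoration itself raises
        @functools.wraps(meth)
        def wrapped(*args, **kwargs):
            return meth(*args, **kwargs)
        return wrapped

    wrap(list.append)  # AttributeError -- hence the manual
                       # `wrapped.__name__ = meth.__name__` in `withparent`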
Some bad but needed tricks were implemented * Added models package to setup script * Refactor utils * Fix some typos in pm.model --- pymc3/model.py | 271 +++++++++++++++++++++++++++--- pymc3/models/__init__.py | 6 + pymc3/models/linear.py | 134 +++++++++++++++ pymc3/models/utils.py | 126 ++++++++++++++ pymc3/tests/test_model.py | 120 +++++++++++++ pymc3/tests/test_models_linear.py | 108 ++++++++++++ pymc3/tests/test_models_utils.py | 79 +++++++++ requirements.txt | 1 + setup.py | 5 +- 9 files changed, 829 insertions(+), 21 deletions(-) create mode 100644 pymc3/models/__init__.py create mode 100644 pymc3/models/linear.py create mode 100644 pymc3/models/utils.py create mode 100644 pymc3/tests/test_model.py create mode 100644 pymc3/tests/test_models_linear.py create mode 100644 pymc3/tests/test_models_utils.py diff --git a/pymc3/model.py b/pymc3/model.py index 5f079b5686..1220c5529d 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -1,3 +1,4 @@ +import six import numpy as np import theano import theano.tensor as tt @@ -168,17 +169,215 @@ def logpt(self): return tt.sum(self.logp_elemwiset) -class Model(Context, Factor): - """Encapsulates the variables and likelihood factors of a model.""" +class InitContextMeta(type): + """Metaclass that executes `__init__` of instance in it's context""" + def __call__(cls, *args, **kwargs): + instance = cls.__new__(cls, *args, **kwargs) + with instance: # appends context + instance.__init__(*args, **kwargs) + return instance + + +def withparent(meth): + """Helper wrapper that passes calls to parent's instance""" + def wrapped(self, *args, **kwargs): + res = meth(self, *args, **kwargs) + if getattr(self, 'parent', None) is not None: + getattr(self.parent, meth.__name__)(*args, **kwargs) + return res + # Unfortunately functools wrapper fails + # when decorating built-in methods so we + # need to fix that improper behaviour + wrapped.__name__ = meth.__name__ + return wrapped + + +class treelist(list): + """A list that passes mutable extending operations used in Model + to parent list instance. + Extending treelist you will also extend its parent + """ + def __init__(self, iterable=(), parent=None): + super(treelist, self).__init__(iterable) + assert isinstance(parent, list) or parent is None + self.parent = parent + if self.parent is not None: + self.parent.extend(self) + # typechecking here works bad + append = withparent(list.append) + __iadd__ = withparent(list.__iadd__) + extend = withparent(list.extend) + + def tree_contains(self, item): + if isinstance(self.parent, treedict): + return (list.__contains__(self, item) or + self.parent.tree_contains(item)) + elif isinstance(self.parent, list): + return (list.__contains__(self, item) or + self.parent.__contains__(item)) + else: + return list.__contains__(self, item) + + def __setitem__(self, key, value): + raise NotImplementedError('Method is removed as we are not' + ' able to determine ' + 'appropriate logic for it') + + def __imul__(self, other): + t0 = len(self) + list.__imul__(self, other) + if self.parent is not None: + self.parent.extend(self[t0:]) + + +class treedict(dict): + """A dict that passes mutable extending operations used in Model + to parent dict instance. 
+ Extending treedict you will also extend its parent + """ + def __init__(self, iterable=(), parent=None, **kwargs): + super(treedict, self).__init__(iterable, **kwargs) + assert isinstance(parent, dict) or parent is None + self.parent = parent + if self.parent is not None: + self.parent.update(self) + # typechecking here works bad + __setitem__ = withparent(dict.__setitem__) + update = withparent(dict.update) + + def tree_contains(self, item): + # needed for `add_random_variable` method + if isinstance(self.parent, treedict): + return (dict.__contains__(self, item) or + self.parent.tree_contains(item)) + elif isinstance(self.parent, dict): + return (dict.__contains__(self, item) or + self.parent.__contains__(item)) + else: + return dict.__contains__(self, item) + + +class Model(six.with_metaclass(InitContextMeta, Context, Factor)): + """Encapsulates the variables and likelihood factors of a model. - def __init__(self): - self.named_vars = {} - self.free_RVs = [] - self.observed_RVs = [] - self.deterministics = [] - self.potentials = [] - self.missing_values = [] - self.model = self + Model class can be used for creating class based models. To create + a class based model you should inherit from `Model` and + override `__init__` with arbitrary definitions + (do not forget to call base class `__init__` first). + + Parameters + ---------- + name : str, default '' - name that will be used as prefix for + names of all random variables defined within model + model : Model, default None - instance of Model that is + supposed to be a parent for the new instance. If None, + context will be used. All variables defined within instance + will be passed to the parent instance. So that 'nested' model + contributes to the variables and likelihood factors of + parent model. 
+ + Examples + -------- + # How to define a custom model + class CustomModel(Model): + # 1) override init + def __init__(self, mean=0, sd=1, name='', model=None): + # 2) call super's init first, passing model and name to it + # name will be prefix for all variables here + # if no name specified for model there will be no prefix + super(CustomModel, self).__init__(name, model) + # now you are in the context of instance, + # `modelcontext` will return self + # you can define variables in several ways + # note, that all variables will get model's name prefix + + # 3) you can create variables with Var method + self.Var('v1', Normal.dist(mu=mean, sd=sd)) + # this will create variable named like '{prefix_}v1' + # and assign attribute 'v1' to instance + # created variable can be accessed with self.v1 or self['v1'] + + # 4) this syntax will also work as we are in the context + # of instance itself, names are given as usual + Normal('v2', mu=mean, sd=sd) + + # something more complex is allowed too + Normal('v3', mu=mean, sd=HalfCauchy('sd', beta=10, testval=1.)) + + # Deterministic variables can be used in usual way + Deterministic('v3_sq', self.v3 ** 2) + # Potentials too + Potential('p1', tt.constant(1)) + + # After defining a class CustomModel you can use it in several ways + + # I: + # state the model within a context + with Model() as model: + CustomModel() + # arbitrary actions + + # II: + # use new class as entering point in context + with CustomModel() as model: + Normal('new_normal_var', mu=1, sd=0) + + # III: + # just get model instance with all that was defined in it + model = CustomModel() + + # IV: + # use many custom models within one context + with Model() as model: + CustomModel(mean=1, name='first') + CustomModel(mean=2, name='second') + """ + def __new__(cls, *args, **kwargs): + # resolves the parent instance + instance = object.__new__(cls) + if kwargs.get('model') is not None: + instance._parent = kwargs.get('model') + elif cls.get_contexts(): + instance._parent = cls.get_contexts()[-1] + else: + instance._parent = None + return instance + + def __init__(self, name='', model=None): + self.name = name + if self.parent is not None: + self.named_vars = treedict(parent=self.parent.named_vars) + self.free_RVs = treelist(parent=self.parent.free_RVs) + self.observed_RVs = treelist(parent=self.parent.observed_RVs) + self.deterministics = treelist(parent=self.parent.deterministics) + self.potentials = treelist(parent=self.parent.potentials) + self.missing_values = treelist(parent=self.parent.missing_values) + else: + self.named_vars = treedict() + self.free_RVs = treelist() + self.observed_RVs = treelist() + self.deterministics = treelist() + self.potentials = treelist() + self.missing_values = treelist() + + @property + def model(self): + return self + + @property + def parent(self): + return self._parent + + @property + def root(self): + model = self + while not model.isroot: + model = model.parent + return model + + @property + def isroot(self): + return self.parent is None @property @memoize @@ -271,6 +470,7 @@ def Var(self, name, dist, data=None): ------- FreeRV or ObservedRV """ + name = self.name_for(name) if data is None: if getattr(dist, "transform", None) is None: var = FreeRV(name=name, distribution=dist, model=self) @@ -308,15 +508,46 @@ def Var(self, name, dist, data=None): def add_random_variable(self, var): """Add a random variable to the named variables of the model.""" - if var.name in self.named_vars: + if self.named_vars.tree_contains(var.name): raise ValueError( 
"Variable name {} already exists.".format(var.name)) self.named_vars[var.name] = var - if not hasattr(self, var.name): - setattr(self, var.name, var) + if not hasattr(self, self.name_of(var.name)): + setattr(self, self.name_of(var.name), var) + + @property + def prefix(self): + return '%s_' % self.name if self.name else '' + + def name_for(self, name): + """Checks if name has prefix and adds if needed + """ + if self.prefix: + if not name.startswith(self.prefix): + return '{}{}'.format(self.prefix, name) + else: + return name + else: + return name + + def name_of(self, name): + """Checks if name has prefix and deletes if needed + """ + if not self.prefix or not name: + return name + elif name.startswith(self.prefix): + return name[len(self.prefix):] + else: + return name def __getitem__(self, key): - return self.named_vars[key] + try: + return self.named_vars[key] + except KeyError as e: + try: + return self.named_vars[self.name_for(key)] + except KeyError: + raise e @memoize def makefn(self, outs, mode=None, *args, **kwargs): @@ -633,9 +864,10 @@ def Deterministic(name, var, model=None): ------- n : var but with name name """ - var.name = name - modelcontext(model).deterministics.append(var) - modelcontext(model).add_random_variable(var) + model = modelcontext(model) + var.name = model.name_for(name) + model.deterministics.append(var) + model.add_random_variable(var) return var @@ -651,8 +883,9 @@ def Potential(name, var, model=None): ------- var : var, with name attribute """ - var.name = name - modelcontext(model).potentials.append(var) + model = modelcontext(model) + var.name = model.name_for(name) + model.potentials.append(var) return var diff --git a/pymc3/models/__init__.py b/pymc3/models/__init__.py new file mode 100644 index 0000000000..17e6772687 --- /dev/null +++ b/pymc3/models/__init__.py @@ -0,0 +1,6 @@ +from .linear import LinearComponent, Glm + +__all__ = [ + 'LinearComponent', + 'Glm' +] diff --git a/pymc3/models/linear.py b/pymc3/models/linear.py new file mode 100644 index 0000000000..b6a224f2b4 --- /dev/null +++ b/pymc3/models/linear.py @@ -0,0 +1,134 @@ +import theano.tensor as tt +import pandas as pd +import numpy as np +from ..distributions import Normal, Flat +from ..glm import families +from ..model import Model, Deterministic +from .utils import any_to_tensor_and_labels + +__all__ = [ + 'LinearComponent', + 'Glm' +] + + +class LinearComponent(Model): + """Creates linear component, y_est is accessible via attribute + Parameters + ---------- + name : str - name, associated with the linear component + x : pd.DataFrame or np.ndarray + y : pd.Series or np.array + intercept : bool - fit with intercept or not? 
+ labels : list - replace variable names with these labels + priors : dict - priors for coefficients + use `Intercept` key for defining Intercept prior + defaults to Flat.dist() + use `Regressor` key for defining default prior for all regressors + defaults to Normal.dist(mu=0, tau=1.0E-6) + vars : dict - random variables instead of creating new ones + """ + default_regressor_prior = Normal.dist(mu=0, tau=1.0E-6) + default_intercept_prior = Flat.dist() + + def __init__(self, x, y, intercept=True, labels=None, + priors=None, vars=None, name='', model=None): + super(LinearComponent, self).__init__(name, model) + if priors is None: + priors = {} + if vars is None: + vars = {} + x, labels = any_to_tensor_and_labels(x, labels) + # now we have x, shape and labels + if intercept: + x = tt.concatenate( + [tt.ones((x.shape[0], 1), x.dtype), x], + axis=1 + ) + labels = ['Intercept'] + labels + coeffs = list() + for name in labels: + if name == 'Intercept': + if name in vars: + v = Deterministic(name, vars[name]) + else: + v = self.Var( + name=name, + dist=priors.get( + name, + self.default_intercept_prior + ) + ) + coeffs.append(v) + else: + if name in vars: + v = Deterministic(name, vars[name]) + else: + v = self.Var( + name=name, + dist=priors.get( + name, + priors.get( + 'Regressor', + self.default_regressor_prior + ) + ) + ) + coeffs.append(v) + self.coeffs = tt.stack(coeffs, axis=0) + self.y_est = x.dot(self.coeffs) + + @classmethod + def from_formula(cls, formula, data, priors=None, vars=None, name='', model=None): + import patsy + y, x = patsy.dmatrices(formula, data) + labels = x.design_info.column_names + return cls(np.asarray(x), np.asarray(y)[:, 0], intercept=False, labels=labels, + priors=priors, vars=vars, name=name, model=model) + + +class Glm(LinearComponent): + """Creates glm model, y_est is accessible via attribute + Parameters + ---------- + name : str - name, associated with the linear component + x : pd.DataFrame or np.ndarray + y : pd.Series or np.array + intercept : bool - fit with intercept or not? 
+ labels : list - replace variable names with these labels + priors : dict - priors for coefficients + use `Intercept` key for defining Intercept prior + defaults to Flat.dist() + use `Regressor` key for defining default prior for all regressors + defaults to Normal.dist(mu=0, tau=1.0E-6) + init : dict - test_vals for coefficients + vars : dict - random variables instead of creating new ones + family : pymc3.glm.families object + """ + def __init__(self, x, y, intercept=True, labels=None, + priors=None, vars=None, family='normal', name='', model=None): + super(Glm, self).__init__( + x, y, intercept=intercept, labels=labels, + priors=priors, vars=vars, name=name, model=model + ) + + _families = dict( + normal=families.Normal, + student=families.StudentT, + binomial=families.Binomial, + poisson=families.Poisson + ) + if isinstance(family, str): + family = _families[family]() + self.y_est = family.create_likelihood( + name='', y_est=self.y_est, + y_data=y, model=self) + + @classmethod + def from_formula(cls, formula, data, priors=None, + vars=None, family='normal', name='', model=None): + import patsy + y, x = patsy.dmatrices(formula, data) + labels = x.design_info.column_names + return cls(np.asarray(x), np.asarray(y)[:, 0], intercept=False, labels=labels, + priors=priors, vars=vars, family=family, name=name, model=model) diff --git a/pymc3/models/utils.py b/pymc3/models/utils.py new file mode 100644 index 0000000000..ea073fcd1b --- /dev/null +++ b/pymc3/models/utils.py @@ -0,0 +1,126 @@ +import six +import pandas as pd +from pandas.core.common import PandasError +import numpy as np +import theano.tensor as tt + + +def any_to_tensor_and_labels(x, labels=None): + """Util for converting input x to tensor trying to + create labels for columns if they are not provided. + + Default names for columns are ['x0', 'x1', ...], for mappable + arrays (e.g. pd.DataFrame) their names are treated as labels. + You can override them with `labels` argument. 
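+
+    A short sketch of the default labelling (this mirrors the behaviour
+    exercised in pymc3/tests/test_models_utils.py below):
+
+        x, labels = any_to_tensor_and_labels(np.ones((3, 2)))
+        # labels == ['x0', 'x1']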
+ + If you have tensor input you should provide labels as we + cannot get their shape directly + + If you pass dict input we cannot rely on labels order thus dict + keys are treated as labels anyway + + Parameters + ---------- + x : np.ndarray | pd.DataFrame | tt.Variable | dict | list + labels : list - names for columns of output tensor + + Returns + ------- + (x, labels) - tensor and labels for its columns + """ + if isinstance(labels, six.string_types): + labels = [labels] + # pandas.DataFrame + # labels can come from here + # we can override them + if isinstance(x, pd.DataFrame): + if not labels: + labels = x.columns + x = x.as_matrix() + + # pandas.Series + # there can still be a label + # we can override labels + elif isinstance(x, pd.Series): + if not labels: + labels = [x.name] + x = x.as_matrix()[:, None] + + # dict + # labels are keys, + # cannot override them + elif isinstance(x, dict): + # try to do it via pandas + try: + x = pd.DataFrame.from_dict(x) + labels = x.columns + x = x.as_matrix() + # some types fail there + # another approach is to construct + # variable by hand + except (PandasError, TypeError): + res = [] + labels = [] + for k, v in x.items(): + res.append(v) + labels.append(k) + x = tt.stack(res, axis=1) + if x.ndim == 1: + x = x[:, None] + # case when it can appear to be some + # array like value like lists of lists + # numpy deals with it + elif not isinstance(x, tt.Variable): + x = np.asarray(x) + if x.ndim == 0: + raise ValueError('Cannot use scalars') + elif x.ndim == 1: + x = x[:, None] + # something really strange goes here, + # but user passes labels trusting seems + # to be a good option + elif labels is not None: + x = tt.as_tensor_variable(x) + if x.ndim == 0: + raise ValueError('Cannot use scalars') + elif x.ndim == 1: + x = x[:, None] + else: # trust input + pass + # we should check that we can extract labels + if labels is None and not isinstance(x, tt.Variable): + labels = ['x%d' % i for i in range(x.shape[1])] + # for theano variables we should have labels from user + elif labels is None: + raise ValueError('Please provide labels as ' + 'we cannot infer shape of input') + else: # trust labels, user knows what he is doing + pass + # it's time to check shapes if we can + if not isinstance(x, tt.Variable): + if not len(labels) == x.shape[1]: + raise ValueError( + 'Please provide full list ' + 'of labels for coefficients, ' + 'got len(labels)=%d instead of %d' + % (len(labels), x.shape[1]) + ) + else: + # trust labels, as we raised an + # error in bad case, we have labels + pass + # convert labels to list + if isinstance(labels, pd.RangeIndex): + labels = ['x%d' % i for i in labels] + # maybe it was a tuple ot whatever + elif not isinstance(labels, list): + labels = list(labels) + # as output we need tensor + if not isinstance(x, tt.Variable): + x = tt.as_tensor_variable(x) + # finally check dimensions + if x.ndim == 0: + raise ValueError('Cannot use scalars') + elif x.ndim == 1: + x = x[:, None] + return x, labels diff --git a/pymc3/tests/test_model.py b/pymc3/tests/test_model.py new file mode 100644 index 0000000000..4eb96e6789 --- /dev/null +++ b/pymc3/tests/test_model.py @@ -0,0 +1,120 @@ +import unittest +import theano.tensor as tt +import pymc3 as pm +from pymc3.distributions import HalfCauchy, Normal +from pymc3 import Potential, Deterministic + + +class NewModel(pm.Model): + def __init__(self, name='', model=None): + super(NewModel, self).__init__(name, model) + assert pm.modelcontext(None) is self + # 1) init variables with Var method + 
self.Var('v1', pm.Normal.dist()) + self.v2 = pm.Normal('v2', mu=0, sd=1) + # 2) Potentials and Deterministic variables with method too + # be sure that names will not overlap with other same models + pm.Deterministic('d', tt.constant(1)) + pm.Potential('p', tt.constant(1)) + + +class DocstringModel(pm.Model): + def __init__(self, mean=0, sd=1, name='', model=None): + super(DocstringModel, self).__init__(name, model) + self.Var('v1', Normal.dist(mu=mean, sd=sd)) + Normal('v2', mu=mean, sd=sd) + Normal('v3', mu=mean, sd=HalfCauchy('sd', beta=10, testval=1.)) + Deterministic('v3_sq', self.v3 ** 2) + Potential('p1', tt.constant(1)) + + +class TestBaseModel(unittest.TestCase): + def test_setattr_properly_works(self): + with pm.Model() as model: + pm.Normal('v1') + self.assertEqual(len(model.vars), 1) + with pm.Model('sub') as submodel: + submodel.Var('v1', pm.Normal.dist()) + self.assertTrue(hasattr(submodel, 'v1')) + self.assertEqual(len(submodel.vars), 1) + self.assertEqual(len(model.vars), 2) + with submodel: + submodel.Var('v2', pm.Normal.dist()) + self.assertTrue(hasattr(submodel, 'v2')) + self.assertEqual(len(submodel.vars), 2) + self.assertEqual(len(model.vars), 3) + + def test_context_passes_vars_to_parent_model(self): + with pm.Model() as model: + # a set of variables is created + NewModel() + # another set of variables are created but with prefix 'another' + usermodel2 = NewModel(name='another') + # you can enter in a context with submodel + with usermodel2: + usermodel2.Var('v3', pm.Normal.dist()) + pm.Normal('v4') + # this variable is created in parent model too + self.assertIn('another_v2', model.named_vars) + self.assertIn('another_v3', model.named_vars) + self.assertIn('another_v3', usermodel2.named_vars) + self.assertIn('another_v4', model.named_vars) + self.assertIn('another_v4', usermodel2.named_vars) + self.assertTrue(hasattr(usermodel2, 'v3')) + self.assertTrue(hasattr(usermodel2, 'v2')) + self.assertTrue(hasattr(usermodel2, 'v4')) + # When you create a class based model you should follow some rules + with model: + m = NewModel('one_more') + self.assertTrue(m.d is model['one_more_d']) + self.assertTrue(m['d'] is model['one_more_d']) + self.assertTrue(m['one_more_d'] is model['one_more_d']) + + +class TestNested(unittest.TestCase): + def test_nest_context_works(self): + with pm.Model() as m: + new = NewModel() + with new: + self.assertTrue( + pm.modelcontext(None) is new + ) + self.assertTrue( + pm.modelcontext(None) is m + ) + self.assertIn('v1', m.named_vars) + self.assertIn('v2', m.named_vars) + + def test_named_context(self): + with pm.Model() as m: + NewModel(name='new') + self.assertIn('new_v1', m.named_vars) + self.assertIn('new_v2', m.named_vars) + + def test_docstring_example1(self): + usage1 = DocstringModel() + self.assertIn('v1', usage1.named_vars) + self.assertIn('v2', usage1.named_vars) + self.assertIn('v3', usage1.named_vars) + self.assertIn('v3_sq', usage1.named_vars) + self.assertTrue(len(usage1.potentials), 1) + + def test_docstring_example2(self): + with pm.Model() as model: + DocstringModel(name='prefix') + self.assertIn('prefix_v1', model.named_vars) + self.assertIn('prefix_v2', model.named_vars) + self.assertIn('prefix_v3', model.named_vars) + self.assertIn('prefix_v3_sq', model.named_vars) + self.assertTrue(len(model.potentials), 1) + + def test_duplicates_detection(self): + with pm.Model(): + DocstringModel(name='prefix') + self.assertRaises(ValueError, DocstringModel, name='prefix') + + def test_model_root(self): + with pm.Model() as model: + 
self.assertTrue(model is model.root) + with pm.Model() as sub: + self.assertTrue(model is sub.root) diff --git a/pymc3/tests/test_models_linear.py b/pymc3/tests/test_models_linear.py new file mode 100644 index 0000000000..fac66fc0de --- /dev/null +++ b/pymc3/tests/test_models_linear.py @@ -0,0 +1,108 @@ +import numpy as np +from .helpers import SeededTest +from pymc3 import Model, Uniform, Normal, find_MAP, Slice, sample +from pymc3.models.linear import LinearComponent, Glm + + +# Generate data +def generate_data(intercept, slope, size=700): + x = np.linspace(-1, 1, size) + y = intercept + x * slope + return x, y + + +class TestGLM(SeededTest): + @classmethod + def setUpClass(cls): + super(TestGLM, cls).setUpClass() + cls.intercept = 1 + cls.slope = 3 + cls.sd = .05 + x_linear, cls.y_linear = generate_data(cls.intercept, cls.slope, size=1000) + cls.y_linear += np.random.normal(size=1000, scale=cls.sd) + cls.data_linear = dict(x=x_linear, y=cls.y_linear) + + x_logistic, y_logistic = generate_data(cls.intercept, cls.slope, size=3000) + y_logistic = 1 / (1 + np.exp(-y_logistic)) + bern_trials = [np.random.binomial(1, i) for i in y_logistic] + cls.data_logistic = dict(x=x_logistic, y=bern_trials) + + def test_linear_component(self): + vars_to_create = { + 'sigma_interval_', + 'y_obs', + 'lm_x0', + 'lm_Intercept' + } + with Model() as model: + lm = LinearComponent( + self.data_linear['x'], + self.data_linear['y'], + name='lm' + ) # yields lm_x0, lm_Intercept + sigma = Uniform('sigma', 0, 20) # yields sigma_interval_ + Normal('y_obs', mu=lm.y_est, sd=sigma, observed=self.y_linear) # yields y_obs + start = find_MAP(vars=[sigma]) + step = Slice(model.vars) + trace = sample(500, step, start, progressbar=False, random_seed=self.random_seed) + + self.assertAlmostEqual(np.mean(trace['lm_Intercept']), self.intercept, 1) + self.assertAlmostEqual(np.mean(trace['lm_x0']), self.slope, 1) + self.assertAlmostEqual(np.mean(trace['sigma']), self.sd, 1) + self.assertSetEqual(vars_to_create, set(model.named_vars.keys())) + + def test_linear_component_from_formula(self): + with Model() as model: + lm = LinearComponent.from_formula('y ~ x', self.data_linear) + sigma = Uniform('sigma', 0, 20) + Normal('y_obs', mu=lm.y_est, sd=sigma, observed=self.y_linear) + start = find_MAP(vars=[sigma]) + step = Slice(model.vars) + trace = sample(500, step, start, progressbar=False, random_seed=self.random_seed) + + self.assertAlmostEqual(np.mean(trace['Intercept']), self.intercept, 1) + self.assertAlmostEqual(np.mean(trace['x']), self.slope, 1) + self.assertAlmostEqual(np.mean(trace['sigma']), self.sd, 1) + + def test_glm(self): + with Model() as model: + vars_to_create = { + 'glm_sd_log_', + 'glm_y', + 'glm_x0', + 'glm_Intercept' + } + Glm( + self.data_linear['x'], + self.data_linear['y'], + name='glm' + ) + start = find_MAP() + step = Slice(model.vars) + trace = sample(500, step, start, progressbar=False, random_seed=self.random_seed) + self.assertAlmostEqual(np.mean(trace['glm_Intercept']), self.intercept, 1) + self.assertAlmostEqual(np.mean(trace['glm_x0']), self.slope, 1) + self.assertAlmostEqual(np.mean(trace['glm_sd']), self.sd, 1) + self.assertSetEqual(vars_to_create, set(model.named_vars.keys())) + + def test_glm_from_formula(self): + with Model() as model: + NAME = 'glm' + Glm.from_formula('y ~ x', self.data_linear, name=NAME) + start = find_MAP() + step = Slice(model.vars) + trace = sample(500, step, start, progressbar=False, random_seed=self.random_seed) + + self.assertAlmostEqual(np.mean(trace['%s_Intercept' % 
NAME]), self.intercept, 1) + self.assertAlmostEqual(np.mean(trace['%s_x' % NAME]), self.slope, 1) + self.assertAlmostEqual(np.mean(trace['%s_sd' % NAME]), self.sd, 1) + + def test_strange_types(self): + with Model(): + self.assertRaises( + ValueError, + Glm, + 1, + self.data_linear['y'], + name='lm' + ) diff --git a/pymc3/tests/test_models_utils.py b/pymc3/tests/test_models_utils.py new file mode 100644 index 0000000000..160846bcbc --- /dev/null +++ b/pymc3/tests/test_models_utils.py @@ -0,0 +1,79 @@ +import unittest +import numpy as np +import pandas as pd +import theano.tensor as tt +import pymc3.models.utils as utils + + +class TestUtils(unittest.TestCase): + def setUp(self): + self.data = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6])) + + def assertMatrixLabels(self, m, l, mt=None, lt=None): + self.assertTrue( + np.all( + np.equal( + m.eval(), + mt if mt is not None else self.data.as_matrix() + ) + ) + ) + self.assertEqual(l, list(lt or self.data.columns)) + + def test_numpy_init(self): + m, l = utils.any_to_tensor_and_labels(self.data.as_matrix()) + self.assertMatrixLabels(m, l, lt=['x0', 'x1']) + m, l = utils.any_to_tensor_and_labels(self.data.as_matrix(), labels=['x2', 'x3']) + self.assertMatrixLabels(m, l, lt=['x2', 'x3']) + + def test_pandas_init(self): + m, l = utils.any_to_tensor_and_labels(self.data) + self.assertMatrixLabels(m, l) + m, l = utils.any_to_tensor_and_labels(self.data, labels=['x2', 'x3']) + self.assertMatrixLabels(m, l, lt=['x2', 'x3']) + + def test_dict_input(self): + m, l = utils.any_to_tensor_and_labels(self.data.to_dict('dict')) + self.assertMatrixLabels(m, l, mt=self.data.as_matrix(l), lt=l) + + m, l = utils.any_to_tensor_and_labels(self.data.to_dict('series')) + self.assertMatrixLabels(m, l, mt=self.data.as_matrix(l), lt=l) + + m, l = utils.any_to_tensor_and_labels(self.data.to_dict('list')) + self.assertMatrixLabels(m, l, mt=self.data.as_matrix(l), lt=l) + + inp = {k: tt.as_tensor_variable(v) for k, v in self.data.to_dict('series').items()} + m, l = utils.any_to_tensor_and_labels(inp) + self.assertMatrixLabels(m, l, mt=self.data.as_matrix(l), lt=l) + + def test_list_input(self): + m, l = utils.any_to_tensor_and_labels(self.data.as_matrix().tolist()) + self.assertMatrixLabels(m, l, lt=['x0', 'x1']) + m, l = utils.any_to_tensor_and_labels(self.data.as_matrix().tolist(), labels=['x2', 'x3']) + self.assertMatrixLabels(m, l, lt=['x2', 'x3']) + + def test_tensor_input(self): + m, l = utils.any_to_tensor_and_labels( + tt.as_tensor_variable(self.data.as_matrix().tolist()), + labels=['x0', 'x1'] + ) + self.assertMatrixLabels(m, l, lt=['x0', 'x1']) + m, l = utils.any_to_tensor_and_labels( + tt.as_tensor_variable(self.data.as_matrix().tolist()), + labels=['x2', 'x3']) + self.assertMatrixLabels(m, l, lt=['x2', 'x3']) + + def test_user_mistakes(self): + # no labels for tensor variable + self.assertRaises( + ValueError, + utils.any_to_tensor_and_labels, + tt.as_tensor_variable(self.data.as_matrix().tolist()) + ) + # len of labels is bad + self.assertRaises( + ValueError, + utils.any_to_tensor_and_labels, + self.data.as_matrix().tolist(), + labels=['x'] + ) diff --git a/requirements.txt b/requirements.txt index fc6625c5d9..2e53afc286 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,4 @@ recommonmark sphinx nbsphinx numpydoc +six diff --git a/setup.py b/setup.py index 708cfb1006..017c28898a 100755 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ DISTNAME = 'pymc3' DESCRIPTION = "PyMC3" -LONG_DESCRIPTION = """Bayesian estimation, particularly using Markov chain 
Monte Carlo (MCMC), is an increasingly relevant approach to statistical estimation. However, few statistical software packages implement MCMC samplers, and they are non-trivial to code by hand. ``pymc3`` is a python package that implements the Metropolis-Hastings algorithm as a python class, and is extremely flexible and applicable to a large suite of problems. ``pymc3`` includes methods for summarizing output, plotting, goodness-of-fit and convergence diagnostics.""" +LONG_DESCRIPTION = """Bayesian estimation, particularly using Markov chain Monte Carlo (MCMC), is an increasingly relevant approach to statistical estimation. However, few statistical software packages implement MCMC samplers, and they are non-trivial to code by hand. ``pymc3`` is a python package that implements the Metropolis-Hastings algorithm as a python class, and is extremely flexible and applicable to a large suite of problems. ``pymc3`` includes methods for summarizing output, plotting, goodness-of-fit and convergence diagnostics.""" MAINTAINER = 'Thomas Wiecki' MAINTAINER_EMAIL = 'thomas.wiecki@gmail.com' AUTHOR = 'John Salvatier and Christopher Fonnesbeck' @@ -50,7 +50,8 @@ packages=['pymc3', 'pymc3.distributions', 'pymc3.step_methods', 'pymc3.tuning', 'pymc3.tests', 'pymc3.glm', 'pymc3.examples', - 'pymc3.backends', 'pymc3.variational', 'docs', '.'], + 'pymc3.models', 'pymc3.backends', + 'pymc3.variational', 'docs', '.'], classifiers=classifiers, install_requires=install_reqs, tests_require=test_reqs, From d6f57a7333801aa126f9df85f2b6f45c4aacc5a7 Mon Sep 17 00:00:00 2001 From: colin Date: Tue, 8 Nov 2016 00:29:28 -0500 Subject: [PATCH 5/6] Refactor Hamiltonian methods into single class --- pymc3/step_methods/__init__.py | 4 +- pymc3/step_methods/arraystep.py | 29 ++- pymc3/step_methods/hmc.py | 127 ---------- pymc3/step_methods/hmc/__init__.py | 2 + pymc3/step_methods/hmc/base_hmc.py | 57 +++++ pymc3/step_methods/hmc/hmc.py | 56 +++++ pymc3/step_methods/hmc/nuts.py | 132 ++++++++++ pymc3/step_methods/{ => hmc}/quadpotential.py | 0 pymc3/step_methods/hmc/trajectory.py | 184 ++++++++++++++ pymc3/step_methods/nuts.py | 238 ------------------ pymc3/tests/models.py | 2 +- pymc3/tests/test_hmc.py | 29 +-- pymc3/tests/test_step.py | 131 +++++++++- pymc3/theanof.py | 4 +- 14 files changed, 589 insertions(+), 406 deletions(-) delete mode 100644 pymc3/step_methods/hmc.py create mode 100644 pymc3/step_methods/hmc/__init__.py create mode 100644 pymc3/step_methods/hmc/base_hmc.py create mode 100644 pymc3/step_methods/hmc/hmc.py create mode 100644 pymc3/step_methods/hmc/nuts.py rename pymc3/step_methods/{ => hmc}/quadpotential.py (100%) create mode 100644 pymc3/step_methods/hmc/trajectory.py delete mode 100644 pymc3/step_methods/nuts.py diff --git a/pymc3/step_methods/__init__.py b/pymc3/step_methods/__init__.py index ccb5e22c47..07ee4a453c 100644 --- a/pymc3/step_methods/__init__.py +++ b/pymc3/step_methods/__init__.py @@ -1,6 +1,6 @@ from .compound import CompoundStep -from .hmc import HamiltonianMC +from .hmc import HamiltonianMC, NUTS from .metropolis import Metropolis from .metropolis import BinaryMetropolis @@ -15,5 +15,3 @@ from .gibbs import ElemwiseCategorical from .slicer import Slice - -from .nuts import NUTS \ No newline at end of file diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index c0619ae653..da83191139 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -4,7 +4,6 @@ from ..blocking import ArrayOrdering, DictToArrayBijection import numpy as np 
from numpy.random import uniform -from numpy import log, isfinite from enum import IntEnum, unique __all__ = ['ArrayStep', 'ArrayStepShared', 'metrop_select', 'SamplerHist', @@ -90,9 +89,9 @@ class ArrayStep(BlockedStep): ---------- vars : list List of variables for sampler. + fs: list of logp theano functions allvars: Boolean (default False) blocked: Boolean (default True) - fs: logp theano function """ def __init__(self, vars, fs, allvars=False, blocked=True): @@ -114,10 +113,11 @@ def step(self, point): class ArrayStepShared(BlockedStep): - """Faster version of ArrayStep that requires the substep method that does not wrap the functions the step method uses. + """Faster version of ArrayStep that requires the substep method that does not wrap + the functions the step method uses. - Works by setting shared variables before using the step. This eliminates the mapping and unmapping overhead as well - as moving fewer variables around. + Works by setting shared variables before using the step. This eliminates the mapping + and unmapping overhead as well as moving fewer variables around. """ def __init__(self, vars, shared, blocked=True): @@ -144,14 +144,25 @@ def step(self, point): def metrop_select(mr, q, q0): - # Perform rejection/acceptance step for Metropolis class samplers + """Perform rejection/acceptance step for Metropolis class samplers. + Returns the new sample q if a uniform random number is less than the + metropolis acceptance rate (`mr`), and the old sample otherwise. + + Parameters + ---------- + mr : float, Metropolis acceptance rate + q : proposed sample + q0 : current sample + + Returns + ------- + q or q0 + """ # Compare acceptance ratio to uniform random number - if isfinite(mr) and log(uniform()) < mr: - # Accept proposed value + if np.isfinite(mr) and np.log(uniform()) < mr: return q else: - # Reject proposed value return q0 diff --git a/pymc3/step_methods/hmc.py b/pymc3/step_methods/hmc.py deleted file mode 100644 index 295b6122d0..0000000000 --- a/pymc3/step_methods/hmc.py +++ /dev/null @@ -1,127 +0,0 @@ -''' -Created on Mar 7, 2011 - -@author: johnsalvatier -''' -from numpy import floor -from .quadpotential import quad_potential -from .arraystep import ArrayStep, SamplerHist, metrop_select, Competence -from ..tuning import guess_scaling -from ..model import modelcontext, Point -from ..theanof import inputvars -from ..vartypes import discrete_types - -import numpy as np -from scipy.sparse import issparse - -from collections import namedtuple - -__all__ = ['HamiltonianMC'] - -# TODO: -# add constraint handling via page 37 of Radford's -# http://www.cs.utoronto.ca/~radford/ham-mcmc.abstract.html - - -def unif(step_size, elow=.85, ehigh=1.15): - return np.random.uniform(elow, ehigh) * step_size - - -class HamiltonianMC(ArrayStep): - default_blocked = True - - def __init__(self, vars=None, scaling=None, step_scale=.25, path_length=2., is_cov=False, step_rand=unif, state=None, model=None, **kwargs): - """ - Parameters - ---------- - vars : list of theano variables - scaling : array_like, ndim = {1,2} - Scaling for momentum distribution. 1d arrays interpreted matrix diagonal. 
- step_scale : float, default=.25 - Size of steps to take, automatically scaled down by 1/n**(1/4) (defaults to .25) - path_length : float, default=2 - total length to travel - is_cov : bool, default=False - Treat scaling as a covariance matrix/vector if True, else treat it as a precision matrix/vector - step_rand : function float -> float, default=unif - A function which takes the step size and returns an new one used to randomize the step size at each iteration. - state - State object - model : Model - """ - model = modelcontext(model) - - if vars is None: - vars = model.cont_vars - vars = inputvars(vars) - - if scaling is None: - scaling = model.test_point - - if isinstance(scaling, dict): - scaling = guess_scaling(Point(scaling, model=model), model=model) - - n = scaling.shape[0] - - self.step_size = step_scale / n ** (1 / 4.) - - self.potential = quad_potential(scaling, is_cov, as_cov=False) - - self.path_length = path_length - self.step_rand = step_rand - - if state is None: - state = SamplerHist() - self.state = state - - super(HamiltonianMC, self).__init__( - vars, [model.fastlogp, model.fastdlogp(vars)], **kwargs) - - def astep(self, q0, logp, dlogp): - H = Hamiltonian(logp, dlogp, self.potential) - - e = self.step_rand(self.step_size) - nstep = int(self.path_length / e) - - p0 = H.pot.random() - - q, p = leapfrog(H, q0, p0, nstep, e) - p = -p - - mr = energy(H, q0, p0) - energy(H, q, p) - - self.state.metrops.append(mr) - - return metrop_select(mr, q, q0) - - @staticmethod - def competence(var): - if var.dtype in discrete_types: - return Competence.INCOMPATIBLE - return Competence.COMPATIBLE - - -def bern(p): - return np.random.uniform() < p - -Hamiltonian = namedtuple("Hamiltonian", "logp, dlogp, pot") - - -def energy(H, q, p): - return -(H.logp(q) - H.pot.energy(p)) - - -def leapfrog(H, q, p, n, e): - _, dlogp, pot = H - - p = p - (e / 2) * -dlogp(q) # half momentum update - - for i in range(n): - # alternate full variable and momentum updates - q = q + e * pot.velocity(p) - - if i != n - 1: - p = p - e * -dlogp(q) - - p = p - (e / 2) * -dlogp(q) # do a half step momentum update to finish off - return q, p diff --git a/pymc3/step_methods/hmc/__init__.py b/pymc3/step_methods/hmc/__init__.py new file mode 100644 index 0000000000..07fa92af67 --- /dev/null +++ b/pymc3/step_methods/hmc/__init__.py @@ -0,0 +1,2 @@ +from .hmc import HamiltonianMC +from .nuts import NUTS diff --git a/pymc3/step_methods/hmc/base_hmc.py b/pymc3/step_methods/hmc/base_hmc.py new file mode 100644 index 0000000000..7d87f5d3c8 --- /dev/null +++ b/pymc3/step_methods/hmc/base_hmc.py @@ -0,0 +1,57 @@ +from ..arraystep import ArrayStepShared +from .trajectory import get_theano_hamiltonian_functions + +from pymc3.tuning import guess_scaling +from pymc3.model import modelcontext, Point +from .quadpotential import quad_potential +from pymc3.theanof import inputvars, make_shared_replacements + + +class BaseHMC(ArrayStepShared): + default_blocked = True + + def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, + model=None, blocked=True, use_single_leapfrog=False, **theano_kwargs): + """ + Parameters + ---------- + vars : list of theano variables + scaling : array_like, ndim = {1,2} + Scaling for momentum distribution. 1d arrays interpreted matrix diagonal. 
+        step_scale : float, default=0.25
+            Size of steps to take, automatically scaled down by 1/n**(1/4)
+        is_cov : bool, default=False
+            Treat scaling as a covariance matrix/vector if True, else treat it as a
+            precision matrix/vector
+        model : pymc3 Model instance, default=Context model
+        blocked : Boolean, default True
+        use_single_leapfrog : Boolean, default False
+            whether the compiled leapfrog function takes one step at a time
+        **theano_kwargs : passed to theano functions
+        """
+        model = modelcontext(model)
+
+        if vars is None:
+            vars = model.cont_vars
+        vars = inputvars(vars)
+
+        if scaling is None:
+            scaling = model.test_point
+
+        if isinstance(scaling, dict):
+            scaling = guess_scaling(Point(scaling, model=model), model=model, vars=vars)
+
+        n = scaling.shape[0]
+        self.step_size = step_scale / (n ** 0.25)
+        self.potential = quad_potential(scaling, is_cov, as_cov=False)
+
+        shared = make_shared_replacements(vars, model)
+        if theano_kwargs is None:
+            theano_kwargs = {}
+
+        self.H, self.compute_energy, self.leapfrog, self._vars = get_theano_hamiltonian_functions(
+            vars, shared, model.logpt, self.potential, use_single_leapfrog, **theano_kwargs)
+
+        super(BaseHMC, self).__init__(vars, shared, blocked=blocked)
diff --git a/pymc3/step_methods/hmc/hmc.py b/pymc3/step_methods/hmc/hmc.py
new file mode 100644
index 0000000000..70a042f263
--- /dev/null
+++ b/pymc3/step_methods/hmc/hmc.py
@@ -0,0 +1,56 @@
+'''
+Created on Mar 7, 2011
+
+@author: johnsalvatier
+'''
+from ..arraystep import metrop_select, Competence
+from .base_hmc import BaseHMC
+from pymc3.vartypes import discrete_types
+import numpy as np
+
+
+__all__ = ['HamiltonianMC']
+
+# TODO:
+# add constraint handling via page 37 of Radford's
+# http://www.cs.utoronto.ca/~radford/ham-mcmc.abstract.html
+
+
+def unif(step_size, elow=.85, ehigh=1.15):
+    return np.random.uniform(elow, ehigh) * step_size
+
+
+class HamiltonianMC(BaseHMC):
+    default_blocked = True
+
+    def __init__(self, vars=None, path_length=2., step_rand=unif, **kwargs):
+        """
+        Parameters
+        ----------
+        vars : list of theano variables
+        path_length : float, default=2
+            total length to travel
+        step_rand : function float -> float, default=unif
+            A function which takes the step size and returns a new one used to
+            randomize the step size at each iteration.
+        **kwargs : passed to BaseHMC
+        """
+        super(HamiltonianMC, self).__init__(vars, **kwargs)
+        self.path_length = path_length
+        self.step_rand = step_rand
+
+    def astep(self, q0):
+        e = np.array(self.step_rand(self.step_size))
+        n_steps = np.array(self.path_length / e, dtype='int32')
+        q = q0
+        p = self.H.pot.random()  # initialize momentum
+        initial_energy = self.compute_energy(q, p)
+        q, p, current_energy = self.leapfrog(q, p, e, n_steps)
+        energy_change = current_energy - initial_energy
+        return metrop_select(energy_change, q, q0)
+
+    @staticmethod
+    def competence(var):
+        if var.dtype in discrete_types:
+            return Competence.INCOMPATIBLE
+        return Competence.COMPATIBLE
diff --git a/pymc3/step_methods/hmc/nuts.py b/pymc3/step_methods/hmc/nuts.py
new file mode 100644
index 0000000000..e22a3cb8ec
--- /dev/null
+++ b/pymc3/step_methods/hmc/nuts.py
@@ -0,0 +1,132 @@
+from ..arraystep import Competence
+from .base_hmc import BaseHMC
+from pymc3.vartypes import continuous_types
+
+import numpy as np
+import numpy.random as nr
+
+__all__ = ['NUTS']
+
+
+def bern(p):
+    return np.random.uniform() < p
+
+
+class NUTS(BaseHMC):
+    """
+    Automatically tunes step size and adjusts the number of steps for good performance.
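+
+    A minimal usage sketch (standard PyMC3 wiring, nothing specific to this
+    refactor; `model` is a pymc3 Model and `sample` is pymc3.sample):
+
+        with model:
+            step = NUTS(target_accept=0.9)
+            trace = sample(1000, step=step)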
+
+    Implements "Algorithm 6: Efficient No-U-Turn Sampler with Dual Averaging" in:
+
+    Hoffman, Matthew D., & Gelman, Andrew. (2011).
+    The No-U-Turn Sampler: Adaptively Setting Path Lengths in Hamiltonian Monte Carlo.
+    """
+    default_blocked = True
+
+    def __init__(self, vars=None, Emax=1000, target_accept=0.8,
+                 gamma=0.05, k=0.75, t0=10, **kwargs):
+        """
+        Parameters
+        ----------
+        vars : list of Theano variables, default continuous vars
+        Emax : float, default 1000
+            maximum energy
+        target_accept : float (0,1) default .8
+            target for avg accept probability between final branch and initial position
+        gamma : float, default .05
+        k : float (.5,1) default .75
+            scaling of speed of adaptation
+        t0 : int, default 10
+            slows initial adaptation
+        kwargs: passed to BaseHMC
+        """
+        super(NUTS, self).__init__(vars, use_single_leapfrog=True, **kwargs)
+
+        self.Emax = Emax
+
+        self.target_accept = target_accept
+        self.gamma = gamma
+        self.k = k
+        self.t0 = t0
+
+        self.Hbar = 0
+        self.u = np.log(self.step_size * 10)
+        self.m = 1
+
+    def astep(self, q0):
+        Emax = self.Emax
+        e = self.step_size
+
+        p0 = self.potential.random()
+        E0 = self.compute_energy(q0, p0)
+
+        u = nr.uniform()
+        q = qn = qp = q0
+        p = pn = pp = p0
+
+        n, s, j = 1, 1, 0
+
+        while s == 1:
+            v = bern(0.5) * 2 - 1
+
+            if v == -1:
+                qn, pn, _, _, q1, n1, s1, a, na = buildtree(
+                    self.leapfrog, qn, pn, u, v, j, e, Emax, E0)
+            else:
+                _, _, qp, pp, q1, n1, s1, a, na = buildtree(
+                    self.leapfrog, qp, pp, u, v, j, e, Emax, E0)
+
+            if s1 == 1 and bern(min(1, n1 * 1. / n)):
+                q = q1
+
+            n = n + n1
+
+            span = qp - qn
+            s = s1 * (span.dot(pn) >= 0) * (span.dot(pp) >= 0)
+            j = j + 1
+
+        p = -p
+
+        w = 1. / (self.m + self.t0)
+        self.Hbar = (1 - w) * self.Hbar + w * \
+            (self.target_accept - a * 1. / na)
+
+        self.step_size = np.exp(self.u - (self.m**self.k / self.gamma) * self.Hbar)
+        self.m += 1
+
+        return q
+
+    @staticmethod
+    def competence(var):
+        if var.dtype in continuous_types:
+            return Competence.IDEAL
+        return Competence.INCOMPATIBLE
+
+
+def buildtree(leapfrog, q, p, u, v, j, e, Emax, E0):
+    if j == 0:
+        q1, p1, E = leapfrog(q, p, np.array(v * e))
+        dE = E - E0
+
+        n1 = int(np.log(u) + dE <= 0)
+        s1 = int(np.log(u) + dE < Emax)
+        return q1, p1, q1, p1, q1, n1, s1, min(1, np.exp(-dE)), 1
+    qn, pn, qp, pp, q1, n1, s1, a1, na1 = buildtree(leapfrog, q, p, u, v, j - 1, e, Emax, E0)
+    if s1 == 1:
+        if v == -1:
+            qn, pn, _, _, q11, n11, s11, a11, na11 = buildtree(
+                leapfrog, qn, pn, u, v, j - 1, e, Emax, E0)
+        else:
+            _, _, qp, pp, q11, n11, s11, a11, na11 = buildtree(
+                leapfrog, qp, pp, u, v, j - 1, e, Emax, E0)
+
+        if bern(n11 * 1.
/ (max(n1 + n11, 1))): + q1 = q11 + + a1 = a1 + a11 + na1 = na1 + na11 + + span = qp - qn + s1 = s11 * (span.dot(pn) >= 0) * (span.dot(pp) >= 0) + n1 = n1 + n11 + return qn, pn, qp, pp, q1, n1, s1, a1, na1 diff --git a/pymc3/step_methods/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py similarity index 100% rename from pymc3/step_methods/quadpotential.py rename to pymc3/step_methods/hmc/quadpotential.py diff --git a/pymc3/step_methods/hmc/trajectory.py b/pymc3/step_methods/hmc/trajectory.py new file mode 100644 index 0000000000..b64790a21a --- /dev/null +++ b/pymc3/step_methods/hmc/trajectory.py @@ -0,0 +1,184 @@ +from collections import namedtuple + +from pymc3.theanof import join_nonshared_inputs, gradient, CallableTensor + +import theano +import theano.tensor as tt + + +Hamiltonian = namedtuple("Hamiltonian", "logp, dlogp, pot") + + +def _theano_hamiltonian(model_vars, shared, logpt, potential): + """Creates a Hamiltonian with shared inputs. + + Parameters + ---------- + model_vars: array of variables to be sampled + shared theano tensors that are already shared + logpt: model log probability + potential: hamiltonian potential + + Returns + ------- + Hamiltonian: namedtuple with log pdf, gradient of log pdf, and potential functions + q: Starting position variable. + """ + dlogp = gradient(logpt, model_vars) + (logp, dlogp), q = join_nonshared_inputs([logpt, dlogp], model_vars, shared) + logp = CallableTensor(logp) + dlogp = CallableTensor(dlogp) + return Hamiltonian(logp, dlogp, potential), q + + +def _theano_energy_function(H, q, **theano_kwargs): + """Creates a Hamiltonian with shared inputs. + + Parameters + ---------- + H: Hamiltonian namedtuple + q: theano variable, starting position + theano_kwargs: passed to theano.function + + Returns + ------- + energy_function: theano function that computes the energy at a point (p, q) in phase space + p: Starting momentum variable. + """ + p = tt.dvector('p') + p.tag.test_value = q.tag.test_value + total_energy = H.pot.energy(p) - H.logp(q) + energy_function = theano.function(inputs=[q, p], outputs=total_energy, **theano_kwargs) + energy_function.trust_input = True + + return energy_function, p + + +def _theano_leapfrog_integrator(H, q, p, **theano_kwargs): + """Computes a theano function that computes one leapfrog step and the energy at the + end of the trajectory. + + Parameters + ---------- + H : Hamiltonian + q : theano.tensor + p : theano.tensor + theano_kwargs : passed to theano.function + + Returns + ------- + theano function which returns + q_new, p_new, energy_new + """ + epsilon = tt.dscalar('epsilon') + epsilon.tag.test_value = 1 + + n_steps = tt.iscalar('n_steps') + n_steps.tag.test_value = 2 + + q_new, p_new = leapfrog(H, q, p, epsilon, n_steps) + energy_new = energy(H, q_new, p_new) + + f = theano.function([q, p, epsilon, n_steps], [q_new, p_new, energy_new], **theano_kwargs) + f.trust_input = True + return f + + +def get_theano_hamiltonian_functions(model_vars, shared, logpt, potential, + use_single_leapfrog=False, **theano_kwargs): + """Construct theano functions for the Hamiltonian, energy, and leapfrog integrator. 
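+
+    A sketch of the intended call pattern (compare `BaseHMC.__init__` and
+    `HamiltonianMC.astep` in this patch; the variable names are
+    illustrative):
+
+        H, energy, leapfrog, _ = get_theano_hamiltonian_functions(
+            vars, shared, model.logpt, potential)
+        p0 = potential.random()   # draw an initial momentum
+        E0 = energy(q0, p0)       # energy at the starting point
+        q1, p1, E1 = leapfrog(q0, p0, step_size, n_steps)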
+ + Parameters + ---------- + model_vars : array of variables to be sampled + shared theano tensors that are already shared + logpt: model log probability + potential: hamiltonian potential + theano_kwargs : dictionary of keyword arguments to pass to theano functions + use_single_leapfrog: Boolean, if only 1 integration step is done at a time (as in NUTS), + this provides a ~2x speedup + + Returns + ------- + H: Hamiltonian namedtuple + energy_function: theano function computing energy at a point in phase space + leapfrog_integrator: theano function integrating the Hamiltonian from a point in phase space + theano_variables: dictionary of variables used in the computation graph which may be useful + """ + H, q = _theano_hamiltonian(model_vars, shared, logpt, potential) + energy_function, p = _theano_energy_function(H, q, **theano_kwargs) + if use_single_leapfrog: + leapfrog_integrator = _theano_single_leapfrog(H, q, p, **theano_kwargs) + else: + leapfrog_integrator = _theano_leapfrog_integrator(H, q, p, **theano_kwargs) + return H, energy_function, leapfrog_integrator, {'q': q, 'p': p} + + +def energy(H, q, p): + """Compute the total energy for the Hamiltonian at a given position/momentum""" + return -(H.logp(q) - H.pot.energy(p)) + + +def leapfrog(H, q, p, epsilon, n_steps): + """Leapfrog integrator. + + Estimates `p(t)` and `q(t)` at time :math:`t = n \cdot e`, by integrating the + Hamiltonian equations + + .. math:: + + \frac{dq_i}{dt} = \frac{\partial H}{\partial p_i} + + \frac{dp_i}{dt} = \frac{\partial H}{\partial q_i} + + with :math:`p(0) = p`, :math:`q(0) = q` + + Parameters + ---------- + H : Hamiltonian instance. + Tuple of `logp, dlogp, potential`. + q: Theano.tensor + initial position vector + p: Theano.tensor + initial momentum vector + epsilon: float, step size + n_steps: int, number of iterations + + Returns + ------- + position: Theano.tensor + position estimate at time :math:`n \cdot e`. + momentum: Theano.tensor + momentum estimate at time :math:`n \cdot e`. + """ + def full_update(p, q): + p = p + epsilon * H.dlogp(q) + q += epsilon * H.pot.velocity(p) + return p, q + # This first line can't be +=, possibly because of theano + p = p + 0.5 * epsilon * H.dlogp(q) # half momentum update + q += epsilon * H.pot.velocity(p) # full position update + if tt.gt(n_steps, 1): + (p_seq, q_seq), _ = theano.scan(full_update, outputs_info=[p, q], n_steps=n_steps - 1) + p, q = p_seq[-1], q_seq[-1] + p += 0.5 * epsilon * H.dlogp(q) # half momentum update + return q, p + + +def _theano_single_leapfrog(H, q, p, **theano_kwargs): + """Leapfrog integrator for a single step. + + See above for documentation. This is optimized for the case where only a single step is + needed, in case of, for example, a recursive algorithm. 
+ """ + epsilon = tt.dscalar('epsilon') + epsilon.tag.test_value = 1 + + p_new = p + 0.5 * epsilon * H.dlogp(q) # half momentum update + q_new = q + epsilon * H.pot.velocity(p_new) # full position update + p_new += 0.5 * epsilon * H.dlogp(q_new) # half momentum update + energy_new = energy(H, q_new, p_new) + + f = theano.function(inputs=[q, p, epsilon], outputs=[q_new, p_new, energy_new], **theano_kwargs) + f.trust_input = True + return f diff --git a/pymc3/step_methods/nuts.py b/pymc3/step_methods/nuts.py deleted file mode 100644 index 84bb3a90ba..0000000000 --- a/pymc3/step_methods/nuts.py +++ /dev/null @@ -1,238 +0,0 @@ -from .quadpotential import quad_potential -from .arraystep import ArrayStepShared, ArrayStep, SamplerHist, Competence -from ..model import modelcontext, Point -from ..vartypes import continuous_types -from numpy import exp, log, array -from numpy.random import uniform -from .hmc import leapfrog, Hamiltonian, bern, energy -from ..tuning import guess_scaling -import theano -from ..theanof import (make_shared_replacements, join_nonshared_inputs, CallableTensor, - gradient, inputvars) -import theano.tensor as tt - -__all__ = ['NUTS'] - - -class NUTS(ArrayStepShared): - """ - Automatically tunes step size and adjust number of steps for good performance. - - Implements "Algorithm 6: Efficient No-U-Turn Sampler with Dual Averaging" in: - - Hoffman, Matthew D., & Gelman, Andrew. (2011). - The No-U-Turn Sampler: Adaptively Setting Path Lengths in Hamiltonian Monte Carlo. - """ - default_blocked = True - - def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, state=None, - Emax=1000, - target_accept=0.8, - gamma=0.05, - k=0.75, - t0=10, - model=None, - profile=False, - mode=None, **kwargs): - """ - Parameters - ---------- - vars : list of Theano variables, default continuous vars - scaling : array_like, ndim = {1,2} or point - Scaling for momentum distribution. 1d arrays interpreted matrix diagonal. - step_scale : float, default=.25 - Size of steps to take, automatically scaled down by 1/n**(1/4) - is_cov : bool, default=False - Treat C as a covariance matrix/vector if True, else treat it as a precision matrix/vector - state - state to start from - Emax : float, default 1000 - maximum energy - target_accept : float (0,1) default .8 - target for avg accept probability between final branch and initial position - gamma : float, default .05 - k : float (.5,1) default .75 - scaling of speed of adaptation - t0 : int, default 10 - slows inital adapatation - model : Model - profile : bool or ProfileStats - sets the functions to be profiled - mode : string or `Mode` instance. - compilation mode passed to Theano functions - """ - model = modelcontext(model) - - if vars is None: - vars = model.cont_vars - vars = inputvars(vars) - - if scaling is None: - scaling = model.test_point - - if isinstance(scaling, dict): - scaling = guess_scaling( - Point(scaling, model=model), model=model, vars=vars) - - n = scaling.shape[0] - - self.step_size = step_scale / n**(1 / 4.) 
- - self.potential = quad_potential(scaling, is_cov, as_cov=False) - - if state is None: - state = SamplerHist() - self.state = state - self.Emax = Emax - - self.target_accept = target_accept - self.gamma = gamma - self.t0 = t0 - self.k = k - - self.Hbar = 0 - self.u = log(self.step_size * 10) - self.m = 1 - self.mode = mode - - shared = make_shared_replacements(vars, model) - - def create_hamiltonian(vars, shared, model): - dlogp = gradient(model.logpt, vars) - (logp, dlogp), q = join_nonshared_inputs( - [model.logpt, dlogp], vars, shared) - logp = CallableTensor(logp) - dlogp = CallableTensor(dlogp) - - return Hamiltonian(logp, dlogp, self.potential), q - - def create_energy_func(q): - p = tt.dvector('p') - p.tag.test_value = q.tag.test_value - E0 = energy(self.H, q, p) - E0_func = theano.function([q, p], E0, mode=self.mode) - E0_func.trust_input = True - - return E0_func - - self.H, q = create_hamiltonian(vars, shared, model) - self.compute_energy = create_energy_func(q) - - self.leapfrog1_dE = leapfrog1_dE(self.H, q, profile=profile, - mode=self.mode) - - super(NUTS, self).__init__(vars, shared, **kwargs) - - def astep(self, q0): - leapfrog = self.leapfrog1_dE - Emax = self.Emax - e = self.step_size - - p0 = self.potential.random() - E0 = self.compute_energy(q0, p0) - - u = uniform() - q = qn = qp = q0 - p = pn = pp = p0 - - n, s, j = 1, 1, 0 - - while s == 1: - v = bern(.5) * 2 - 1 - - if v == -1: - qn, pn, _, _, q1, n1, s1, a, na = buildtree( - leapfrog, qn, pn, u, v, j, e, Emax, E0) - else: - _, _, qp, pp, q1, n1, s1, a, na = buildtree( - leapfrog, qp, pp, u, v, j, e, Emax, E0) - - if s1 == 1 and bern(min(1, n1 * 1. / n)): - q = q1 - - n = n + n1 - - span = qp - qn - s = s1 * (span.dot(pn) >= 0) * (span.dot(pp) >= 0) - j = j + 1 - - p = -p - - w = 1. / (self.m + self.t0) - self.Hbar = (1 - w) * self.Hbar + w * \ - (self.target_accept - a * 1. / na) - - self.step_size = exp(self.u - (self.m**self.k / self.gamma) * self.Hbar) - self.m += 1 - - return q - - @staticmethod - def competence(var): - if var.dtype in continuous_types: - return Competence.IDEAL - return Competence.INCOMPATIBLE - - -def buildtree(leapfrog1_dE, q, p, u, v, j, e, Emax, E0): - if j == 0: - q1, p1, dE = leapfrog1_dE(q, p, array(v * e), E0) - - n1 = int(log(u) + dE <= 0) - s1 = int(log(u) + dE < Emax) - return q1, p1, q1, p1, q1, n1, s1, min(1, exp(-dE)), 1 - else: - qn, pn, qp, pp, q1, n1, s1, a1, na1 = buildtree( - leapfrog1_dE, q, p, u, v, j - 1, e, Emax, E0) - if s1 == 1: - if v == -1: - qn, pn, _, _, q11, n11, s11, a11, na11 = buildtree( - leapfrog1_dE, qn, pn, u, v, j - 1, e, Emax, E0) - else: - _, _, qp, pp, q11, n11, s11, a11, na11 = buildtree( - leapfrog1_dE, qp, pp, u, v, j - 1, e, Emax, E0) - - if bern(n11 * 1. / (max(n1 + n11, 1))): - q1 = q11 - - a1 = a1 + a11 - na1 = na1 + na11 - - span = qp - qn - s1 = s11 * (span.dot(pn) >= 0) * (span.dot(pp) >= 0) - n1 = n1 + n11 - return qn, pn, qp, pp, q1, n1, s1, a1, na1 - return - - -def leapfrog1_dE(H, q, profile, mode): - """Computes a theano function that computes one leapfrog step and the energy difference between the beginning and end of the trajectory. 
- Parameters - ---------- - H : Hamiltonian - q : theano.tensor - profile : Boolean - - Returns - ------- - theano function which returns - q_new, p_new, dE - """ - p = tt.dvector('p') - p.tag.test_value = q.tag.test_value - - e = tt.dscalar('e') - e.tag.test_value = 1 - - q1, p1 = leapfrog(H, q, p, 1, e) - E = energy(H, q1, p1) - - E0 = tt.dscalar('E0') - E0.tag.test_value = 1 - - dE = E - E0 - - f = theano.function([q, p, e, E0], [q1, p1, dE], - profile=profile, mode=mode) - f.trust_input = True - return f diff --git a/pymc3/tests/models.py b/pymc3/tests/models.py index d8a195fd46..9c8ad87775 100644 --- a/pymc3/tests/models.py +++ b/pymc3/tests/models.py @@ -19,7 +19,7 @@ def simple_categorical(): p = np.array([0.1, 0.2, 0.3, 0.4]) v = np.array([0.0, 1.0, 2.0, 3.0]) with Model() as model: - Categorical('x', p, shape = 3, testval = [1, 2, 3]) + Categorical('x', p, shape=3, testval=[1, 2, 3]) mu = np.dot(p, v) var = np.dot(p, (v - mu) ** 2) diff --git a/pymc3/tests/test_hmc.py b/pymc3/tests/test_hmc.py index d64d89de16..e40aa3fd11 100644 --- a/pymc3/tests/test_hmc.py +++ b/pymc3/tests/test_hmc.py @@ -1,32 +1,25 @@ -import pymc3 as pm import numpy as np + +from pymc3.blocking import DictToArrayBijection from . import models -from pymc3.step_methods.hmc import leapfrog, Hamiltonian +from pymc3.step_methods.hmc.base_hmc import BaseHMC from .checks import close_to -from ..blocking import DictToArrayBijection def test_leapfrog_reversible(): n = 3 start, model, _ = models.non_normal(n) - - with model: - h = pm.find_hessian(start, model=model) - step = pm.HamiltonianMC(model.vars, h, model=model) - + step = BaseHMC(vars=model.vars, model=model) bij = DictToArrayBijection(step.ordering, start) - - logp, dlogp = list(map(bij.mapf, step.fs)) - H = Hamiltonian(logp, dlogp, step.potential) - q0 = bij.map(start) p0 = np.ones(n) * .05 - for e in [.01, .1, 1.2]: - for L in [1, 2, 3, 4, 20]: + + for epsilon in [.01, .1, 1.2]: + for n_steps in [1, 2, 3, 4, 20]: q, p = q0, p0 - q, p = leapfrog(H, q, p, L, e) - q, p = leapfrog(H, q, -p, L, e) + q, p, _ = step.leapfrog(q, p, np.array(epsilon), np.array(n_steps, dtype='int32')) + q, p, _ = step.leapfrog(q, -p, np.array(epsilon), np.array(n_steps, dtype='int32')) - close_to(q, q0, 1e-8, str((L, e))) - close_to(-p, p0, 1e-8, str((L, e))) + close_to(q, q0, 1e-8, str((n_steps, epsilon))) + close_to(-p, p0, 1e-8, str((n_steps, epsilon))) diff --git a/pymc3/tests/test_step.py b/pymc3/tests/test_step.py index 9e8fa5c40d..924bd6ab29 100644 --- a/pymc3/tests/test_step.py +++ b/pymc3/tests/test_step.py @@ -1,5 +1,10 @@ +import os import unittest +from numpy.testing import assert_array_almost_equal +import numpy as np +from tqdm import tqdm + from .checks import close_to from .models import simple_categorical, mv_simple, mv_simple_discrete, simple_2model from pymc3.sampling import assign_step_methods, sample @@ -8,12 +13,122 @@ Metropolis, Slice, CompoundStep, MultivariateNormalProposal, HamiltonianMC) from pymc3.distributions import Binomial, Normal, Bernoulli, Categorical -from numpy.testing import assert_almost_equal -import numpy as np class TestStepMethods(object): # yield test doesn't work subclassing unittest.TestCase - def check_stat(self, check, trace): + master_samples = { + Slice: np.array([ + -8.13087389e-01, -3.08921856e-01, -6.79377098e-01, 6.50812585e-01, -7.63577596e-01, + -8.13199793e-01, -1.63823548e+00, -7.03863676e-02, 2.05107771e+00, 1.68598170e+00, + 6.92463695e-01, -7.75120766e-01, -1.62296463e+00, 3.59722423e-01, -2.31421712e-01, + 
-7.80686956e-02, -6.05860731e-01, -1.13000202e-01, 1.55675942e-01, -6.78527612e-01, + 6.31052333e-01, 6.09012517e-01, -1.56621643e+00, 5.04330883e-01, 3.14824082e-03, + -1.31287073e+00, 4.10706927e-01, 8.93815792e-01, 8.19317020e-01, 3.71900919e-01, + -2.62067312e+00, -3.47616592e+00, 1.50335041e+00, -1.05993351e+00, 2.41571723e-01, + -1.06258156e+00, 5.87999429e-01, -1.78480091e-01, -3.60278680e-01, 1.90615274e-01, + -1.24399204e-01, 4.03845589e-01, -1.47797573e-01, 7.90445804e-01, -1.21043819e+00, + -1.33964776e+00, 1.36366329e+00, -7.50175388e-01, 9.25241839e-01, -4.17493767e-01, + 1.85311339e+00, -2.49715343e+00, -3.18571692e-01, -1.49099668e+00, -2.62079621e-01, + -5.82376852e-01, -2.53033395e+00, 2.07580503e+00, -9.82615856e-01, 6.00517782e-01, + -9.83941620e-01, -1.59014118e+00, -1.83931394e-03, -4.71163466e-01, 1.90073737e+00, + -2.08929125e-01, -6.98388847e-01, 1.64502092e+00, -1.19525944e+00, 1.44424109e+00, + 1.52974876e+00, -5.70140077e-01, 5.08633322e-01, -1.70862492e-02, -1.69887948e-01, + 5.19760297e-01, -4.15149647e-01, 8.63685174e-02, -3.66805233e-01, -9.24988952e-01, + 2.33307122e+00, -2.60391496e-01, -5.86271814e-01, -5.01297170e-01, -1.53866195e+00, + 5.71285373e-01, -1.30571830e+00, 8.59587795e-01, 6.72170694e-01, 9.12433943e-01, + 7.04959179e-01, 8.37863464e-01, -5.24200836e-01, 1.28261340e+00, 9.08774240e-01, + 8.80566763e-01, 7.82911967e-01, 8.01843432e-01, 7.09251098e-01, 5.73803618e-01]), + HamiltonianMC: np.array([ + -1.56440708e-03, -2.37766120e-03, -6.95819902e-03, -4.88882715e-03, -6.54928517e-03, + -3.38653286e-03, -1.99381372e-03, -1.25904805e-03, -2.97173572e-04, -4.67391216e-04, + -2.03821237e-03, -1.33693751e-04, -2.17293248e-03, -4.11675406e-03, -4.23091782e-03, + -7.34120851e-03, -8.43726968e-03, -7.86976139e-03, -3.89551467e-03, -3.00788956e-03, + -3.82420513e-03, -1.35604792e-03, -2.49066947e-04, 4.03633859e-04, 9.34321408e-05, + 1.77722574e-03, 1.63761359e-03, 2.86208401e-03, -1.72243038e-04, 1.86863525e-03, + 1.76740215e-03, 1.79169049e-03, 1.07164602e-03, 1.41264547e-03, 2.49563456e-03, + 1.76639216e-03, 3.01570589e-03, 1.44186424e-04, 1.45073846e-03, 2.95031617e-04, + -1.28811479e-04, -7.35945905e-04, -6.00689088e-04, 2.75468405e-04, 1.05245800e-03, + 1.18892307e-03, 6.01165842e-04, 1.21016955e-03, -2.06751271e-03, -8.41426458e-04, + 6.09905557e-04, 2.92765303e-03, 4.15216348e-03, 2.71863268e-03, 3.42922082e-03, + 7.53890188e-03, 7.97507867e-03, 8.27371677e-03, 9.77811135e-03, 9.99705714e-03, + 1.13996054e-02, 1.15745874e-02, 1.08182152e-02, 1.08277279e-02, 9.32254191e-03, + 8.59914793e-03, 8.43927425e-03, 1.01570101e-02, 9.74607039e-03, 9.82868496e-03, + 1.01745777e-02, 1.19312194e-02, 1.53760522e-02, 1.38691940e-02, 1.40131760e-02, + 1.46184561e-02, 1.74382675e-02, 1.84241543e-02, 2.06913002e-02, 1.83520531e-02, + 2.03072531e-02, 1.72912752e-02, 1.38959101e-02, 1.21933473e-02, 1.05084488e-02, + 9.00532336e-03, 9.25863206e-03, 1.23618461e-02, 1.20207293e-02, 1.09334818e-02, + 1.16528011e-02, 1.29967126e-02, 1.38940942e-02, 1.11408833e-02, 1.09263348e-02, + 1.06521352e-02, 1.01622526e-02, 1.21998547e-02, 1.00880470e-02, 9.94787795e-03]), + Metropolis: np.array([ + 1.62434536, 1.01258895, 0.4844172, -0.58855142, 1.15626034, 0.39505344, 1.85716138, + -0.20297933, -0.20297933, -0.20297933, -0.20297933, -1.08083775, -1.08083775, + 0.06388596, 0.96474191, 0.28101405, 0.01312597, 0.54348144, -0.14369126, -0.98889691, + -0.98889691, -0.75448121, -0.94631676, -0.94631676, -0.89550901, -0.89550901, + -0.77535005, -0.15814694, 0.14202338, -0.21022647, 
-0.4191207, 0.16750249, 0.45308981, + 1.33823098, 1.8511608, 1.55306796, 1.55306796, 1.55306796, 1.55306796, 0.15657163, + 0.3166087, 0.3166087, 0.3166087, 0.3166087, 0.54670343, 0.54670343, 0.32437529, + 0.12361722, 0.32191694, 0.44092559, 0.56274686, 0.56274686, 0.18746191, 0.18746191, + -0.15639177, -0.11279491, -0.11279491, -0.11279491, -1.20770676, -1.03832432, + -0.29776787, -1.25146848, -1.25146848, -0.93630908, -0.5857631, -0.5857631, + -0.62445861, -0.62445861, -0.64907557, -0.64907557, -0.64907557, 0.58708846, + -0.61217957, 0.25116575, 0.25116575, 0.80170324, 1.59451011, 0.97097938, 1.77284041, + 1.81940771, 1.81940771, 1.81940771, 1.81940771, 1.95710892, 2.18960348, 2.18960348, + 2.18960348, 2.18960348, 2.63096792, 2.53081269, 2.5482221, 1.42620337, 0.90910891, + -0.08791792, 0.40729341, 0.23259025, 0.23259025, 0.23259025, 2.76091595, 2.51228118]), + NUTS: np.array([ + 0.68819657, 0.1767813, -0.59467679, -0.64216066, 1.63681405, 2.13404699, 0.03126563, + 0.31817152, 0.31817152, 0.40191527, 0.40191527, 0.99220141, 0.93036804, -0.41228181, + -1.80465851, -1.70577291, 0.19406438, 0.19406438, -0.03965181, -0.76135744, + 0.70023098, 1.07183677, 1.07183677, 0.2829979, 1.13524135, -0.26461224, + -0.39442329, -1.04109657, 0.79971205, 0.79971205, 0.96839778, 0.91868626, + 0.19468837, 0.19468837, -0.67755668, -0.67755668, -0.43722432, 0.12072881, + 0.6267432, 0.6861771, 0.4669198, 0.4669198, -0.08143768, 0.27691068, 0.11510718, + 2.29821426, 2.18308403, 1.16618069, -0.45615197, -0.45615197, -0.37076172, + -0.37076172, -0.38889599, 0.36200553, -0.55179735, -0.55179735, -0.18946703, + 1.11552335, 0.98985795, 0.98985795, 1.00313687, -0.18458164, 0.44025584, 0.97610126, + -0.1558578, -0.1558578, -0.01247235, -0.08303131, 0.52019377, -1.52329796, + -1.72856248, -1.19049049, -1.19049049, -0.8651521, -0.36421118, -0.40590409, + -0.78925074, -0.53960924, -0.53960924, 0.1069186, 0.40849997, 0.1560954, + 0.35461684, 0.35461684, -0.83935418, -0.85295353, -0.13990269, -0.1412904, + -0.1412904, -0.30071575, -0.296461, 0.06540186, -0.15145479, -0.15145479, + -0.21406771, -0.21533218, 0.06833495, 0.06833495, -0.18763595, 0.34138144]), + } + + def test_sample_exact(self): + for step_method in self.master_samples: + yield self.check_trace, step_method + + def check_trace(self, step_method): + """Tests whether the trace for step methods is exactly the same as on master. + + Code changes that effect how random numbers are drawn may change this, and require + `master_samples` to be updated, but such changes should be noted and justified in the + commit. + + This method may also be used to benchmark step methods across commits, by running, for + example + + ``` + BENCHMARK=100000 ./scripts/test.sh -s pymc3/tests/test_step.py:TestStepMethods + ``` + + on multiple commits. 
+ """ + test_steps = 100 + n_steps = int(os.getenv('BENCHMARK', 100)) + benchmarking = (n_steps != test_steps) + if benchmarking: + tqdm.write('Benchmarking {} with {:,d} samples'.format(step_method.__name__, n_steps)) + else: + tqdm.write('Checking {} has same trace as on master'.format(step_method.__name__)) + with Model(): + Normal('x', mu=0, sd=1) + trace = sample(n_steps, step=step_method(), random_seed=1) + + if not benchmarking: + assert_array_almost_equal(trace.get_values('x'), self.master_samples[step_method]) + + def check_stat(self, check, trace, name): for (var, stat, value, bound) in check: s = stat(trace[var][2000:], axis=0) close_to(s, value, bound) @@ -38,7 +153,7 @@ def test_step_continuous(self): ) for step in steps: trace = sample(8000, step=step, start=start, model=model, random_seed=1) - yield self.check_stat, check, trace + yield self.check_stat, check, trace, step.__class__.__name__ def test_step_discrete(self): start, model, (mu, C) = mv_simple_discrete() @@ -51,7 +166,7 @@ def test_step_discrete(self): ) for step in steps: trace = sample(20000, step=step, start=start, model=model, random_seed=1) - self.check_stat(check, trace) + yield self.check_stat, check, trace, step.__class__.__name__ def test_step_categorical(self): start, model, (mu, C) = simple_categorical() @@ -60,12 +175,12 @@ def test_step_categorical(self): ('x', np.std, unc, unc / 10.)) with model: steps = ( - CategoricalGibbsMetropolis(model.x, proposal = 'uniform'), - CategoricalGibbsMetropolis(model.x, proposal = 'proportional'), + CategoricalGibbsMetropolis(model.x, proposal='uniform'), + CategoricalGibbsMetropolis(model.x, proposal='proportional'), ) for step in steps: trace = sample(8000, step=step, start=start, model=model, random_seed=1) - self.check_stat(check, trace) + yield self.check_stat, check, trace, step.__class__.__name__ class TestCompoundStep(unittest.TestCase): diff --git a/pymc3/theanof.py b/pymc3/theanof.py index 04289aabbb..a391b198fc 100644 --- a/pymc3/theanof.py +++ b/pymc3/theanof.py @@ -210,8 +210,8 @@ def reshape_t(x, shape): class CallableTensor(object): - """Turns a symbolic variable with one input into a function that returns symbolic arguments with the one variable replaced with the input. - + """Turns a symbolic variable with one input into a function that returns symbolic arguments + with the one variable replaced with the input. """ def __init__(self, tensor): From 55fb1bddb563299088855c4ac824098468034d21 Mon Sep 17 00:00:00 2001 From: colin Date: Sat, 3 Dec 2016 13:47:31 -0500 Subject: [PATCH 6/6] Reformat docs --- pymc3/step_methods/hmc/base_hmc.py | 33 ++++++++-------- pymc3/step_methods/hmc/hmc.py | 14 +++---- pymc3/step_methods/hmc/nuts.py | 22 +++++------ pymc3/step_methods/hmc/quadpotential.py | 18 ++++----- pymc3/step_methods/hmc/trajectory.py | 52 ++++++++++++------------- 5 files changed, 70 insertions(+), 69 deletions(-) diff --git a/pymc3/step_methods/hmc/base_hmc.py b/pymc3/step_methods/hmc/base_hmc.py index 7d87f5d3c8..0c594b09e8 100644 --- a/pymc3/step_methods/hmc/base_hmc.py +++ b/pymc3/step_methods/hmc/base_hmc.py @@ -12,24 +12,25 @@ class BaseHMC(ArrayStepShared): def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, model=None, blocked=True, use_single_leapfrog=False, **theano_kwargs): - """ + """Superclass to implement Hamiltonian/hybrid monte carlo + Parameters ---------- - vars : list of theano variables - scaling : array_like, ndim = {1,2} - Scaling for momentum distribution. 1d arrays interpreted matrix diagonal. 
- step_scale : float, default=0.25 - Size of steps to take, automatically scaled down by 1/n**(1/4) - is_cov : bool, default=False - Treat scaling as a covariance matrix/vector if True, else treat it as a - precision matrix/vector - state - State object - model : pymc3 Model instance. default=Context model - blocked: Boolean, default True - use_single_leapfrog: Boolean, will leapfrog steps take a single step at a time. - default False. - **theano_kwargs: passed to theano functions + vars : list of theano variables + scaling : array_like, ndim = {1,2} + Scaling for momentum distribution. 1d arrays interpreted matrix diagonal. + step_scale : float, default=0.25 + Size of steps to take, automatically scaled down by 1/n**(1/4) + is_cov : bool, default=False + Treat scaling as a covariance matrix/vector if True, else treat it as a + precision matrix/vector + state + State object + model : pymc3 Model instance. default=Context model + blocked: Boolean, default True + use_single_leapfrog: Boolean, will leapfrog steps take a single step at a time. + default False. + **theano_kwargs: passed to theano functions """ model = modelcontext(model) diff --git a/pymc3/step_methods/hmc/hmc.py b/pymc3/step_methods/hmc/hmc.py index 70a042f263..1e0b2c3dd4 100644 --- a/pymc3/step_methods/hmc/hmc.py +++ b/pymc3/step_methods/hmc/hmc.py @@ -27,13 +27,13 @@ def __init__(self, vars=None, path_length=2., step_rand=unif, **kwargs): """ Parameters ---------- - vars : list of theano variables - path_length : float, default=2 - total length to travel - step_rand : function float -> float, default=unif - A function which takes the step size and returns an new one used to - randomize the step size at each iteration. - **kwargs : passed to BaseHMC + vars : list of theano variables + path_length : float, default=2 + total length to travel + step_rand : function float -> float, default=unif + A function which takes the step size and returns an new one used to + randomize the step size at each iteration. 
+ **kwargs : passed to BaseHMC """ super(HamiltonianMC, self).__init__(vars, **kwargs) self.path_length = path_length diff --git a/pymc3/step_methods/hmc/nuts.py b/pymc3/step_methods/hmc/nuts.py index e22a3cb8ec..d171159cd9 100644 --- a/pymc3/step_methods/hmc/nuts.py +++ b/pymc3/step_methods/hmc/nuts.py @@ -28,17 +28,17 @@ def __init__(self, vars=None, Emax=1000, target_accept=0.8, """ Parameters ---------- - vars : list of Theano variables, default continuous vars - Emax : float, default 1000 - maximum energy - target_accept : float (0,1) default .8 - target for avg accept probability between final branch and initial position - gamma : float, default .05 - k : float (.5,1) default .75 - scaling of speed of adaptation - t0 : int, default 10 - slows inital adapatation - kwargs: passed to BaseHMC + vars : list of Theano variables, default continuous vars + Emax : float, default 1000 + maximum energy + target_accept : float (0,1) default .8 + target for avg accept probability between final branch and initial position + gamma : float, default .05 + k : float (.5,1) default .75 + scaling of speed of adaptation + t0 : int, default 10 + slows inital adapatation + kwargs: passed to BaseHMC """ super(NUTS, self).__init__(vars, use_single_leapfrog=True, **kwargs) diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py index e12fe1dcbc..0c85d2c214 100644 --- a/pymc3/step_methods/hmc/quadpotential.py +++ b/pymc3/step_methods/hmc/quadpotential.py @@ -14,18 +14,18 @@ def quad_potential(C, is_cov, as_cov): """ Parameters ---------- - C : arraylike, 0 <= ndim <= 2 - scaling matrix for the potential - vector treated as diagonal matrix - is_cov : Boolean - whether C is provided as a covariance matrix or hessian - as_cov : Boolean - whether the random draws should come from the normal dist - using the covariance matrix above or the inverse + C : arraylike, 0 <= ndim <= 2 + scaling matrix for the potential + vector treated as diagonal matrix + is_cov : Boolean + whether C is provided as a covariance matrix or hessian + as_cov : Boolean + whether the random draws should come from the normal dist + using the covariance matrix above or the inverse Returns ------- - q : Quadpotential + q : Quadpotential """ if issparse(C) and is_cov != as_cov: diff --git a/pymc3/step_methods/hmc/trajectory.py b/pymc3/step_methods/hmc/trajectory.py index b64790a21a..ebc8c44d5c 100644 --- a/pymc3/step_methods/hmc/trajectory.py +++ b/pymc3/step_methods/hmc/trajectory.py @@ -14,15 +14,15 @@ def _theano_hamiltonian(model_vars, shared, logpt, potential): Parameters ---------- - model_vars: array of variables to be sampled - shared theano tensors that are already shared - logpt: model log probability - potential: hamiltonian potential + model_vars : array of variables to be sampled + shared : theano tensors that are already shared + logpt : model log probability + potential : hamiltonian potential Returns ------- - Hamiltonian: namedtuple with log pdf, gradient of log pdf, and potential functions - q: Starting position variable. + Hamiltonian : namedtuple with log pdf, gradient of log pdf, and potential functions + q : Starting position variable. 
""" dlogp = gradient(logpt, model_vars) (logp, dlogp), q = join_nonshared_inputs([logpt, dlogp], model_vars, shared) @@ -36,14 +36,14 @@ def _theano_energy_function(H, q, **theano_kwargs): Parameters ---------- - H: Hamiltonian namedtuple - q: theano variable, starting position - theano_kwargs: passed to theano.function + H : Hamiltonian namedtuple + q : theano variable, starting position + theano_kwargs : passed to theano.function Returns ------- - energy_function: theano function that computes the energy at a point (p, q) in phase space - p: Starting momentum variable. + energy_function : theano function that computes the energy at a point (p, q) in phase space + p : Starting momentum variable. """ p = tt.dvector('p') p.tag.test_value = q.tag.test_value @@ -91,19 +91,19 @@ def get_theano_hamiltonian_functions(model_vars, shared, logpt, potential, Parameters ---------- model_vars : array of variables to be sampled - shared theano tensors that are already shared - logpt: model log probability - potential: hamiltonian potential + shared : theano tensors that are already shared + logpt : model log probability + potential : Hamiltonian potential theano_kwargs : dictionary of keyword arguments to pass to theano functions - use_single_leapfrog: Boolean, if only 1 integration step is done at a time (as in NUTS), - this provides a ~2x speedup + use_single_leapfrog : Boolean, if only 1 integration step is done at a time (as in NUTS), + this provides a ~2x speedup Returns ------- - H: Hamiltonian namedtuple - energy_function: theano function computing energy at a point in phase space - leapfrog_integrator: theano function integrating the Hamiltonian from a point in phase space - theano_variables: dictionary of variables used in the computation graph which may be useful + H : Hamiltonian namedtuple + energy_function : theano function computing energy at a point in phase space + leapfrog_integrator : theano function integrating the Hamiltonian from a point in phase space + theano_variables : dictionary of variables used in the computation graph which may be useful """ H, q = _theano_hamiltonian(model_vars, shared, logpt, potential) energy_function, p = _theano_energy_function(H, q, **theano_kwargs) @@ -137,18 +137,18 @@ def leapfrog(H, q, p, epsilon, n_steps): ---------- H : Hamiltonian instance. Tuple of `logp, dlogp, potential`. - q: Theano.tensor + q : Theano.tensor initial position vector - p: Theano.tensor + p : Theano.tensor initial momentum vector - epsilon: float, step size - n_steps: int, number of iterations + epsilon : float, step size + n_steps : int, number of iterations Returns ------- - position: Theano.tensor + position : Theano.tensor position estimate at time :math:`n \cdot e`. - momentum: Theano.tensor + momentum : Theano.tensor momentum estimate at time :math:`n \cdot e`. """ def full_update(p, q):