From 2d6fee8405d3b3d68abb25495fe318f3fa9be06e Mon Sep 17 00:00:00 2001 From: Chris Fonnesbeck Date: Thu, 17 Nov 2016 13:43:51 -0500 Subject: [PATCH 1/6] Added mode argument to several step methods and advi to allow mode setting (e.g. nanguardmode) for Theano functions --- pymc3/step_methods/metropolis.py | 7 +++++-- pymc3/step_methods/nuts.py | 11 ++++++++--- pymc3/variational/advi.py | 9 ++++++--- pymc3/variational/advi_minibatch.py | 7 +++++-- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index 6775eeb2f3..3645542f48 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -66,7 +66,8 @@ class Metropolis(ArrayStepShared): The frequency of tuning. Defaults to 100 iterations. model : PyMC Model Optional model for sampling step. Defaults to None (taken from context). - + mode : string or `Mode` instance. + compilation mode passed to Theano functions """ default_blocked = False @@ -93,6 +94,8 @@ def __init__(self, vars=None, S=None, proposal_dist=NormalProposal, scaling=1., [[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in vars]) self.any_discrete = self.discrete.any() self.all_discrete = self.discrete.all() + + self.mode = mode shared = pm.make_shared_replacements(vars, model) self.delta_logp = delta_logp(model.logpt, vars, shared) @@ -430,6 +433,6 @@ def delta_logp(logp, vars, shared): logp1 = pm.CallableTensor(logp0)(inarray1) - f = theano.function([inarray1, inarray0], logp1 - logp0) + f = theano.function([inarray1, inarray0], logp1 - logp0, mode=self.mode) f.trust_input = True return f diff --git a/pymc3/step_methods/nuts.py b/pymc3/step_methods/nuts.py index b72813156d..17404fb1e4 100644 --- a/pymc3/step_methods/nuts.py +++ b/pymc3/step_methods/nuts.py @@ -32,7 +32,8 @@ def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, state k=0.75, t0=10, model=None, - profile=False, **kwargs): + profile=False, + mode=None, **kwargs): """ Parameters ---------- @@ -57,6 +58,8 @@ def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, state model : Model profile : bool or ProfileStats sets the functions to be profiled + mode : string or `Mode` instance. 
+ compilation mode passed to Theano functions """ model = modelcontext(model) @@ -90,6 +93,7 @@ def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, state self.Hbar = 0 self.u = log(self.step_size * 10) self.m = 1 + self.mode = mode shared = make_shared_replacements(vars, model) @@ -106,7 +110,7 @@ def create_energy_func(q): p = tt.dvector('p') p.tag.test_value = q.tag.test_value E0 = energy(self.H, q, p) - E0_func = theano.function([q, p], E0) + E0_func = theano.function([q, p], E0, mode=self.mode) E0_func.trust_input = True return E0_func @@ -227,6 +231,7 @@ def leapfrog1_dE(H, q, profile): dE = E - E0 - f = theano.function([q, p, e, E0], [q1, p1, dE], profile=profile) + f = theano.function([q, p, e, E0], [q1, p1, dE], + profile=profile, mode=self.mode) f.trust_input = True return f diff --git a/pymc3/variational/advi.py b/pymc3/variational/advi.py index 67889529b0..a89195e085 100644 --- a/pymc3/variational/advi.py +++ b/pymc3/variational/advi.py @@ -36,7 +36,8 @@ def gen_random_state(): def advi(vars=None, start=None, model=None, n=5000, accurate_elbo=False, - optimizer=None, learning_rate=.001, epsilon=.1, random_seed=None): + optimizer=None, learning_rate=.001, epsilon=.1, mode=None, + random_seed=None): """Perform automatic differentiation variational inference (ADVI). This function implements the meanfield ADVI, where the variational @@ -87,7 +88,9 @@ def advi(vars=None, start=None, model=None, n=5000, accurate_elbo=False, This parameter is ignored when optimizer is given. random_seed : int or None Seed to initialize random state. None uses current seed. - + mode : string or `Mode` instance. + Compilation mode passed to Theano functions + Returns ------- ADVIFit @@ -137,7 +140,7 @@ def advi(vars=None, start=None, model=None, n=5000, accurate_elbo=False, uw_shared = theano.shared(uw, 'uw_shared') elbo = pm.CallableTensor(elbo)(uw_shared) updates = optimizer(loss=-1 * elbo, param=[uw_shared]) - f = theano.function([], [uw_shared, elbo], updates=updates) + f = theano.function([], [uw_shared, elbo], updates=updates, mode=mode) # Optimization loop elbos = np.empty(n) diff --git a/pymc3/variational/advi_minibatch.py b/pymc3/variational/advi_minibatch.py index 79ed5a0638..097ff16825 100644 --- a/pymc3/variational/advi_minibatch.py +++ b/pymc3/variational/advi_minibatch.py @@ -207,7 +207,8 @@ def advi_minibatch(vars=None, start=None, model=None, n=5000, n_mcsamples=1, minibatch_RVs=None, minibatch_tensors=None, minibatches=None, local_RVs=None, observed_RVs=None, encoder_params=[], total_size=None, optimizer=None, - learning_rate=.001, epsilon=.1, random_seed=None): + learning_rate=.001, epsilon=.1, random_seed=None, + mode=None): """Perform mini-batch ADVI. This function implements a mini-batch ADVI with the meanfield @@ -297,6 +298,8 @@ def advi_minibatch(vars=None, start=None, model=None, n=5000, n_mcsamples=1, This parameter is ignored when an optimizer is given. random_seed : int Seed to initialize random state. + mode : string or `Mode` instance. 
+ compilation mode passed to Theano functions Returns ------- @@ -382,7 +385,7 @@ def is_shared(t): if 0 < len(global_RVs): params += [uw_global_shared] updates = OrderedDict(optimizer(loss=-1 * elbo, param=params)) - f = theano.function(tensors, elbo, updates=updates) + f = theano.function(tensors, elbo, updates=updates, mode=mode) # Optimization loop elbos = np.empty(n) From 4e5b9c205de139c9ac6226eeb976377162414ba6 Mon Sep 17 00:00:00 2001 From: Chris Fonnesbeck Date: Thu, 17 Nov 2016 14:25:18 -0500 Subject: [PATCH 2/6] Fixed namespace bugs in mode attribute --- pymc3/step_methods/metropolis.py | 2 +- pymc3/step_methods/nuts.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index 3645542f48..67b7873bb5 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -72,7 +72,7 @@ class Metropolis(ArrayStepShared): default_blocked = False def __init__(self, vars=None, S=None, proposal_dist=NormalProposal, scaling=1., - tune=True, tune_interval=100, model=None, **kwargs): + tune=True, tune_interval=100, model=None, mode=None, **kwargs): model = pm.modelcontext(model) diff --git a/pymc3/step_methods/nuts.py b/pymc3/step_methods/nuts.py index 17404fb1e4..84bb3a90ba 100644 --- a/pymc3/step_methods/nuts.py +++ b/pymc3/step_methods/nuts.py @@ -118,7 +118,8 @@ def create_energy_func(q): self.H, q = create_hamiltonian(vars, shared, model) self.compute_energy = create_energy_func(q) - self.leapfrog1_dE = leapfrog1_dE(self.H, q, profile=profile) + self.leapfrog1_dE = leapfrog1_dE(self.H, q, profile=profile, + mode=self.mode) super(NUTS, self).__init__(vars, shared, **kwargs) @@ -204,7 +205,7 @@ def buildtree(leapfrog1_dE, q, p, u, v, j, e, Emax, E0): return -def leapfrog1_dE(H, q, profile): +def leapfrog1_dE(H, q, profile, mode): """Computes a theano function that computes one leapfrog step and the energy difference between the beginning and end of the trajectory. 
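
    The `mode` argument is handed to `theano.function` when this graph is
    compiled, so any Theano compilation mode can be supplied. A hypothetical
    example of hunting NaNs during sampling (`NanGuardMode` ships with
    Theano):

        from theano.compile.nanguardmode import NanGuardMode
        step = pm.NUTS(mode=NanGuardMode(nan_is_error=True,
                                         inf_is_error=True,
                                         big_is_error=False))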
Parameters
----------

@@ -232,6 +233,6 @@ def leapfrog1_dE(H, q, profile, mode):
     dE = E - E0

     f = theano.function([q, p, e, E0], [q1, p1, dE],
-                        profile=profile, mode=self.mode)
+                        profile=profile, mode=mode)
     f.trust_input = True
     return f

From 0ebaacdee2fe34fb6451aede880d9dd5d69f0ccc Mon Sep 17 00:00:00 2001
From: Chris Fonnesbeck
Date: Thu, 17 Nov 2016 16:40:37 -0500
Subject: [PATCH 3/6] Reverted function in delta_logp to not accept mode
 argument

---
 pymc3/step_methods/metropolis.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py
index 67b7873bb5..74f64a27d0 100644
--- a/pymc3/step_methods/metropolis.py
+++ b/pymc3/step_methods/metropolis.py
@@ -433,6 +433,6 @@ def delta_logp(logp, vars, shared):

     logp1 = pm.CallableTensor(logp0)(inarray1)

-    f = theano.function([inarray1, inarray0], logp1 - logp0, mode=self.mode)
+    f = theano.function([inarray1, inarray0], logp1 - logp0)
     f.trust_input = True
     return f

From 55c8ce644ff762f591dd1df7b44c825005ecc066 Mon Sep 17 00:00:00 2001
From: Maxim
Date: Mon, 28 Nov 2016 07:38:18 +0300
Subject: [PATCH 4/6] ENH User model (#1525)

* Started to write Base class for pymc3.models
* moved `add_var` to public api
* Added some docstrings
* Added some docstrings
* added getitem and fixed a typo
* added assertion check
* added resolve var method
* decided not to add resolve method
* Added linear component
* Docs fix
* patsy's intercept is inited properly now
* refactored code
* updated docs
* added possibility to init coefficients with random variables
* added glm
* refactored api, fixed formula init
* refactored linear model, extended acceptable types
* moved useful matrix and labels creation to utils file
* code style
* removed redundant evaluation of shape
* refactored resolver for constructing matrix and labels
* changed error message
* changed signature of init
* simplified utils any_to_tensor_and_labels code
* tests for `any_to_tensor_and_labels`
* added docstring for `any_to_tensor_and_labels` util
* forgot to document return type in `any_to_tensor_and_labels`
* refactored code for dict
* dict tests fix (do not check labels there)
* added access to random vars of model
* added a shortcut for all variables so there is a unified way to get them
* added default priors for linear model
* update docs for linear
* refactored UserModel api, made it more similar to pm.Model class
* Lots of refactoring, tests for base class, more plain api design
* deleted unused module variable
* fixed some typos in docstring
* Refactored pm.Model class, now it is ready for inheritance
* Added documentation for Model class
* Small typo in docstring
* nested contains for treedict (needed for add_random_variable)
* More accurate duplicate implementation of treedict/treelist
* refactored treedict/treelist
* changed `__imul__` of treelist
* added `root` property and `isroot` indicator for base model
* protect `parent` and `model` attributes from violation
* travis' python2 did not fail on bad syntax (maybe it's too new), fixed
* decided not to use functools wrapper

Unfortunately the functools wrapper fails when decorating built-in methods,
so I need to fix that improper behaviour (a rough sketch of the failure is
below).
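A hypothetical reproduction (`wrap` is illustrative, not code from this PR):

    import functools

    def wrap(meth):
        # functools.wraps copies attributes (e.g. __module__) that
        # built-in methods such as list.append do not expose on
        # Python 2, so the decoration itself raises
        @functools.wraps(meth)
        def wrapped(*args, **kwargs):
            return meth(*args, **kwargs)
        return wrapped

    wrap(list.append)  # AttributeError -- hence the manual
                       # `wrapped.__name__ = meth.__name__` in `withparent`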
Some bad but needed tricks were implemented * Added models package to setup script * Refactor utils * Fix some typos in pm.model --- pymc3/model.py | 271 +++++++++++++++++++++++++++--- pymc3/models/__init__.py | 6 + pymc3/models/linear.py | 134 +++++++++++++++ pymc3/models/utils.py | 126 ++++++++++++++ pymc3/tests/test_model.py | 120 +++++++++++++ pymc3/tests/test_models_linear.py | 108 ++++++++++++ pymc3/tests/test_models_utils.py | 79 +++++++++ requirements.txt | 1 + setup.py | 5 +- 9 files changed, 829 insertions(+), 21 deletions(-) create mode 100644 pymc3/models/__init__.py create mode 100644 pymc3/models/linear.py create mode 100644 pymc3/models/utils.py create mode 100644 pymc3/tests/test_model.py create mode 100644 pymc3/tests/test_models_linear.py create mode 100644 pymc3/tests/test_models_utils.py diff --git a/pymc3/model.py b/pymc3/model.py index 5f079b5686..1220c5529d 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -1,3 +1,4 @@ +import six import numpy as np import theano import theano.tensor as tt @@ -168,17 +169,215 @@ def logpt(self): return tt.sum(self.logp_elemwiset) -class Model(Context, Factor): - """Encapsulates the variables and likelihood factors of a model.""" +class InitContextMeta(type): + """Metaclass that executes `__init__` of instance in it's context""" + def __call__(cls, *args, **kwargs): + instance = cls.__new__(cls, *args, **kwargs) + with instance: # appends context + instance.__init__(*args, **kwargs) + return instance + + +def withparent(meth): + """Helper wrapper that passes calls to parent's instance""" + def wrapped(self, *args, **kwargs): + res = meth(self, *args, **kwargs) + if getattr(self, 'parent', None) is not None: + getattr(self.parent, meth.__name__)(*args, **kwargs) + return res + # Unfortunately functools wrapper fails + # when decorating built-in methods so we + # need to fix that improper behaviour + wrapped.__name__ = meth.__name__ + return wrapped + + +class treelist(list): + """A list that passes mutable extending operations used in Model + to parent list instance. + Extending treelist you will also extend its parent + """ + def __init__(self, iterable=(), parent=None): + super(treelist, self).__init__(iterable) + assert isinstance(parent, list) or parent is None + self.parent = parent + if self.parent is not None: + self.parent.extend(self) + # typechecking here works bad + append = withparent(list.append) + __iadd__ = withparent(list.__iadd__) + extend = withparent(list.extend) + + def tree_contains(self, item): + if isinstance(self.parent, treedict): + return (list.__contains__(self, item) or + self.parent.tree_contains(item)) + elif isinstance(self.parent, list): + return (list.__contains__(self, item) or + self.parent.__contains__(item)) + else: + return list.__contains__(self, item) + + def __setitem__(self, key, value): + raise NotImplementedError('Method is removed as we are not' + ' able to determine ' + 'appropriate logic for it') + + def __imul__(self, other): + t0 = len(self) + list.__imul__(self, other) + if self.parent is not None: + self.parent.extend(self[t0:]) + + +class treedict(dict): + """A dict that passes mutable extending operations used in Model + to parent dict instance. 
+ Extending treedict you will also extend its parent + """ + def __init__(self, iterable=(), parent=None, **kwargs): + super(treedict, self).__init__(iterable, **kwargs) + assert isinstance(parent, dict) or parent is None + self.parent = parent + if self.parent is not None: + self.parent.update(self) + # typechecking here works bad + __setitem__ = withparent(dict.__setitem__) + update = withparent(dict.update) + + def tree_contains(self, item): + # needed for `add_random_variable` method + if isinstance(self.parent, treedict): + return (dict.__contains__(self, item) or + self.parent.tree_contains(item)) + elif isinstance(self.parent, dict): + return (dict.__contains__(self, item) or + self.parent.__contains__(item)) + else: + return dict.__contains__(self, item) + + +class Model(six.with_metaclass(InitContextMeta, Context, Factor)): + """Encapsulates the variables and likelihood factors of a model. - def __init__(self): - self.named_vars = {} - self.free_RVs = [] - self.observed_RVs = [] - self.deterministics = [] - self.potentials = [] - self.missing_values = [] - self.model = self + Model class can be used for creating class based models. To create + a class based model you should inherit from `Model` and + override `__init__` with arbitrary definitions + (do not forget to call base class `__init__` first). + + Parameters + ---------- + name : str, default '' - name that will be used as prefix for + names of all random variables defined within model + model : Model, default None - instance of Model that is + supposed to be a parent for the new instance. If None, + context will be used. All variables defined within instance + will be passed to the parent instance. So that 'nested' model + contributes to the variables and likelihood factors of + parent model. 
+ + Examples + -------- + # How to define a custom model + class CustomModel(Model): + # 1) override init + def __init__(self, mean=0, sd=1, name='', model=None): + # 2) call super's init first, passing model and name to it + # name will be prefix for all variables here + # if no name specified for model there will be no prefix + super(CustomModel, self).__init__(name, model) + # now you are in the context of instance, + # `modelcontext` will return self + # you can define variables in several ways + # note, that all variables will get model's name prefix + + # 3) you can create variables with Var method + self.Var('v1', Normal.dist(mu=mean, sd=sd)) + # this will create variable named like '{prefix_}v1' + # and assign attribute 'v1' to instance + # created variable can be accessed with self.v1 or self['v1'] + + # 4) this syntax will also work as we are in the context + # of instance itself, names are given as usual + Normal('v2', mu=mean, sd=sd) + + # something more complex is allowed too + Normal('v3', mu=mean, sd=HalfCauchy('sd', beta=10, testval=1.)) + + # Deterministic variables can be used in usual way + Deterministic('v3_sq', self.v3 ** 2) + # Potentials too + Potential('p1', tt.constant(1)) + + # After defining a class CustomModel you can use it in several ways + + # I: + # state the model within a context + with Model() as model: + CustomModel() + # arbitrary actions + + # II: + # use new class as entering point in context + with CustomModel() as model: + Normal('new_normal_var', mu=1, sd=0) + + # III: + # just get model instance with all that was defined in it + model = CustomModel() + + # IV: + # use many custom models within one context + with Model() as model: + CustomModel(mean=1, name='first') + CustomModel(mean=2, name='second') + """ + def __new__(cls, *args, **kwargs): + # resolves the parent instance + instance = object.__new__(cls) + if kwargs.get('model') is not None: + instance._parent = kwargs.get('model') + elif cls.get_contexts(): + instance._parent = cls.get_contexts()[-1] + else: + instance._parent = None + return instance + + def __init__(self, name='', model=None): + self.name = name + if self.parent is not None: + self.named_vars = treedict(parent=self.parent.named_vars) + self.free_RVs = treelist(parent=self.parent.free_RVs) + self.observed_RVs = treelist(parent=self.parent.observed_RVs) + self.deterministics = treelist(parent=self.parent.deterministics) + self.potentials = treelist(parent=self.parent.potentials) + self.missing_values = treelist(parent=self.parent.missing_values) + else: + self.named_vars = treedict() + self.free_RVs = treelist() + self.observed_RVs = treelist() + self.deterministics = treelist() + self.potentials = treelist() + self.missing_values = treelist() + + @property + def model(self): + return self + + @property + def parent(self): + return self._parent + + @property + def root(self): + model = self + while not model.isroot: + model = model.parent + return model + + @property + def isroot(self): + return self.parent is None @property @memoize @@ -271,6 +470,7 @@ def Var(self, name, dist, data=None): ------- FreeRV or ObservedRV """ + name = self.name_for(name) if data is None: if getattr(dist, "transform", None) is None: var = FreeRV(name=name, distribution=dist, model=self) @@ -308,15 +508,46 @@ def Var(self, name, dist, data=None): def add_random_variable(self, var): """Add a random variable to the named variables of the model.""" - if var.name in self.named_vars: + if self.named_vars.tree_contains(var.name): raise ValueError( 
"Variable name {} already exists.".format(var.name)) self.named_vars[var.name] = var - if not hasattr(self, var.name): - setattr(self, var.name, var) + if not hasattr(self, self.name_of(var.name)): + setattr(self, self.name_of(var.name), var) + + @property + def prefix(self): + return '%s_' % self.name if self.name else '' + + def name_for(self, name): + """Checks if name has prefix and adds if needed + """ + if self.prefix: + if not name.startswith(self.prefix): + return '{}{}'.format(self.prefix, name) + else: + return name + else: + return name + + def name_of(self, name): + """Checks if name has prefix and deletes if needed + """ + if not self.prefix or not name: + return name + elif name.startswith(self.prefix): + return name[len(self.prefix):] + else: + return name def __getitem__(self, key): - return self.named_vars[key] + try: + return self.named_vars[key] + except KeyError as e: + try: + return self.named_vars[self.name_for(key)] + except KeyError: + raise e @memoize def makefn(self, outs, mode=None, *args, **kwargs): @@ -633,9 +864,10 @@ def Deterministic(name, var, model=None): ------- n : var but with name name """ - var.name = name - modelcontext(model).deterministics.append(var) - modelcontext(model).add_random_variable(var) + model = modelcontext(model) + var.name = model.name_for(name) + model.deterministics.append(var) + model.add_random_variable(var) return var @@ -651,8 +883,9 @@ def Potential(name, var, model=None): ------- var : var, with name attribute """ - var.name = name - modelcontext(model).potentials.append(var) + model = modelcontext(model) + var.name = model.name_for(name) + model.potentials.append(var) return var diff --git a/pymc3/models/__init__.py b/pymc3/models/__init__.py new file mode 100644 index 0000000000..17e6772687 --- /dev/null +++ b/pymc3/models/__init__.py @@ -0,0 +1,6 @@ +from .linear import LinearComponent, Glm + +__all__ = [ + 'LinearComponent', + 'Glm' +] diff --git a/pymc3/models/linear.py b/pymc3/models/linear.py new file mode 100644 index 0000000000..b6a224f2b4 --- /dev/null +++ b/pymc3/models/linear.py @@ -0,0 +1,134 @@ +import theano.tensor as tt +import pandas as pd +import numpy as np +from ..distributions import Normal, Flat +from ..glm import families +from ..model import Model, Deterministic +from .utils import any_to_tensor_and_labels + +__all__ = [ + 'LinearComponent', + 'Glm' +] + + +class LinearComponent(Model): + """Creates linear component, y_est is accessible via attribute + Parameters + ---------- + name : str - name, associated with the linear component + x : pd.DataFrame or np.ndarray + y : pd.Series or np.array + intercept : bool - fit with intercept or not? 
+ labels : list - replace variable names with these labels + priors : dict - priors for coefficients + use `Intercept` key for defining Intercept prior + defaults to Flat.dist() + use `Regressor` key for defining default prior for all regressors + defaults to Normal.dist(mu=0, tau=1.0E-6) + vars : dict - random variables instead of creating new ones + """ + default_regressor_prior = Normal.dist(mu=0, tau=1.0E-6) + default_intercept_prior = Flat.dist() + + def __init__(self, x, y, intercept=True, labels=None, + priors=None, vars=None, name='', model=None): + super(LinearComponent, self).__init__(name, model) + if priors is None: + priors = {} + if vars is None: + vars = {} + x, labels = any_to_tensor_and_labels(x, labels) + # now we have x, shape and labels + if intercept: + x = tt.concatenate( + [tt.ones((x.shape[0], 1), x.dtype), x], + axis=1 + ) + labels = ['Intercept'] + labels + coeffs = list() + for name in labels: + if name == 'Intercept': + if name in vars: + v = Deterministic(name, vars[name]) + else: + v = self.Var( + name=name, + dist=priors.get( + name, + self.default_intercept_prior + ) + ) + coeffs.append(v) + else: + if name in vars: + v = Deterministic(name, vars[name]) + else: + v = self.Var( + name=name, + dist=priors.get( + name, + priors.get( + 'Regressor', + self.default_regressor_prior + ) + ) + ) + coeffs.append(v) + self.coeffs = tt.stack(coeffs, axis=0) + self.y_est = x.dot(self.coeffs) + + @classmethod + def from_formula(cls, formula, data, priors=None, vars=None, name='', model=None): + import patsy + y, x = patsy.dmatrices(formula, data) + labels = x.design_info.column_names + return cls(np.asarray(x), np.asarray(y)[:, 0], intercept=False, labels=labels, + priors=priors, vars=vars, name=name, model=model) + + +class Glm(LinearComponent): + """Creates glm model, y_est is accessible via attribute + Parameters + ---------- + name : str - name, associated with the linear component + x : pd.DataFrame or np.ndarray + y : pd.Series or np.array + intercept : bool - fit with intercept or not? 
+ labels : list - replace variable names with these labels + priors : dict - priors for coefficients + use `Intercept` key for defining Intercept prior + defaults to Flat.dist() + use `Regressor` key for defining default prior for all regressors + defaults to Normal.dist(mu=0, tau=1.0E-6) + init : dict - test_vals for coefficients + vars : dict - random variables instead of creating new ones + family : pymc3.glm.families object + """ + def __init__(self, x, y, intercept=True, labels=None, + priors=None, vars=None, family='normal', name='', model=None): + super(Glm, self).__init__( + x, y, intercept=intercept, labels=labels, + priors=priors, vars=vars, name=name, model=model + ) + + _families = dict( + normal=families.Normal, + student=families.StudentT, + binomial=families.Binomial, + poisson=families.Poisson + ) + if isinstance(family, str): + family = _families[family]() + self.y_est = family.create_likelihood( + name='', y_est=self.y_est, + y_data=y, model=self) + + @classmethod + def from_formula(cls, formula, data, priors=None, + vars=None, family='normal', name='', model=None): + import patsy + y, x = patsy.dmatrices(formula, data) + labels = x.design_info.column_names + return cls(np.asarray(x), np.asarray(y)[:, 0], intercept=False, labels=labels, + priors=priors, vars=vars, family=family, name=name, model=model) diff --git a/pymc3/models/utils.py b/pymc3/models/utils.py new file mode 100644 index 0000000000..ea073fcd1b --- /dev/null +++ b/pymc3/models/utils.py @@ -0,0 +1,126 @@ +import six +import pandas as pd +from pandas.core.common import PandasError +import numpy as np +import theano.tensor as tt + + +def any_to_tensor_and_labels(x, labels=None): + """Util for converting input x to tensor trying to + create labels for columns if they are not provided. + + Default names for columns are ['x0', 'x1', ...], for mappable + arrays (e.g. pd.DataFrame) their names are treated as labels. + You can override them with `labels` argument. 
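+
+    A short sketch of the default labelling (this mirrors the behaviour
+    exercised in pymc3/tests/test_models_utils.py below):
+
+        x, labels = any_to_tensor_and_labels(np.ones((3, 2)))
+        # labels == ['x0', 'x1']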
+ + If you have tensor input you should provide labels as we + cannot get their shape directly + + If you pass dict input we cannot rely on labels order thus dict + keys are treated as labels anyway + + Parameters + ---------- + x : np.ndarray | pd.DataFrame | tt.Variable | dict | list + labels : list - names for columns of output tensor + + Returns + ------- + (x, labels) - tensor and labels for its columns + """ + if isinstance(labels, six.string_types): + labels = [labels] + # pandas.DataFrame + # labels can come from here + # we can override them + if isinstance(x, pd.DataFrame): + if not labels: + labels = x.columns + x = x.as_matrix() + + # pandas.Series + # there can still be a label + # we can override labels + elif isinstance(x, pd.Series): + if not labels: + labels = [x.name] + x = x.as_matrix()[:, None] + + # dict + # labels are keys, + # cannot override them + elif isinstance(x, dict): + # try to do it via pandas + try: + x = pd.DataFrame.from_dict(x) + labels = x.columns + x = x.as_matrix() + # some types fail there + # another approach is to construct + # variable by hand + except (PandasError, TypeError): + res = [] + labels = [] + for k, v in x.items(): + res.append(v) + labels.append(k) + x = tt.stack(res, axis=1) + if x.ndim == 1: + x = x[:, None] + # case when it can appear to be some + # array like value like lists of lists + # numpy deals with it + elif not isinstance(x, tt.Variable): + x = np.asarray(x) + if x.ndim == 0: + raise ValueError('Cannot use scalars') + elif x.ndim == 1: + x = x[:, None] + # something really strange goes here, + # but user passes labels trusting seems + # to be a good option + elif labels is not None: + x = tt.as_tensor_variable(x) + if x.ndim == 0: + raise ValueError('Cannot use scalars') + elif x.ndim == 1: + x = x[:, None] + else: # trust input + pass + # we should check that we can extract labels + if labels is None and not isinstance(x, tt.Variable): + labels = ['x%d' % i for i in range(x.shape[1])] + # for theano variables we should have labels from user + elif labels is None: + raise ValueError('Please provide labels as ' + 'we cannot infer shape of input') + else: # trust labels, user knows what he is doing + pass + # it's time to check shapes if we can + if not isinstance(x, tt.Variable): + if not len(labels) == x.shape[1]: + raise ValueError( + 'Please provide full list ' + 'of labels for coefficients, ' + 'got len(labels)=%d instead of %d' + % (len(labels), x.shape[1]) + ) + else: + # trust labels, as we raised an + # error in bad case, we have labels + pass + # convert labels to list + if isinstance(labels, pd.RangeIndex): + labels = ['x%d' % i for i in labels] + # maybe it was a tuple ot whatever + elif not isinstance(labels, list): + labels = list(labels) + # as output we need tensor + if not isinstance(x, tt.Variable): + x = tt.as_tensor_variable(x) + # finally check dimensions + if x.ndim == 0: + raise ValueError('Cannot use scalars') + elif x.ndim == 1: + x = x[:, None] + return x, labels diff --git a/pymc3/tests/test_model.py b/pymc3/tests/test_model.py new file mode 100644 index 0000000000..4eb96e6789 --- /dev/null +++ b/pymc3/tests/test_model.py @@ -0,0 +1,120 @@ +import unittest +import theano.tensor as tt +import pymc3 as pm +from pymc3.distributions import HalfCauchy, Normal +from pymc3 import Potential, Deterministic + + +class NewModel(pm.Model): + def __init__(self, name='', model=None): + super(NewModel, self).__init__(name, model) + assert pm.modelcontext(None) is self + # 1) init variables with Var method + 
self.Var('v1', pm.Normal.dist()) + self.v2 = pm.Normal('v2', mu=0, sd=1) + # 2) Potentials and Deterministic variables with method too + # be sure that names will not overlap with other same models + pm.Deterministic('d', tt.constant(1)) + pm.Potential('p', tt.constant(1)) + + +class DocstringModel(pm.Model): + def __init__(self, mean=0, sd=1, name='', model=None): + super(DocstringModel, self).__init__(name, model) + self.Var('v1', Normal.dist(mu=mean, sd=sd)) + Normal('v2', mu=mean, sd=sd) + Normal('v3', mu=mean, sd=HalfCauchy('sd', beta=10, testval=1.)) + Deterministic('v3_sq', self.v3 ** 2) + Potential('p1', tt.constant(1)) + + +class TestBaseModel(unittest.TestCase): + def test_setattr_properly_works(self): + with pm.Model() as model: + pm.Normal('v1') + self.assertEqual(len(model.vars), 1) + with pm.Model('sub') as submodel: + submodel.Var('v1', pm.Normal.dist()) + self.assertTrue(hasattr(submodel, 'v1')) + self.assertEqual(len(submodel.vars), 1) + self.assertEqual(len(model.vars), 2) + with submodel: + submodel.Var('v2', pm.Normal.dist()) + self.assertTrue(hasattr(submodel, 'v2')) + self.assertEqual(len(submodel.vars), 2) + self.assertEqual(len(model.vars), 3) + + def test_context_passes_vars_to_parent_model(self): + with pm.Model() as model: + # a set of variables is created + NewModel() + # another set of variables are created but with prefix 'another' + usermodel2 = NewModel(name='another') + # you can enter in a context with submodel + with usermodel2: + usermodel2.Var('v3', pm.Normal.dist()) + pm.Normal('v4') + # this variable is created in parent model too + self.assertIn('another_v2', model.named_vars) + self.assertIn('another_v3', model.named_vars) + self.assertIn('another_v3', usermodel2.named_vars) + self.assertIn('another_v4', model.named_vars) + self.assertIn('another_v4', usermodel2.named_vars) + self.assertTrue(hasattr(usermodel2, 'v3')) + self.assertTrue(hasattr(usermodel2, 'v2')) + self.assertTrue(hasattr(usermodel2, 'v4')) + # When you create a class based model you should follow some rules + with model: + m = NewModel('one_more') + self.assertTrue(m.d is model['one_more_d']) + self.assertTrue(m['d'] is model['one_more_d']) + self.assertTrue(m['one_more_d'] is model['one_more_d']) + + +class TestNested(unittest.TestCase): + def test_nest_context_works(self): + with pm.Model() as m: + new = NewModel() + with new: + self.assertTrue( + pm.modelcontext(None) is new + ) + self.assertTrue( + pm.modelcontext(None) is m + ) + self.assertIn('v1', m.named_vars) + self.assertIn('v2', m.named_vars) + + def test_named_context(self): + with pm.Model() as m: + NewModel(name='new') + self.assertIn('new_v1', m.named_vars) + self.assertIn('new_v2', m.named_vars) + + def test_docstring_example1(self): + usage1 = DocstringModel() + self.assertIn('v1', usage1.named_vars) + self.assertIn('v2', usage1.named_vars) + self.assertIn('v3', usage1.named_vars) + self.assertIn('v3_sq', usage1.named_vars) + self.assertTrue(len(usage1.potentials), 1) + + def test_docstring_example2(self): + with pm.Model() as model: + DocstringModel(name='prefix') + self.assertIn('prefix_v1', model.named_vars) + self.assertIn('prefix_v2', model.named_vars) + self.assertIn('prefix_v3', model.named_vars) + self.assertIn('prefix_v3_sq', model.named_vars) + self.assertTrue(len(model.potentials), 1) + + def test_duplicates_detection(self): + with pm.Model(): + DocstringModel(name='prefix') + self.assertRaises(ValueError, DocstringModel, name='prefix') + + def test_model_root(self): + with pm.Model() as model: + 
self.assertTrue(model is model.root) + with pm.Model() as sub: + self.assertTrue(model is sub.root) diff --git a/pymc3/tests/test_models_linear.py b/pymc3/tests/test_models_linear.py new file mode 100644 index 0000000000..fac66fc0de --- /dev/null +++ b/pymc3/tests/test_models_linear.py @@ -0,0 +1,108 @@ +import numpy as np +from .helpers import SeededTest +from pymc3 import Model, Uniform, Normal, find_MAP, Slice, sample +from pymc3.models.linear import LinearComponent, Glm + + +# Generate data +def generate_data(intercept, slope, size=700): + x = np.linspace(-1, 1, size) + y = intercept + x * slope + return x, y + + +class TestGLM(SeededTest): + @classmethod + def setUpClass(cls): + super(TestGLM, cls).setUpClass() + cls.intercept = 1 + cls.slope = 3 + cls.sd = .05 + x_linear, cls.y_linear = generate_data(cls.intercept, cls.slope, size=1000) + cls.y_linear += np.random.normal(size=1000, scale=cls.sd) + cls.data_linear = dict(x=x_linear, y=cls.y_linear) + + x_logistic, y_logistic = generate_data(cls.intercept, cls.slope, size=3000) + y_logistic = 1 / (1 + np.exp(-y_logistic)) + bern_trials = [np.random.binomial(1, i) for i in y_logistic] + cls.data_logistic = dict(x=x_logistic, y=bern_trials) + + def test_linear_component(self): + vars_to_create = { + 'sigma_interval_', + 'y_obs', + 'lm_x0', + 'lm_Intercept' + } + with Model() as model: + lm = LinearComponent( + self.data_linear['x'], + self.data_linear['y'], + name='lm' + ) # yields lm_x0, lm_Intercept + sigma = Uniform('sigma', 0, 20) # yields sigma_interval_ + Normal('y_obs', mu=lm.y_est, sd=sigma, observed=self.y_linear) # yields y_obs + start = find_MAP(vars=[sigma]) + step = Slice(model.vars) + trace = sample(500, step, start, progressbar=False, random_seed=self.random_seed) + + self.assertAlmostEqual(np.mean(trace['lm_Intercept']), self.intercept, 1) + self.assertAlmostEqual(np.mean(trace['lm_x0']), self.slope, 1) + self.assertAlmostEqual(np.mean(trace['sigma']), self.sd, 1) + self.assertSetEqual(vars_to_create, set(model.named_vars.keys())) + + def test_linear_component_from_formula(self): + with Model() as model: + lm = LinearComponent.from_formula('y ~ x', self.data_linear) + sigma = Uniform('sigma', 0, 20) + Normal('y_obs', mu=lm.y_est, sd=sigma, observed=self.y_linear) + start = find_MAP(vars=[sigma]) + step = Slice(model.vars) + trace = sample(500, step, start, progressbar=False, random_seed=self.random_seed) + + self.assertAlmostEqual(np.mean(trace['Intercept']), self.intercept, 1) + self.assertAlmostEqual(np.mean(trace['x']), self.slope, 1) + self.assertAlmostEqual(np.mean(trace['sigma']), self.sd, 1) + + def test_glm(self): + with Model() as model: + vars_to_create = { + 'glm_sd_log_', + 'glm_y', + 'glm_x0', + 'glm_Intercept' + } + Glm( + self.data_linear['x'], + self.data_linear['y'], + name='glm' + ) + start = find_MAP() + step = Slice(model.vars) + trace = sample(500, step, start, progressbar=False, random_seed=self.random_seed) + self.assertAlmostEqual(np.mean(trace['glm_Intercept']), self.intercept, 1) + self.assertAlmostEqual(np.mean(trace['glm_x0']), self.slope, 1) + self.assertAlmostEqual(np.mean(trace['glm_sd']), self.sd, 1) + self.assertSetEqual(vars_to_create, set(model.named_vars.keys())) + + def test_glm_from_formula(self): + with Model() as model: + NAME = 'glm' + Glm.from_formula('y ~ x', self.data_linear, name=NAME) + start = find_MAP() + step = Slice(model.vars) + trace = sample(500, step, start, progressbar=False, random_seed=self.random_seed) + + self.assertAlmostEqual(np.mean(trace['%s_Intercept' % 
NAME]), self.intercept, 1) + self.assertAlmostEqual(np.mean(trace['%s_x' % NAME]), self.slope, 1) + self.assertAlmostEqual(np.mean(trace['%s_sd' % NAME]), self.sd, 1) + + def test_strange_types(self): + with Model(): + self.assertRaises( + ValueError, + Glm, + 1, + self.data_linear['y'], + name='lm' + ) diff --git a/pymc3/tests/test_models_utils.py b/pymc3/tests/test_models_utils.py new file mode 100644 index 0000000000..160846bcbc --- /dev/null +++ b/pymc3/tests/test_models_utils.py @@ -0,0 +1,79 @@ +import unittest +import numpy as np +import pandas as pd +import theano.tensor as tt +import pymc3.models.utils as utils + + +class TestUtils(unittest.TestCase): + def setUp(self): + self.data = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6])) + + def assertMatrixLabels(self, m, l, mt=None, lt=None): + self.assertTrue( + np.all( + np.equal( + m.eval(), + mt if mt is not None else self.data.as_matrix() + ) + ) + ) + self.assertEqual(l, list(lt or self.data.columns)) + + def test_numpy_init(self): + m, l = utils.any_to_tensor_and_labels(self.data.as_matrix()) + self.assertMatrixLabels(m, l, lt=['x0', 'x1']) + m, l = utils.any_to_tensor_and_labels(self.data.as_matrix(), labels=['x2', 'x3']) + self.assertMatrixLabels(m, l, lt=['x2', 'x3']) + + def test_pandas_init(self): + m, l = utils.any_to_tensor_and_labels(self.data) + self.assertMatrixLabels(m, l) + m, l = utils.any_to_tensor_and_labels(self.data, labels=['x2', 'x3']) + self.assertMatrixLabels(m, l, lt=['x2', 'x3']) + + def test_dict_input(self): + m, l = utils.any_to_tensor_and_labels(self.data.to_dict('dict')) + self.assertMatrixLabels(m, l, mt=self.data.as_matrix(l), lt=l) + + m, l = utils.any_to_tensor_and_labels(self.data.to_dict('series')) + self.assertMatrixLabels(m, l, mt=self.data.as_matrix(l), lt=l) + + m, l = utils.any_to_tensor_and_labels(self.data.to_dict('list')) + self.assertMatrixLabels(m, l, mt=self.data.as_matrix(l), lt=l) + + inp = {k: tt.as_tensor_variable(v) for k, v in self.data.to_dict('series').items()} + m, l = utils.any_to_tensor_and_labels(inp) + self.assertMatrixLabels(m, l, mt=self.data.as_matrix(l), lt=l) + + def test_list_input(self): + m, l = utils.any_to_tensor_and_labels(self.data.as_matrix().tolist()) + self.assertMatrixLabels(m, l, lt=['x0', 'x1']) + m, l = utils.any_to_tensor_and_labels(self.data.as_matrix().tolist(), labels=['x2', 'x3']) + self.assertMatrixLabels(m, l, lt=['x2', 'x3']) + + def test_tensor_input(self): + m, l = utils.any_to_tensor_and_labels( + tt.as_tensor_variable(self.data.as_matrix().tolist()), + labels=['x0', 'x1'] + ) + self.assertMatrixLabels(m, l, lt=['x0', 'x1']) + m, l = utils.any_to_tensor_and_labels( + tt.as_tensor_variable(self.data.as_matrix().tolist()), + labels=['x2', 'x3']) + self.assertMatrixLabels(m, l, lt=['x2', 'x3']) + + def test_user_mistakes(self): + # no labels for tensor variable + self.assertRaises( + ValueError, + utils.any_to_tensor_and_labels, + tt.as_tensor_variable(self.data.as_matrix().tolist()) + ) + # len of labels is bad + self.assertRaises( + ValueError, + utils.any_to_tensor_and_labels, + self.data.as_matrix().tolist(), + labels=['x'] + ) diff --git a/requirements.txt b/requirements.txt index fc6625c5d9..2e53afc286 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,4 @@ recommonmark sphinx nbsphinx numpydoc +six diff --git a/setup.py b/setup.py index 708cfb1006..017c28898a 100755 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ DISTNAME = 'pymc3' DESCRIPTION = "PyMC3" -LONG_DESCRIPTION = """Bayesian estimation, particularly using Markov chain 
Monte Carlo (MCMC), is an increasingly relevant approach to statistical estimation. However, few statistical software packages implement MCMC samplers, and they are non-trivial to code by hand. ``pymc3`` is a python package that implements the Metropolis-Hastings algorithm as a python class, and is extremely flexible and applicable to a large suite of problems. ``pymc3`` includes methods for summarizing output, plotting, goodness-of-fit and convergence diagnostics.""" +LONG_DESCRIPTION = """Bayesian estimation, particularly using Markov chain Monte Carlo (MCMC), is an increasingly relevant approach to statistical estimation. However, few statistical software packages implement MCMC samplers, and they are non-trivial to code by hand. ``pymc3`` is a python package that implements the Metropolis-Hastings algorithm as a python class, and is extremely flexible and applicable to a large suite of problems. ``pymc3`` includes methods for summarizing output, plotting, goodness-of-fit and convergence diagnostics.""" MAINTAINER = 'Thomas Wiecki' MAINTAINER_EMAIL = 'thomas.wiecki@gmail.com' AUTHOR = 'John Salvatier and Christopher Fonnesbeck' @@ -50,7 +50,8 @@ packages=['pymc3', 'pymc3.distributions', 'pymc3.step_methods', 'pymc3.tuning', 'pymc3.tests', 'pymc3.glm', 'pymc3.examples', - 'pymc3.backends', 'pymc3.variational', 'docs', '.'], + 'pymc3.models', 'pymc3.backends', + 'pymc3.variational', 'docs', '.'], classifiers=classifiers, install_requires=install_reqs, tests_require=test_reqs, From d6f57a7333801aa126f9df85f2b6f45c4aacc5a7 Mon Sep 17 00:00:00 2001 From: colin Date: Tue, 8 Nov 2016 00:29:28 -0500 Subject: [PATCH 5/6] Refactor Hamiltonian methods into single class --- pymc3/step_methods/__init__.py | 4 +- pymc3/step_methods/arraystep.py | 29 ++- pymc3/step_methods/hmc.py | 127 ---------- pymc3/step_methods/hmc/__init__.py | 2 + pymc3/step_methods/hmc/base_hmc.py | 57 +++++ pymc3/step_methods/hmc/hmc.py | 56 +++++ pymc3/step_methods/hmc/nuts.py | 132 ++++++++++ pymc3/step_methods/{ => hmc}/quadpotential.py | 0 pymc3/step_methods/hmc/trajectory.py | 184 ++++++++++++++ pymc3/step_methods/nuts.py | 238 ------------------ pymc3/tests/models.py | 2 +- pymc3/tests/test_hmc.py | 29 +-- pymc3/tests/test_step.py | 131 +++++++++- pymc3/theanof.py | 4 +- 14 files changed, 589 insertions(+), 406 deletions(-) delete mode 100644 pymc3/step_methods/hmc.py create mode 100644 pymc3/step_methods/hmc/__init__.py create mode 100644 pymc3/step_methods/hmc/base_hmc.py create mode 100644 pymc3/step_methods/hmc/hmc.py create mode 100644 pymc3/step_methods/hmc/nuts.py rename pymc3/step_methods/{ => hmc}/quadpotential.py (100%) create mode 100644 pymc3/step_methods/hmc/trajectory.py delete mode 100644 pymc3/step_methods/nuts.py diff --git a/pymc3/step_methods/__init__.py b/pymc3/step_methods/__init__.py index ccb5e22c47..07ee4a453c 100644 --- a/pymc3/step_methods/__init__.py +++ b/pymc3/step_methods/__init__.py @@ -1,6 +1,6 @@ from .compound import CompoundStep -from .hmc import HamiltonianMC +from .hmc import HamiltonianMC, NUTS from .metropolis import Metropolis from .metropolis import BinaryMetropolis @@ -15,5 +15,3 @@ from .gibbs import ElemwiseCategorical from .slicer import Slice - -from .nuts import NUTS \ No newline at end of file diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index c0619ae653..da83191139 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -4,7 +4,6 @@ from ..blocking import ArrayOrdering, DictToArrayBijection import numpy as np 
from numpy.random import uniform -from numpy import log, isfinite from enum import IntEnum, unique __all__ = ['ArrayStep', 'ArrayStepShared', 'metrop_select', 'SamplerHist', @@ -90,9 +89,9 @@ class ArrayStep(BlockedStep): ---------- vars : list List of variables for sampler. + fs: list of logp theano functions allvars: Boolean (default False) blocked: Boolean (default True) - fs: logp theano function """ def __init__(self, vars, fs, allvars=False, blocked=True): @@ -114,10 +113,11 @@ def step(self, point): class ArrayStepShared(BlockedStep): - """Faster version of ArrayStep that requires the substep method that does not wrap the functions the step method uses. + """Faster version of ArrayStep that requires the substep method that does not wrap + the functions the step method uses. - Works by setting shared variables before using the step. This eliminates the mapping and unmapping overhead as well - as moving fewer variables around. + Works by setting shared variables before using the step. This eliminates the mapping + and unmapping overhead as well as moving fewer variables around. """ def __init__(self, vars, shared, blocked=True): @@ -144,14 +144,25 @@ def step(self, point): def metrop_select(mr, q, q0): - # Perform rejection/acceptance step for Metropolis class samplers + """Perform rejection/acceptance step for Metropolis class samplers. + Returns the new sample q if a uniform random number is less than the + metropolis acceptance rate (`mr`), and the old sample otherwise. + + Parameters + ---------- + mr : float, Metropolis acceptance rate + q : proposed sample + q0 : current sample + + Returns + ------- + q or q0 + """ # Compare acceptance ratio to uniform random number - if isfinite(mr) and log(uniform()) < mr: - # Accept proposed value + if np.isfinite(mr) and np.log(uniform()) < mr: return q else: - # Reject proposed value return q0 diff --git a/pymc3/step_methods/hmc.py b/pymc3/step_methods/hmc.py deleted file mode 100644 index 295b6122d0..0000000000 --- a/pymc3/step_methods/hmc.py +++ /dev/null @@ -1,127 +0,0 @@ -''' -Created on Mar 7, 2011 - -@author: johnsalvatier -''' -from numpy import floor -from .quadpotential import quad_potential -from .arraystep import ArrayStep, SamplerHist, metrop_select, Competence -from ..tuning import guess_scaling -from ..model import modelcontext, Point -from ..theanof import inputvars -from ..vartypes import discrete_types - -import numpy as np -from scipy.sparse import issparse - -from collections import namedtuple - -__all__ = ['HamiltonianMC'] - -# TODO: -# add constraint handling via page 37 of Radford's -# http://www.cs.utoronto.ca/~radford/ham-mcmc.abstract.html - - -def unif(step_size, elow=.85, ehigh=1.15): - return np.random.uniform(elow, ehigh) * step_size - - -class HamiltonianMC(ArrayStep): - default_blocked = True - - def __init__(self, vars=None, scaling=None, step_scale=.25, path_length=2., is_cov=False, step_rand=unif, state=None, model=None, **kwargs): - """ - Parameters - ---------- - vars : list of theano variables - scaling : array_like, ndim = {1,2} - Scaling for momentum distribution. 1d arrays interpreted matrix diagonal. 
- step_scale : float, default=.25 - Size of steps to take, automatically scaled down by 1/n**(1/4) (defaults to .25) - path_length : float, default=2 - total length to travel - is_cov : bool, default=False - Treat scaling as a covariance matrix/vector if True, else treat it as a precision matrix/vector - step_rand : function float -> float, default=unif - A function which takes the step size and returns an new one used to randomize the step size at each iteration. - state - State object - model : Model - """ - model = modelcontext(model) - - if vars is None: - vars = model.cont_vars - vars = inputvars(vars) - - if scaling is None: - scaling = model.test_point - - if isinstance(scaling, dict): - scaling = guess_scaling(Point(scaling, model=model), model=model) - - n = scaling.shape[0] - - self.step_size = step_scale / n ** (1 / 4.) - - self.potential = quad_potential(scaling, is_cov, as_cov=False) - - self.path_length = path_length - self.step_rand = step_rand - - if state is None: - state = SamplerHist() - self.state = state - - super(HamiltonianMC, self).__init__( - vars, [model.fastlogp, model.fastdlogp(vars)], **kwargs) - - def astep(self, q0, logp, dlogp): - H = Hamiltonian(logp, dlogp, self.potential) - - e = self.step_rand(self.step_size) - nstep = int(self.path_length / e) - - p0 = H.pot.random() - - q, p = leapfrog(H, q0, p0, nstep, e) - p = -p - - mr = energy(H, q0, p0) - energy(H, q, p) - - self.state.metrops.append(mr) - - return metrop_select(mr, q, q0) - - @staticmethod - def competence(var): - if var.dtype in discrete_types: - return Competence.INCOMPATIBLE - return Competence.COMPATIBLE - - -def bern(p): - return np.random.uniform() < p - -Hamiltonian = namedtuple("Hamiltonian", "logp, dlogp, pot") - - -def energy(H, q, p): - return -(H.logp(q) - H.pot.energy(p)) - - -def leapfrog(H, q, p, n, e): - _, dlogp, pot = H - - p = p - (e / 2) * -dlogp(q) # half momentum update - - for i in range(n): - # alternate full variable and momentum updates - q = q + e * pot.velocity(p) - - if i != n - 1: - p = p - e * -dlogp(q) - - p = p - (e / 2) * -dlogp(q) # do a half step momentum update to finish off - return q, p diff --git a/pymc3/step_methods/hmc/__init__.py b/pymc3/step_methods/hmc/__init__.py new file mode 100644 index 0000000000..07fa92af67 --- /dev/null +++ b/pymc3/step_methods/hmc/__init__.py @@ -0,0 +1,2 @@ +from .hmc import HamiltonianMC +from .nuts import NUTS diff --git a/pymc3/step_methods/hmc/base_hmc.py b/pymc3/step_methods/hmc/base_hmc.py new file mode 100644 index 0000000000..7d87f5d3c8 --- /dev/null +++ b/pymc3/step_methods/hmc/base_hmc.py @@ -0,0 +1,57 @@ +from ..arraystep import ArrayStepShared +from .trajectory import get_theano_hamiltonian_functions + +from pymc3.tuning import guess_scaling +from pymc3.model import modelcontext, Point +from .quadpotential import quad_potential +from pymc3.theanof import inputvars, make_shared_replacements + + +class BaseHMC(ArrayStepShared): + default_blocked = True + + def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, + model=None, blocked=True, use_single_leapfrog=False, **theano_kwargs): + """ + Parameters + ---------- + vars : list of theano variables + scaling : array_like, ndim = {1,2} + Scaling for momentum distribution. 1d arrays interpreted matrix diagonal. 
+        step_scale : float, default=0.25
+            Size of steps to take, automatically scaled down by 1/n**(1/4)
+        is_cov : bool, default=False
+            Treat scaling as a covariance matrix/vector if True, else treat it as a
+            precision matrix/vector
+        model : pymc3 Model instance, default=Context model
+        blocked : Boolean, default True
+        use_single_leapfrog : Boolean, default False
+            whether the compiled leapfrog function takes one step at a time
+        **theano_kwargs : passed to theano functions
+        """
+        model = modelcontext(model)
+
+        if vars is None:
+            vars = model.cont_vars
+        vars = inputvars(vars)
+
+        if scaling is None:
+            scaling = model.test_point
+
+        if isinstance(scaling, dict):
+            scaling = guess_scaling(Point(scaling, model=model), model=model, vars=vars)
+
+        n = scaling.shape[0]
+        self.step_size = step_scale / (n ** 0.25)
+        self.potential = quad_potential(scaling, is_cov, as_cov=False)
+
+        shared = make_shared_replacements(vars, model)
+        if theano_kwargs is None:
+            theano_kwargs = {}
+
+        self.H, self.compute_energy, self.leapfrog, self._vars = get_theano_hamiltonian_functions(
+            vars, shared, model.logpt, self.potential, use_single_leapfrog, **theano_kwargs)
+
+        super(BaseHMC, self).__init__(vars, shared, blocked=blocked)
diff --git a/pymc3/step_methods/hmc/hmc.py b/pymc3/step_methods/hmc/hmc.py
new file mode 100644
index 0000000000..70a042f263
--- /dev/null
+++ b/pymc3/step_methods/hmc/hmc.py
@@ -0,0 +1,56 @@
+'''
+Created on Mar 7, 2011
+
+@author: johnsalvatier
+'''
+from ..arraystep import metrop_select, Competence
+from .base_hmc import BaseHMC
+from pymc3.vartypes import discrete_types
+import numpy as np
+
+
+__all__ = ['HamiltonianMC']
+
+# TODO:
+# add constraint handling via page 37 of Radford's
+# http://www.cs.utoronto.ca/~radford/ham-mcmc.abstract.html
+
+
+def unif(step_size, elow=.85, ehigh=1.15):
+    return np.random.uniform(elow, ehigh) * step_size
+
+
+class HamiltonianMC(BaseHMC):
+    default_blocked = True
+
+    def __init__(self, vars=None, path_length=2., step_rand=unif, **kwargs):
+        """
+        Parameters
+        ----------
+        vars : list of theano variables
+        path_length : float, default=2
+            total length to travel
+        step_rand : function float -> float, default=unif
+            A function which takes the step size and returns a new one used to
+            randomize the step size at each iteration.
+        **kwargs : passed to BaseHMC
+        """
+        super(HamiltonianMC, self).__init__(vars, **kwargs)
+        self.path_length = path_length
+        self.step_rand = step_rand
+
+    def astep(self, q0):
+        e = np.array(self.step_rand(self.step_size))
+        n_steps = np.array(self.path_length / e, dtype='int32')
+        q = q0
+        p = self.H.pot.random()  # initialize momentum
+        initial_energy = self.compute_energy(q, p)
+        q, p, current_energy = self.leapfrog(q, p, e, n_steps)
+        energy_change = current_energy - initial_energy
+        return metrop_select(energy_change, q, q0)
+
+    @staticmethod
+    def competence(var):
+        if var.dtype in discrete_types:
+            return Competence.INCOMPATIBLE
+        return Competence.COMPATIBLE
diff --git a/pymc3/step_methods/hmc/nuts.py b/pymc3/step_methods/hmc/nuts.py
new file mode 100644
index 0000000000..e22a3cb8ec
--- /dev/null
+++ b/pymc3/step_methods/hmc/nuts.py
@@ -0,0 +1,132 @@
+from ..arraystep import Competence
+from .base_hmc import BaseHMC
+from pymc3.vartypes import continuous_types
+
+import numpy as np
+import numpy.random as nr
+
+__all__ = ['NUTS']
+
+
+def bern(p):
+    return np.random.uniform() < p
+
+
+class NUTS(BaseHMC):
+    """
+    Automatically tunes step size and adjusts the number of steps for good performance.
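+
+    A minimal usage sketch (standard PyMC3 wiring, nothing specific to this
+    refactor; `model` is a pymc3 Model and `sample` is pymc3.sample):
+
+        with model:
+            step = NUTS(target_accept=0.9)
+            trace = sample(1000, step=step)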
+
+    Implements "Algorithm 6: Efficient No-U-Turn Sampler with Dual Averaging" in:
+
+    Hoffman, Matthew D., & Gelman, Andrew. (2011).
+    The No-U-Turn Sampler: Adaptively Setting Path Lengths in Hamiltonian Monte Carlo.
+    """
+    default_blocked = True
+
+    def __init__(self, vars=None, Emax=1000, target_accept=0.8,
+                 gamma=0.05, k=0.75, t0=10, **kwargs):
+        """
+        Parameters
+        ----------
+        vars : list of Theano variables, default continuous vars
+        Emax : float, default 1000
+            maximum energy
+        target_accept : float (0,1) default .8
+            target for avg accept probability between final branch and initial position
+        gamma : float, default .05
+        k : float (.5,1) default .75
+            scaling of speed of adaptation
+        t0 : int, default 10
+            slows initial adaptation
+        kwargs: passed to BaseHMC
+        """
+        super(NUTS, self).__init__(vars, use_single_leapfrog=True, **kwargs)
+
+        self.Emax = Emax
+
+        self.target_accept = target_accept
+        self.gamma = gamma
+        self.k = k
+        self.t0 = t0
+
+        self.Hbar = 0
+        self.u = np.log(self.step_size * 10)
+        self.m = 1
+
+    def astep(self, q0):
+        Emax = self.Emax
+        e = self.step_size
+
+        p0 = self.potential.random()
+        E0 = self.compute_energy(q0, p0)
+
+        u = nr.uniform()
+        q = qn = qp = q0
+        p = pn = pp = p0
+
+        n, s, j = 1, 1, 0
+
+        while s == 1:
+            v = bern(0.5) * 2 - 1
+
+            if v == -1:
+                qn, pn, _, _, q1, n1, s1, a, na = buildtree(
+                    self.leapfrog, qn, pn, u, v, j, e, Emax, E0)
+            else:
+                _, _, qp, pp, q1, n1, s1, a, na = buildtree(
+                    self.leapfrog, qp, pp, u, v, j, e, Emax, E0)
+
+            if s1 == 1 and bern(min(1, n1 * 1. / n)):
+                q = q1
+
+            n = n + n1
+
+            span = qp - qn
+            s = s1 * (span.dot(pn) >= 0) * (span.dot(pp) >= 0)
+            j = j + 1
+
+        p = -p
+
+        w = 1. / (self.m + self.t0)
+        self.Hbar = (1 - w) * self.Hbar + w * \
+            (self.target_accept - a * 1. / na)
+
+        self.step_size = np.exp(self.u - (self.m**self.k / self.gamma) * self.Hbar)
+        self.m += 1
+
+        return q
+
+    @staticmethod
+    def competence(var):
+        if var.dtype in continuous_types:
+            return Competence.IDEAL
+        return Competence.INCOMPATIBLE
+
+
+def buildtree(leapfrog, q, p, u, v, j, e, Emax, E0):
+    if j == 0:
+        q1, p1, E = leapfrog(q, p, np.array(v * e))
+        dE = E - E0
+
+        n1 = int(np.log(u) + dE <= 0)
+        s1 = int(np.log(u) + dE < Emax)
+        return q1, p1, q1, p1, q1, n1, s1, min(1, np.exp(-dE)), 1
+    qn, pn, qp, pp, q1, n1, s1, a1, na1 = buildtree(leapfrog, q, p, u, v, j - 1, e, Emax, E0)
+    if s1 == 1:
+        if v == -1:
+            qn, pn, _, _, q11, n11, s11, a11, na11 = buildtree(
+                leapfrog, qn, pn, u, v, j - 1, e, Emax, E0)
+        else:
+            _, _, qp, pp, q11, n11, s11, a11, na11 = buildtree(
+                leapfrog, qp, pp, u, v, j - 1, e, Emax, E0)
+
+        if bern(n11 * 1.
/ (max(n1 + n11, 1))): + q1 = q11 + + a1 = a1 + a11 + na1 = na1 + na11 + + span = qp - qn + s1 = s11 * (span.dot(pn) >= 0) * (span.dot(pp) >= 0) + n1 = n1 + n11 + return qn, pn, qp, pp, q1, n1, s1, a1, na1 diff --git a/pymc3/step_methods/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py similarity index 100% rename from pymc3/step_methods/quadpotential.py rename to pymc3/step_methods/hmc/quadpotential.py diff --git a/pymc3/step_methods/hmc/trajectory.py b/pymc3/step_methods/hmc/trajectory.py new file mode 100644 index 0000000000..b64790a21a --- /dev/null +++ b/pymc3/step_methods/hmc/trajectory.py @@ -0,0 +1,184 @@ +from collections import namedtuple + +from pymc3.theanof import join_nonshared_inputs, gradient, CallableTensor + +import theano +import theano.tensor as tt + + +Hamiltonian = namedtuple("Hamiltonian", "logp, dlogp, pot") + + +def _theano_hamiltonian(model_vars, shared, logpt, potential): + """Creates a Hamiltonian with shared inputs. + + Parameters + ---------- + model_vars: array of variables to be sampled + shared theano tensors that are already shared + logpt: model log probability + potential: hamiltonian potential + + Returns + ------- + Hamiltonian: namedtuple with log pdf, gradient of log pdf, and potential functions + q: Starting position variable. + """ + dlogp = gradient(logpt, model_vars) + (logp, dlogp), q = join_nonshared_inputs([logpt, dlogp], model_vars, shared) + logp = CallableTensor(logp) + dlogp = CallableTensor(dlogp) + return Hamiltonian(logp, dlogp, potential), q + + +def _theano_energy_function(H, q, **theano_kwargs): + """Creates a Hamiltonian with shared inputs. + + Parameters + ---------- + H: Hamiltonian namedtuple + q: theano variable, starting position + theano_kwargs: passed to theano.function + + Returns + ------- + energy_function: theano function that computes the energy at a point (p, q) in phase space + p: Starting momentum variable. + """ + p = tt.dvector('p') + p.tag.test_value = q.tag.test_value + total_energy = H.pot.energy(p) - H.logp(q) + energy_function = theano.function(inputs=[q, p], outputs=total_energy, **theano_kwargs) + energy_function.trust_input = True + + return energy_function, p + + +def _theano_leapfrog_integrator(H, q, p, **theano_kwargs): + """Computes a theano function that computes one leapfrog step and the energy at the + end of the trajectory. + + Parameters + ---------- + H : Hamiltonian + q : theano.tensor + p : theano.tensor + theano_kwargs : passed to theano.function + + Returns + ------- + theano function which returns + q_new, p_new, energy_new + """ + epsilon = tt.dscalar('epsilon') + epsilon.tag.test_value = 1 + + n_steps = tt.iscalar('n_steps') + n_steps.tag.test_value = 2 + + q_new, p_new = leapfrog(H, q, p, epsilon, n_steps) + energy_new = energy(H, q_new, p_new) + + f = theano.function([q, p, epsilon, n_steps], [q_new, p_new, energy_new], **theano_kwargs) + f.trust_input = True + return f + + +def get_theano_hamiltonian_functions(model_vars, shared, logpt, potential, + use_single_leapfrog=False, **theano_kwargs): + """Construct theano functions for the Hamiltonian, energy, and leapfrog integrator. 
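+
+    A sketch of the intended call pattern (compare `BaseHMC.__init__` and
+    `HamiltonianMC.astep` in this patch; the variable names are
+    illustrative):
+
+        H, energy, leapfrog, _ = get_theano_hamiltonian_functions(
+            vars, shared, model.logpt, potential)
+        p0 = potential.random()   # draw an initial momentum
+        E0 = energy(q0, p0)       # energy at the starting point
+        q1, p1, E1 = leapfrog(q0, p0, step_size, n_steps)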
+ + Parameters + ---------- + model_vars : array of variables to be sampled + shared theano tensors that are already shared + logpt: model log probability + potential: hamiltonian potential + theano_kwargs : dictionary of keyword arguments to pass to theano functions + use_single_leapfrog: Boolean, if only 1 integration step is done at a time (as in NUTS), + this provides a ~2x speedup + + Returns + ------- + H: Hamiltonian namedtuple + energy_function: theano function computing energy at a point in phase space + leapfrog_integrator: theano function integrating the Hamiltonian from a point in phase space + theano_variables: dictionary of variables used in the computation graph which may be useful + """ + H, q = _theano_hamiltonian(model_vars, shared, logpt, potential) + energy_function, p = _theano_energy_function(H, q, **theano_kwargs) + if use_single_leapfrog: + leapfrog_integrator = _theano_single_leapfrog(H, q, p, **theano_kwargs) + else: + leapfrog_integrator = _theano_leapfrog_integrator(H, q, p, **theano_kwargs) + return H, energy_function, leapfrog_integrator, {'q': q, 'p': p} + + +def energy(H, q, p): + """Compute the total energy for the Hamiltonian at a given position/momentum""" + return -(H.logp(q) - H.pot.energy(p)) + + +def leapfrog(H, q, p, epsilon, n_steps): + """Leapfrog integrator. + + Estimates `p(t)` and `q(t)` at time :math:`t = n \cdot e`, by integrating the + Hamiltonian equations + + .. math:: + + \frac{dq_i}{dt} = \frac{\partial H}{\partial p_i} + + \frac{dp_i}{dt} = \frac{\partial H}{\partial q_i} + + with :math:`p(0) = p`, :math:`q(0) = q` + + Parameters + ---------- + H : Hamiltonian instance. + Tuple of `logp, dlogp, potential`. + q: Theano.tensor + initial position vector + p: Theano.tensor + initial momentum vector + epsilon: float, step size + n_steps: int, number of iterations + + Returns + ------- + position: Theano.tensor + position estimate at time :math:`n \cdot e`. + momentum: Theano.tensor + momentum estimate at time :math:`n \cdot e`. + """ + def full_update(p, q): + p = p + epsilon * H.dlogp(q) + q += epsilon * H.pot.velocity(p) + return p, q + # This first line can't be +=, possibly because of theano + p = p + 0.5 * epsilon * H.dlogp(q) # half momentum update + q += epsilon * H.pot.velocity(p) # full position update + if tt.gt(n_steps, 1): + (p_seq, q_seq), _ = theano.scan(full_update, outputs_info=[p, q], n_steps=n_steps - 1) + p, q = p_seq[-1], q_seq[-1] + p += 0.5 * epsilon * H.dlogp(q) # half momentum update + return q, p + + +def _theano_single_leapfrog(H, q, p, **theano_kwargs): + """Leapfrog integrator for a single step. + + See above for documentation. This is optimized for the case where only a single step is + needed, in case of, for example, a recursive algorithm. 
+ """ + epsilon = tt.dscalar('epsilon') + epsilon.tag.test_value = 1 + + p_new = p + 0.5 * epsilon * H.dlogp(q) # half momentum update + q_new = q + epsilon * H.pot.velocity(p_new) # full position update + p_new += 0.5 * epsilon * H.dlogp(q_new) # half momentum update + energy_new = energy(H, q_new, p_new) + + f = theano.function(inputs=[q, p, epsilon], outputs=[q_new, p_new, energy_new], **theano_kwargs) + f.trust_input = True + return f diff --git a/pymc3/step_methods/nuts.py b/pymc3/step_methods/nuts.py deleted file mode 100644 index 84bb3a90ba..0000000000 --- a/pymc3/step_methods/nuts.py +++ /dev/null @@ -1,238 +0,0 @@ -from .quadpotential import quad_potential -from .arraystep import ArrayStepShared, ArrayStep, SamplerHist, Competence -from ..model import modelcontext, Point -from ..vartypes import continuous_types -from numpy import exp, log, array -from numpy.random import uniform -from .hmc import leapfrog, Hamiltonian, bern, energy -from ..tuning import guess_scaling -import theano -from ..theanof import (make_shared_replacements, join_nonshared_inputs, CallableTensor, - gradient, inputvars) -import theano.tensor as tt - -__all__ = ['NUTS'] - - -class NUTS(ArrayStepShared): - """ - Automatically tunes step size and adjust number of steps for good performance. - - Implements "Algorithm 6: Efficient No-U-Turn Sampler with Dual Averaging" in: - - Hoffman, Matthew D., & Gelman, Andrew. (2011). - The No-U-Turn Sampler: Adaptively Setting Path Lengths in Hamiltonian Monte Carlo. - """ - default_blocked = True - - def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, state=None, - Emax=1000, - target_accept=0.8, - gamma=0.05, - k=0.75, - t0=10, - model=None, - profile=False, - mode=None, **kwargs): - """ - Parameters - ---------- - vars : list of Theano variables, default continuous vars - scaling : array_like, ndim = {1,2} or point - Scaling for momentum distribution. 1d arrays interpreted matrix diagonal. - step_scale : float, default=.25 - Size of steps to take, automatically scaled down by 1/n**(1/4) - is_cov : bool, default=False - Treat C as a covariance matrix/vector if True, else treat it as a precision matrix/vector - state - state to start from - Emax : float, default 1000 - maximum energy - target_accept : float (0,1) default .8 - target for avg accept probability between final branch and initial position - gamma : float, default .05 - k : float (.5,1) default .75 - scaling of speed of adaptation - t0 : int, default 10 - slows inital adapatation - model : Model - profile : bool or ProfileStats - sets the functions to be profiled - mode : string or `Mode` instance. - compilation mode passed to Theano functions - """ - model = modelcontext(model) - - if vars is None: - vars = model.cont_vars - vars = inputvars(vars) - - if scaling is None: - scaling = model.test_point - - if isinstance(scaling, dict): - scaling = guess_scaling( - Point(scaling, model=model), model=model, vars=vars) - - n = scaling.shape[0] - - self.step_size = step_scale / n**(1 / 4.) 
- - self.potential = quad_potential(scaling, is_cov, as_cov=False) - - if state is None: - state = SamplerHist() - self.state = state - self.Emax = Emax - - self.target_accept = target_accept - self.gamma = gamma - self.t0 = t0 - self.k = k - - self.Hbar = 0 - self.u = log(self.step_size * 10) - self.m = 1 - self.mode = mode - - shared = make_shared_replacements(vars, model) - - def create_hamiltonian(vars, shared, model): - dlogp = gradient(model.logpt, vars) - (logp, dlogp), q = join_nonshared_inputs( - [model.logpt, dlogp], vars, shared) - logp = CallableTensor(logp) - dlogp = CallableTensor(dlogp) - - return Hamiltonian(logp, dlogp, self.potential), q - - def create_energy_func(q): - p = tt.dvector('p') - p.tag.test_value = q.tag.test_value - E0 = energy(self.H, q, p) - E0_func = theano.function([q, p], E0, mode=self.mode) - E0_func.trust_input = True - - return E0_func - - self.H, q = create_hamiltonian(vars, shared, model) - self.compute_energy = create_energy_func(q) - - self.leapfrog1_dE = leapfrog1_dE(self.H, q, profile=profile, - mode=self.mode) - - super(NUTS, self).__init__(vars, shared, **kwargs) - - def astep(self, q0): - leapfrog = self.leapfrog1_dE - Emax = self.Emax - e = self.step_size - - p0 = self.potential.random() - E0 = self.compute_energy(q0, p0) - - u = uniform() - q = qn = qp = q0 - p = pn = pp = p0 - - n, s, j = 1, 1, 0 - - while s == 1: - v = bern(.5) * 2 - 1 - - if v == -1: - qn, pn, _, _, q1, n1, s1, a, na = buildtree( - leapfrog, qn, pn, u, v, j, e, Emax, E0) - else: - _, _, qp, pp, q1, n1, s1, a, na = buildtree( - leapfrog, qp, pp, u, v, j, e, Emax, E0) - - if s1 == 1 and bern(min(1, n1 * 1. / n)): - q = q1 - - n = n + n1 - - span = qp - qn - s = s1 * (span.dot(pn) >= 0) * (span.dot(pp) >= 0) - j = j + 1 - - p = -p - - w = 1. / (self.m + self.t0) - self.Hbar = (1 - w) * self.Hbar + w * \ - (self.target_accept - a * 1. / na) - - self.step_size = exp(self.u - (self.m**self.k / self.gamma) * self.Hbar) - self.m += 1 - - return q - - @staticmethod - def competence(var): - if var.dtype in continuous_types: - return Competence.IDEAL - return Competence.INCOMPATIBLE - - -def buildtree(leapfrog1_dE, q, p, u, v, j, e, Emax, E0): - if j == 0: - q1, p1, dE = leapfrog1_dE(q, p, array(v * e), E0) - - n1 = int(log(u) + dE <= 0) - s1 = int(log(u) + dE < Emax) - return q1, p1, q1, p1, q1, n1, s1, min(1, exp(-dE)), 1 - else: - qn, pn, qp, pp, q1, n1, s1, a1, na1 = buildtree( - leapfrog1_dE, q, p, u, v, j - 1, e, Emax, E0) - if s1 == 1: - if v == -1: - qn, pn, _, _, q11, n11, s11, a11, na11 = buildtree( - leapfrog1_dE, qn, pn, u, v, j - 1, e, Emax, E0) - else: - _, _, qp, pp, q11, n11, s11, a11, na11 = buildtree( - leapfrog1_dE, qp, pp, u, v, j - 1, e, Emax, E0) - - if bern(n11 * 1. / (max(n1 + n11, 1))): - q1 = q11 - - a1 = a1 + a11 - na1 = na1 + na11 - - span = qp - qn - s1 = s11 * (span.dot(pn) >= 0) * (span.dot(pp) >= 0) - n1 = n1 + n11 - return qn, pn, qp, pp, q1, n1, s1, a1, na1 - return - - -def leapfrog1_dE(H, q, profile, mode): - """Computes a theano function that computes one leapfrog step and the energy difference between the beginning and end of the trajectory. 
- Parameters - ---------- - H : Hamiltonian - q : theano.tensor - profile : Boolean - - Returns - ------- - theano function which returns - q_new, p_new, dE - """ - p = tt.dvector('p') - p.tag.test_value = q.tag.test_value - - e = tt.dscalar('e') - e.tag.test_value = 1 - - q1, p1 = leapfrog(H, q, p, 1, e) - E = energy(H, q1, p1) - - E0 = tt.dscalar('E0') - E0.tag.test_value = 1 - - dE = E - E0 - - f = theano.function([q, p, e, E0], [q1, p1, dE], - profile=profile, mode=mode) - f.trust_input = True - return f diff --git a/pymc3/tests/models.py b/pymc3/tests/models.py index d8a195fd46..9c8ad87775 100644 --- a/pymc3/tests/models.py +++ b/pymc3/tests/models.py @@ -19,7 +19,7 @@ def simple_categorical(): p = np.array([0.1, 0.2, 0.3, 0.4]) v = np.array([0.0, 1.0, 2.0, 3.0]) with Model() as model: - Categorical('x', p, shape = 3, testval = [1, 2, 3]) + Categorical('x', p, shape=3, testval=[1, 2, 3]) mu = np.dot(p, v) var = np.dot(p, (v - mu) ** 2) diff --git a/pymc3/tests/test_hmc.py b/pymc3/tests/test_hmc.py index d64d89de16..e40aa3fd11 100644 --- a/pymc3/tests/test_hmc.py +++ b/pymc3/tests/test_hmc.py @@ -1,32 +1,25 @@ -import pymc3 as pm import numpy as np + +from pymc3.blocking import DictToArrayBijection from . import models -from pymc3.step_methods.hmc import leapfrog, Hamiltonian +from pymc3.step_methods.hmc.base_hmc import BaseHMC from .checks import close_to -from ..blocking import DictToArrayBijection def test_leapfrog_reversible(): n = 3 start, model, _ = models.non_normal(n) - - with model: - h = pm.find_hessian(start, model=model) - step = pm.HamiltonianMC(model.vars, h, model=model) - + step = BaseHMC(vars=model.vars, model=model) bij = DictToArrayBijection(step.ordering, start) - - logp, dlogp = list(map(bij.mapf, step.fs)) - H = Hamiltonian(logp, dlogp, step.potential) - q0 = bij.map(start) p0 = np.ones(n) * .05 - for e in [.01, .1, 1.2]: - for L in [1, 2, 3, 4, 20]: + + for epsilon in [.01, .1, 1.2]: + for n_steps in [1, 2, 3, 4, 20]: q, p = q0, p0 - q, p = leapfrog(H, q, p, L, e) - q, p = leapfrog(H, q, -p, L, e) + q, p, _ = step.leapfrog(q, p, np.array(epsilon), np.array(n_steps, dtype='int32')) + q, p, _ = step.leapfrog(q, -p, np.array(epsilon), np.array(n_steps, dtype='int32')) - close_to(q, q0, 1e-8, str((L, e))) - close_to(-p, p0, 1e-8, str((L, e))) + close_to(q, q0, 1e-8, str((n_steps, epsilon))) + close_to(-p, p0, 1e-8, str((n_steps, epsilon))) diff --git a/pymc3/tests/test_step.py b/pymc3/tests/test_step.py index 9e8fa5c40d..924bd6ab29 100644 --- a/pymc3/tests/test_step.py +++ b/pymc3/tests/test_step.py @@ -1,5 +1,10 @@ +import os import unittest +from numpy.testing import assert_array_almost_equal +import numpy as np +from tqdm import tqdm + from .checks import close_to from .models import simple_categorical, mv_simple, mv_simple_discrete, simple_2model from pymc3.sampling import assign_step_methods, sample @@ -8,12 +13,122 @@ Metropolis, Slice, CompoundStep, MultivariateNormalProposal, HamiltonianMC) from pymc3.distributions import Binomial, Normal, Bernoulli, Categorical -from numpy.testing import assert_almost_equal -import numpy as np class TestStepMethods(object): # yield test doesn't work subclassing unittest.TestCase - def check_stat(self, check, trace): + master_samples = { + Slice: np.array([ + -8.13087389e-01, -3.08921856e-01, -6.79377098e-01, 6.50812585e-01, -7.63577596e-01, + -8.13199793e-01, -1.63823548e+00, -7.03863676e-02, 2.05107771e+00, 1.68598170e+00, + 6.92463695e-01, -7.75120766e-01, -1.62296463e+00, 3.59722423e-01, -2.31421712e-01, + 
-7.80686956e-02, -6.05860731e-01, -1.13000202e-01, 1.55675942e-01, -6.78527612e-01, + 6.31052333e-01, 6.09012517e-01, -1.56621643e+00, 5.04330883e-01, 3.14824082e-03, + -1.31287073e+00, 4.10706927e-01, 8.93815792e-01, 8.19317020e-01, 3.71900919e-01, + -2.62067312e+00, -3.47616592e+00, 1.50335041e+00, -1.05993351e+00, 2.41571723e-01, + -1.06258156e+00, 5.87999429e-01, -1.78480091e-01, -3.60278680e-01, 1.90615274e-01, + -1.24399204e-01, 4.03845589e-01, -1.47797573e-01, 7.90445804e-01, -1.21043819e+00, + -1.33964776e+00, 1.36366329e+00, -7.50175388e-01, 9.25241839e-01, -4.17493767e-01, + 1.85311339e+00, -2.49715343e+00, -3.18571692e-01, -1.49099668e+00, -2.62079621e-01, + -5.82376852e-01, -2.53033395e+00, 2.07580503e+00, -9.82615856e-01, 6.00517782e-01, + -9.83941620e-01, -1.59014118e+00, -1.83931394e-03, -4.71163466e-01, 1.90073737e+00, + -2.08929125e-01, -6.98388847e-01, 1.64502092e+00, -1.19525944e+00, 1.44424109e+00, + 1.52974876e+00, -5.70140077e-01, 5.08633322e-01, -1.70862492e-02, -1.69887948e-01, + 5.19760297e-01, -4.15149647e-01, 8.63685174e-02, -3.66805233e-01, -9.24988952e-01, + 2.33307122e+00, -2.60391496e-01, -5.86271814e-01, -5.01297170e-01, -1.53866195e+00, + 5.71285373e-01, -1.30571830e+00, 8.59587795e-01, 6.72170694e-01, 9.12433943e-01, + 7.04959179e-01, 8.37863464e-01, -5.24200836e-01, 1.28261340e+00, 9.08774240e-01, + 8.80566763e-01, 7.82911967e-01, 8.01843432e-01, 7.09251098e-01, 5.73803618e-01]), + HamiltonianMC: np.array([ + -1.56440708e-03, -2.37766120e-03, -6.95819902e-03, -4.88882715e-03, -6.54928517e-03, + -3.38653286e-03, -1.99381372e-03, -1.25904805e-03, -2.97173572e-04, -4.67391216e-04, + -2.03821237e-03, -1.33693751e-04, -2.17293248e-03, -4.11675406e-03, -4.23091782e-03, + -7.34120851e-03, -8.43726968e-03, -7.86976139e-03, -3.89551467e-03, -3.00788956e-03, + -3.82420513e-03, -1.35604792e-03, -2.49066947e-04, 4.03633859e-04, 9.34321408e-05, + 1.77722574e-03, 1.63761359e-03, 2.86208401e-03, -1.72243038e-04, 1.86863525e-03, + 1.76740215e-03, 1.79169049e-03, 1.07164602e-03, 1.41264547e-03, 2.49563456e-03, + 1.76639216e-03, 3.01570589e-03, 1.44186424e-04, 1.45073846e-03, 2.95031617e-04, + -1.28811479e-04, -7.35945905e-04, -6.00689088e-04, 2.75468405e-04, 1.05245800e-03, + 1.18892307e-03, 6.01165842e-04, 1.21016955e-03, -2.06751271e-03, -8.41426458e-04, + 6.09905557e-04, 2.92765303e-03, 4.15216348e-03, 2.71863268e-03, 3.42922082e-03, + 7.53890188e-03, 7.97507867e-03, 8.27371677e-03, 9.77811135e-03, 9.99705714e-03, + 1.13996054e-02, 1.15745874e-02, 1.08182152e-02, 1.08277279e-02, 9.32254191e-03, + 8.59914793e-03, 8.43927425e-03, 1.01570101e-02, 9.74607039e-03, 9.82868496e-03, + 1.01745777e-02, 1.19312194e-02, 1.53760522e-02, 1.38691940e-02, 1.40131760e-02, + 1.46184561e-02, 1.74382675e-02, 1.84241543e-02, 2.06913002e-02, 1.83520531e-02, + 2.03072531e-02, 1.72912752e-02, 1.38959101e-02, 1.21933473e-02, 1.05084488e-02, + 9.00532336e-03, 9.25863206e-03, 1.23618461e-02, 1.20207293e-02, 1.09334818e-02, + 1.16528011e-02, 1.29967126e-02, 1.38940942e-02, 1.11408833e-02, 1.09263348e-02, + 1.06521352e-02, 1.01622526e-02, 1.21998547e-02, 1.00880470e-02, 9.94787795e-03]), + Metropolis: np.array([ + 1.62434536, 1.01258895, 0.4844172, -0.58855142, 1.15626034, 0.39505344, 1.85716138, + -0.20297933, -0.20297933, -0.20297933, -0.20297933, -1.08083775, -1.08083775, + 0.06388596, 0.96474191, 0.28101405, 0.01312597, 0.54348144, -0.14369126, -0.98889691, + -0.98889691, -0.75448121, -0.94631676, -0.94631676, -0.89550901, -0.89550901, + -0.77535005, -0.15814694, 0.14202338, -0.21022647, 
-0.4191207, 0.16750249, 0.45308981, + 1.33823098, 1.8511608, 1.55306796, 1.55306796, 1.55306796, 1.55306796, 0.15657163, + 0.3166087, 0.3166087, 0.3166087, 0.3166087, 0.54670343, 0.54670343, 0.32437529, + 0.12361722, 0.32191694, 0.44092559, 0.56274686, 0.56274686, 0.18746191, 0.18746191, + -0.15639177, -0.11279491, -0.11279491, -0.11279491, -1.20770676, -1.03832432, + -0.29776787, -1.25146848, -1.25146848, -0.93630908, -0.5857631, -0.5857631, + -0.62445861, -0.62445861, -0.64907557, -0.64907557, -0.64907557, 0.58708846, + -0.61217957, 0.25116575, 0.25116575, 0.80170324, 1.59451011, 0.97097938, 1.77284041, + 1.81940771, 1.81940771, 1.81940771, 1.81940771, 1.95710892, 2.18960348, 2.18960348, + 2.18960348, 2.18960348, 2.63096792, 2.53081269, 2.5482221, 1.42620337, 0.90910891, + -0.08791792, 0.40729341, 0.23259025, 0.23259025, 0.23259025, 2.76091595, 2.51228118]), + NUTS: np.array([ + 0.68819657, 0.1767813, -0.59467679, -0.64216066, 1.63681405, 2.13404699, 0.03126563, + 0.31817152, 0.31817152, 0.40191527, 0.40191527, 0.99220141, 0.93036804, -0.41228181, + -1.80465851, -1.70577291, 0.19406438, 0.19406438, -0.03965181, -0.76135744, + 0.70023098, 1.07183677, 1.07183677, 0.2829979, 1.13524135, -0.26461224, + -0.39442329, -1.04109657, 0.79971205, 0.79971205, 0.96839778, 0.91868626, + 0.19468837, 0.19468837, -0.67755668, -0.67755668, -0.43722432, 0.12072881, + 0.6267432, 0.6861771, 0.4669198, 0.4669198, -0.08143768, 0.27691068, 0.11510718, + 2.29821426, 2.18308403, 1.16618069, -0.45615197, -0.45615197, -0.37076172, + -0.37076172, -0.38889599, 0.36200553, -0.55179735, -0.55179735, -0.18946703, + 1.11552335, 0.98985795, 0.98985795, 1.00313687, -0.18458164, 0.44025584, 0.97610126, + -0.1558578, -0.1558578, -0.01247235, -0.08303131, 0.52019377, -1.52329796, + -1.72856248, -1.19049049, -1.19049049, -0.8651521, -0.36421118, -0.40590409, + -0.78925074, -0.53960924, -0.53960924, 0.1069186, 0.40849997, 0.1560954, + 0.35461684, 0.35461684, -0.83935418, -0.85295353, -0.13990269, -0.1412904, + -0.1412904, -0.30071575, -0.296461, 0.06540186, -0.15145479, -0.15145479, + -0.21406771, -0.21533218, 0.06833495, 0.06833495, -0.18763595, 0.34138144]), + } + + def test_sample_exact(self): + for step_method in self.master_samples: + yield self.check_trace, step_method + + def check_trace(self, step_method): + """Tests whether the trace for step methods is exactly the same as on master. + + Code changes that effect how random numbers are drawn may change this, and require + `master_samples` to be updated, but such changes should be noted and justified in the + commit. + + This method may also be used to benchmark step methods across commits, by running, for + example + + ``` + BENCHMARK=100000 ./scripts/test.sh -s pymc3/tests/test_step.py:TestStepMethods + ``` + + on multiple commits. 
+ """ + test_steps = 100 + n_steps = int(os.getenv('BENCHMARK', 100)) + benchmarking = (n_steps != test_steps) + if benchmarking: + tqdm.write('Benchmarking {} with {:,d} samples'.format(step_method.__name__, n_steps)) + else: + tqdm.write('Checking {} has same trace as on master'.format(step_method.__name__)) + with Model(): + Normal('x', mu=0, sd=1) + trace = sample(n_steps, step=step_method(), random_seed=1) + + if not benchmarking: + assert_array_almost_equal(trace.get_values('x'), self.master_samples[step_method]) + + def check_stat(self, check, trace, name): for (var, stat, value, bound) in check: s = stat(trace[var][2000:], axis=0) close_to(s, value, bound) @@ -38,7 +153,7 @@ def test_step_continuous(self): ) for step in steps: trace = sample(8000, step=step, start=start, model=model, random_seed=1) - yield self.check_stat, check, trace + yield self.check_stat, check, trace, step.__class__.__name__ def test_step_discrete(self): start, model, (mu, C) = mv_simple_discrete() @@ -51,7 +166,7 @@ def test_step_discrete(self): ) for step in steps: trace = sample(20000, step=step, start=start, model=model, random_seed=1) - self.check_stat(check, trace) + yield self.check_stat, check, trace, step.__class__.__name__ def test_step_categorical(self): start, model, (mu, C) = simple_categorical() @@ -60,12 +175,12 @@ def test_step_categorical(self): ('x', np.std, unc, unc / 10.)) with model: steps = ( - CategoricalGibbsMetropolis(model.x, proposal = 'uniform'), - CategoricalGibbsMetropolis(model.x, proposal = 'proportional'), + CategoricalGibbsMetropolis(model.x, proposal='uniform'), + CategoricalGibbsMetropolis(model.x, proposal='proportional'), ) for step in steps: trace = sample(8000, step=step, start=start, model=model, random_seed=1) - self.check_stat(check, trace) + yield self.check_stat, check, trace, step.__class__.__name__ class TestCompoundStep(unittest.TestCase): diff --git a/pymc3/theanof.py b/pymc3/theanof.py index 04289aabbb..a391b198fc 100644 --- a/pymc3/theanof.py +++ b/pymc3/theanof.py @@ -210,8 +210,8 @@ def reshape_t(x, shape): class CallableTensor(object): - """Turns a symbolic variable with one input into a function that returns symbolic arguments with the one variable replaced with the input. - + """Turns a symbolic variable with one input into a function that returns symbolic arguments + with the one variable replaced with the input. """ def __init__(self, tensor): From 55fb1bddb563299088855c4ac824098468034d21 Mon Sep 17 00:00:00 2001 From: colin Date: Sat, 3 Dec 2016 13:47:31 -0500 Subject: [PATCH 6/6] Reformat docs --- pymc3/step_methods/hmc/base_hmc.py | 33 ++++++++-------- pymc3/step_methods/hmc/hmc.py | 14 +++---- pymc3/step_methods/hmc/nuts.py | 22 +++++------ pymc3/step_methods/hmc/quadpotential.py | 18 ++++----- pymc3/step_methods/hmc/trajectory.py | 52 ++++++++++++------------- 5 files changed, 70 insertions(+), 69 deletions(-) diff --git a/pymc3/step_methods/hmc/base_hmc.py b/pymc3/step_methods/hmc/base_hmc.py index 7d87f5d3c8..0c594b09e8 100644 --- a/pymc3/step_methods/hmc/base_hmc.py +++ b/pymc3/step_methods/hmc/base_hmc.py @@ -12,24 +12,25 @@ class BaseHMC(ArrayStepShared): def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, model=None, blocked=True, use_single_leapfrog=False, **theano_kwargs): - """ + """Superclass to implement Hamiltonian/hybrid monte carlo + Parameters ---------- - vars : list of theano variables - scaling : array_like, ndim = {1,2} - Scaling for momentum distribution. 1d arrays interpreted matrix diagonal. 
- step_scale : float, default=0.25 - Size of steps to take, automatically scaled down by 1/n**(1/4) - is_cov : bool, default=False - Treat scaling as a covariance matrix/vector if True, else treat it as a - precision matrix/vector - state - State object - model : pymc3 Model instance. default=Context model - blocked: Boolean, default True - use_single_leapfrog: Boolean, will leapfrog steps take a single step at a time. - default False. - **theano_kwargs: passed to theano functions + vars : list of theano variables + scaling : array_like, ndim = {1,2} + Scaling for momentum distribution. 1d arrays interpreted matrix diagonal. + step_scale : float, default=0.25 + Size of steps to take, automatically scaled down by 1/n**(1/4) + is_cov : bool, default=False + Treat scaling as a covariance matrix/vector if True, else treat it as a + precision matrix/vector + state + State object + model : pymc3 Model instance. default=Context model + blocked: Boolean, default True + use_single_leapfrog: Boolean, will leapfrog steps take a single step at a time. + default False. + **theano_kwargs: passed to theano functions """ model = modelcontext(model) diff --git a/pymc3/step_methods/hmc/hmc.py b/pymc3/step_methods/hmc/hmc.py index 70a042f263..1e0b2c3dd4 100644 --- a/pymc3/step_methods/hmc/hmc.py +++ b/pymc3/step_methods/hmc/hmc.py @@ -27,13 +27,13 @@ def __init__(self, vars=None, path_length=2., step_rand=unif, **kwargs): """ Parameters ---------- - vars : list of theano variables - path_length : float, default=2 - total length to travel - step_rand : function float -> float, default=unif - A function which takes the step size and returns an new one used to - randomize the step size at each iteration. - **kwargs : passed to BaseHMC + vars : list of theano variables + path_length : float, default=2 + total length to travel + step_rand : function float -> float, default=unif + A function which takes the step size and returns an new one used to + randomize the step size at each iteration. 
+ **kwargs : passed to BaseHMC """ super(HamiltonianMC, self).__init__(vars, **kwargs) self.path_length = path_length diff --git a/pymc3/step_methods/hmc/nuts.py b/pymc3/step_methods/hmc/nuts.py index e22a3cb8ec..d171159cd9 100644 --- a/pymc3/step_methods/hmc/nuts.py +++ b/pymc3/step_methods/hmc/nuts.py @@ -28,17 +28,17 @@ def __init__(self, vars=None, Emax=1000, target_accept=0.8, """ Parameters ---------- - vars : list of Theano variables, default continuous vars - Emax : float, default 1000 - maximum energy - target_accept : float (0,1) default .8 - target for avg accept probability between final branch and initial position - gamma : float, default .05 - k : float (.5,1) default .75 - scaling of speed of adaptation - t0 : int, default 10 - slows inital adapatation - kwargs: passed to BaseHMC + vars : list of Theano variables, default continuous vars + Emax : float, default 1000 + maximum energy + target_accept : float (0,1) default .8 + target for avg accept probability between final branch and initial position + gamma : float, default .05 + k : float (.5,1) default .75 + scaling of speed of adaptation + t0 : int, default 10 + slows inital adapatation + kwargs: passed to BaseHMC """ super(NUTS, self).__init__(vars, use_single_leapfrog=True, **kwargs) diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py index e12fe1dcbc..0c85d2c214 100644 --- a/pymc3/step_methods/hmc/quadpotential.py +++ b/pymc3/step_methods/hmc/quadpotential.py @@ -14,18 +14,18 @@ def quad_potential(C, is_cov, as_cov): """ Parameters ---------- - C : arraylike, 0 <= ndim <= 2 - scaling matrix for the potential - vector treated as diagonal matrix - is_cov : Boolean - whether C is provided as a covariance matrix or hessian - as_cov : Boolean - whether the random draws should come from the normal dist - using the covariance matrix above or the inverse + C : arraylike, 0 <= ndim <= 2 + scaling matrix for the potential + vector treated as diagonal matrix + is_cov : Boolean + whether C is provided as a covariance matrix or hessian + as_cov : Boolean + whether the random draws should come from the normal dist + using the covariance matrix above or the inverse Returns ------- - q : Quadpotential + q : Quadpotential """ if issparse(C) and is_cov != as_cov: diff --git a/pymc3/step_methods/hmc/trajectory.py b/pymc3/step_methods/hmc/trajectory.py index b64790a21a..ebc8c44d5c 100644 --- a/pymc3/step_methods/hmc/trajectory.py +++ b/pymc3/step_methods/hmc/trajectory.py @@ -14,15 +14,15 @@ def _theano_hamiltonian(model_vars, shared, logpt, potential): Parameters ---------- - model_vars: array of variables to be sampled - shared theano tensors that are already shared - logpt: model log probability - potential: hamiltonian potential + model_vars : array of variables to be sampled + shared : theano tensors that are already shared + logpt : model log probability + potential : hamiltonian potential Returns ------- - Hamiltonian: namedtuple with log pdf, gradient of log pdf, and potential functions - q: Starting position variable. + Hamiltonian : namedtuple with log pdf, gradient of log pdf, and potential functions + q : Starting position variable. 
""" dlogp = gradient(logpt, model_vars) (logp, dlogp), q = join_nonshared_inputs([logpt, dlogp], model_vars, shared) @@ -36,14 +36,14 @@ def _theano_energy_function(H, q, **theano_kwargs): Parameters ---------- - H: Hamiltonian namedtuple - q: theano variable, starting position - theano_kwargs: passed to theano.function + H : Hamiltonian namedtuple + q : theano variable, starting position + theano_kwargs : passed to theano.function Returns ------- - energy_function: theano function that computes the energy at a point (p, q) in phase space - p: Starting momentum variable. + energy_function : theano function that computes the energy at a point (p, q) in phase space + p : Starting momentum variable. """ p = tt.dvector('p') p.tag.test_value = q.tag.test_value @@ -91,19 +91,19 @@ def get_theano_hamiltonian_functions(model_vars, shared, logpt, potential, Parameters ---------- model_vars : array of variables to be sampled - shared theano tensors that are already shared - logpt: model log probability - potential: hamiltonian potential + shared : theano tensors that are already shared + logpt : model log probability + potential : Hamiltonian potential theano_kwargs : dictionary of keyword arguments to pass to theano functions - use_single_leapfrog: Boolean, if only 1 integration step is done at a time (as in NUTS), - this provides a ~2x speedup + use_single_leapfrog : Boolean, if only 1 integration step is done at a time (as in NUTS), + this provides a ~2x speedup Returns ------- - H: Hamiltonian namedtuple - energy_function: theano function computing energy at a point in phase space - leapfrog_integrator: theano function integrating the Hamiltonian from a point in phase space - theano_variables: dictionary of variables used in the computation graph which may be useful + H : Hamiltonian namedtuple + energy_function : theano function computing energy at a point in phase space + leapfrog_integrator : theano function integrating the Hamiltonian from a point in phase space + theano_variables : dictionary of variables used in the computation graph which may be useful """ H, q = _theano_hamiltonian(model_vars, shared, logpt, potential) energy_function, p = _theano_energy_function(H, q, **theano_kwargs) @@ -137,18 +137,18 @@ def leapfrog(H, q, p, epsilon, n_steps): ---------- H : Hamiltonian instance. Tuple of `logp, dlogp, potential`. - q: Theano.tensor + q : Theano.tensor initial position vector - p: Theano.tensor + p : Theano.tensor initial momentum vector - epsilon: float, step size - n_steps: int, number of iterations + epsilon : float, step size + n_steps : int, number of iterations Returns ------- - position: Theano.tensor + position : Theano.tensor position estimate at time :math:`n \cdot e`. - momentum: Theano.tensor + momentum : Theano.tensor momentum estimate at time :math:`n \cdot e`. """ def full_update(p, q):