From fe74c8ca6f479636ce3fa1ef5bcca34d5cf3cc9d Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Sun, 1 Dec 2019 16:47:56 -0800 Subject: [PATCH 1/2] Migrate to AbstractMCMC, add manual --- Project.toml | 6 +- docs/_data/toc.yml | 3 + docs/src/interface-manual.md | 17 -- docs/src/using-turing/interface.md | 321 +++++++++++++++++++++++++++++ src/Turing.jl | 9 +- src/inference/Inference.jl | 29 ++- 6 files changed, 344 insertions(+), 41 deletions(-) delete mode 100644 docs/src/interface-manual.md create mode 100644 docs/src/using-turing/interface.md diff --git a/Project.toml b/Project.toml index 8f045f77ba..3c176b0638 100644 --- a/Project.toml +++ b/Project.toml @@ -1,8 +1,9 @@ name = "Turing" uuid = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" -version = "0.7.3" +version = "0.7.4" [deps] +AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001" AdvancedHMC = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d" Bijectors = "76274a88-744f-5084-9051-94815aaf08c4" BinaryProvider = "b99e7846-7c00-51b0-8f62-c81ae34c0232" @@ -27,6 +28,7 @@ Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] +AbstractMCMC = "~0.1" AdvancedHMC = "0.2.4" Bijectors = "0.4.0" BinaryProvider = "0.5.6" @@ -35,8 +37,8 @@ DistributionsAD = "0.1.0" FiniteDifferences = "0.9" ForwardDiff = "0.10.3" Libtask = "0.3.1" -MCMCChains = "0.3.15" LogDensityProblems = "^0.9" +MCMCChains = "~0.4.0" MacroTools = "0.5.1" PDMats = "0.9.9" ProgressMeter = "1.0.0" diff --git a/docs/_data/toc.yml b/docs/_data/toc.yml index 950c0f1fe2..a2c49d25a6 100644 --- a/docs/_data/toc.yml +++ b/docs/_data/toc.yml @@ -13,6 +13,9 @@ - title: "Advanced Usage" url: "docs/using-turing/advanced" + - title: "Interface Guide" + url: "docs/using-turing/interface" + - title: "Automatic Differentiation" url: "docs/using-turing/autodiff" diff --git a/docs/src/interface-manual.md b/docs/src/interface-manual.md deleted file mode 100644 index e5d46193e1..0000000000 --- a/docs/src/interface-manual.md +++ /dev/null @@ -1,17 +0,0 @@ -# The Sampling Interface - -Turing's sampling interface presents several structures and functions that one needs to overload in order to implement an interface-compatible sampler. - -1. A subtype of `AbstractSampler`, defined as a mutable struct containing state information. -2. A subtype of `AbstractTransition`, which represents a single draw from the sampler. -3. A function `transition_type` which returns the `AbstractTransition` type used by an implementation of an `AbstractSampler`, or a function `transition_init` with returns a `Vector{AbstractTransition}` of length `N`. -4. A function `sample_init!` which performs any necessary set up. -5. A function `step!` which returns an `AbstractTransition`. -6. A function `sample_end!` which handles any sampler wrap-up. -7. A function `MCMCChains.Chains` which accepts an `Vector{<:AbstractTransition}` and returns an `MCMCChains` object. - -The interface methods with exclamation points are those that are intended to allow for some state mutation. Any mutating function is meant to allow mutation where needed -- you might use - -- `sample_init!` to run some kind of sampler preparation, before sampling begins. This could mutate a sampler's state. -- `step!` might mutate a sampler flag after each sample. MH does this for example by using a `violating_support` flag. -- `sample_end!` contains any wrap-up you might need to do. If you were sampling in a transformed space, this might be where you convert everything back to a constrained space. \ No newline at end of file diff --git a/docs/src/using-turing/interface.md b/docs/src/using-turing/interface.md new file mode 100644 index 0000000000..24e999953a --- /dev/null +++ b/docs/src/using-turing/interface.md @@ -0,0 +1,321 @@ +--- +title: Interface Guide +toc: true +--- + +# The sampling interface + +Turing implements a sampling interface (hosted at [AbstractMCMC](https://github.com/TuringLang/AbstractMCMC.jl)) that is intended to provide a common framework for Markov Chain Monte Carlo samplers. The interface presents several structures and functions that one needs to overload in order to implement an interface-compatible sampler. + +This guide will demonstrate how to implement the interface without Turing, and then demonstrate how to implement the same sampler with Turing. + +## Interface overview + +Any implementation of an inference method that uses the AbstractMCMC interface should implement some combination of the following types and functions: + +1. A subtype of `AbstractSampler`, defined as a mutable struct containing state information or sampler parameters. +2. A subtype of `AbstractTransition`, which represents a single draw from the sampler. +3. A function `transition_type` which returns the `AbstractTransition` type used by an implementation of an `AbstractSampler`, or a function `transition_init` with returns a `Vector{AbstractTransition}` of length `N`. +4. A function `sample_init!` which performs any necessary set up. +5. A function `step!` which returns an `AbstractTransition`. +6. A function `sample_end!` which handles any sampler wrap-up. +7. A function `bundle_samples` which accepts an `Vector{<:AbstractTransition}` and returns some `AbstractChains` object. Most implementations of the interface return an `MCMCChains.Chains` struct. + +The interface methods with exclamation points are those that are intended to allow for some state mutation. Any mutating function is meant to allow mutation where needed -- you might use: + +- `sample_init!` to run some kind of sampler preparation, before sampling begins. This could mutate a sampler's state. +- `step!` might mutate a sampler flag after each sample. +- `sample_end!` contains any wrap-up you might need to do. If you were sampling in a transformed space, this might be where you convert everything back to a constrained space. + +## Why do you have an interface? + +The motivation for the interface is to allow Julia's fantastic probabilistic programming language community to have a set of standards and common implementations so we can all thrive together. Markov-Chain Monte Carlo methods tend to have a very similar framework to one another, and so a common interface should help more great inference methods built in single-purpose packages to experience more use among the community. + +## Implementing Metropolis-Hastings without Turing + +[Metropolis-Hastings](https://en.wikipedia.org/wiki/Markov_chain_Monte_Carlo) is often the first sampling method that people are exposed to. It is a very straightforward algorithm and is accordingly the easiest to implement, so it makes for a good example. In this section, you will learn how to use the types and functions listed above to implement the Metropolis-Hastings sampler using the MCMC interface. + +The full code for this implementation is housed in [AdvancedMH.jl](https://github.com/TuringLang/AdvancedMH.jl). + +### Imports + +Let's begin by importing the relevant libraries. We'll import `AbstracMCMC`, which contains the interface framework we'll fill out. We also need `Distributions` and `Random`. + +```julia +# Import the relevant libraries. +using AbstractMCMC +using Distributions +using Random +``` + +An interface extension (like the one we're writing right now) typically requires that you overload or implement several functions. Specifically, you should `import` the functions you intend to overload. This next code block accomplishes that. + +From `Distributions`, we need `Sampleable`, `VariateForm`, and `ValueSupport`, three abstract types that define a distribution. Models in the interface are assumed to be subtypes of `Sampleable{VariateForm, ValueSupport}`. In this section our model is going be be extremely simple, so we will not end up using these except to make sure that the inference functions are dispatching correctly. + +We need the `MCMCChains.Chains` function, because at the end of sampling we need to be able to convert a vector of `<:AbstractTransitions` to an `MCMCChains` object. + +Lastly, we import the specific functions from `AbstractMCMC` that we need to overload. Because Metropolis-Hastings is so simple we only need a few functions/types: + +- `step!` +- `AbstractSampler` +- `AbstractTransition` +- `transition_type` + +More complex inference methods may need to import additional functions. For now, let's focus on the code: + +```julia +# Import specific functions and types to use or overload. +import MCMCChains: Chains +import AbstractMCMC: step!, AbstractSampler, AbstractTransition, transition_type, bundle_samples +``` + +### Sampler + +Let's begin our sampler definition by defining a sampler called `MetropolisHastings` which is a subtype of `AbstractSampler`. Correct typing is very important for proper interface implementation -- if you are missing a subtype, your method may not be dispatched to when you call `sample`. + +```julia +# Define a sampler type. +struct MetropolisHastings{T, D} <: AbstractSampler + init_θ :: T + proposal :: D +end + +# Default constructors. +MetropolisHastings(init_θ::Real) = MetropolisHastings(init_θ, Normal(0,1)) +MetropolisHastings(init_θ::Vector{<:Real}) = MetropolisHastings(init_θ, MvNormal(length(init_θ),1)) +``` + +Above, we have defined a sampler that stores the initial parameterization of the prior, and a distribution object from which proposals are drawn. You can have a struct that has no fields, and simply use it for dispatching onto the relevant functions, or you can store a large amount of state information in your sampler. + +The general intuition for what to store in your sampler struct is that anything you may need to perform inference between samples but you don't want to store in an `AbstractTransition` should go into the sampler struct. It's the only way you can carry non-sample related state information between `step!` calls. + +### Model + +Next, we need to have a model of some kind. A model is a struct that's a subtype of `AbstractModel` that contains whatever information is necessary to perform inference on your problem. In our case we want to know the mean and variance parameters for a standard Normal distribution, so we can keep our model to the log density of a Normal. + +Note that we only have to do this because we are not yet integrating the sampler with Turing -- Turing has a very sophisticated modelling engine that removes the need to define custom model structs. + +```julia +# Define a model type. Stores the log density function. +struct DensityModel{F<:Function} <: AbstractModel + ℓπ :: F +end + +# Default density constructor. +DensityModel(π::Function, data::T) where T = DensityModel{VariateForm, ValueSupport, T}(π, data) +``` + +### Transition + +The next step is to define some subtype of `AbstractTransition` which we will return from each `step!` call. We'll keep it simple by just defining a wrapper struct that contains the parameter draws and the log density of that draw: + +```julia +# Create a very basic Transition type, only stores the +# parameter draws and the log probability of the draw. +struct Transition{T<:Union{Vector{<:Real}, <:Real}, L<:Real} <: AbstractTransition + θ :: T + lp :: L +end + +# Store the new draw and its log density. +Transition(model::M, θ::T) where {M<:DensityModel, T} = Transition(θ, ℓπ(model, θ)) +``` + +`Transition` can now store any type of parameter, whether it's a vector of draws from multiple parameters or a single univariate draw. We should also tell the interface what specific subtype of `AbstractTransition` we're going to use, so we can just define a new method on `transition_type`: + +```julia +# Tell the interface what transition type we would like to use. +transition_type(model::DensityModel, spl::MetropolisHastings) = + typeof(Transition(spl.init_θ, ℓπ(model, spl.init_θ))) +``` + +### Metropolis-Hastings + +Now it's time to get into the actual inference. We've defined all of the core pieces we need, but we need to implement the `step!` function which actually perform inference. + +As a refresher, Metropolis-Hastings implements a very basic algorithm: + +1. Pick some initial state, $\theta_0$. +2. For $t$ in $[1,N]$, do + + a. Generate a proposal parameterization $θ'_t \sim q(\theta'_t \mid \theta_{t-1})$. + + b. Calculate the acceptance probability, $\alpha = \text{min}\Big[1,\frac{\pi(θ'_t)}{\pi(\theta_{t-1})} \frac{q(θ_{t-1} \mid θ'_t)}{q(θ'_t \mid θ_{t-1})}) \Big]$. + + c. If $U \le α$ where $U \sim [0,1]$, then $\theta_t = \theta'_t$. Otherwise, $\theta_t = \theta_{t-1}$. + +Of course, it's much easier to do this in the log space, so the acceptance probability is more commonly written as + +$$ +\alpha = \min\Big[\log \pi(θ'_t) - \log \pi(θ_{t-1}) + \log q(θ_{t-1} \mid θ'_t) - \log q(θ'_t \mid θ_{t-1}), 0\Big] +$$ + +In interface terms, we should do the following: + +1. Make a new transition containing a proposed sample. +2. Calculate the acceptance probability. +3. If we accept, return the new transition, otherwise, return the old one. + +### Steps + +The `step!` function is the function that performs the bulk of your inference. In our case, we will implement two `step!` functions -- one for the very first iteration, and one for every subsequent iteration. + +```julia +# Define the first step! function, which is called at the +# beginning of sampling. Return the initial parameter used +# to define the sampler. +function step!( + rng::AbstractRNG, + model::DensityModel, + spl::MetropolisHastings, + N::Integer; + kwargs... +) + return Transition(model, spl.init_θ) +end +``` + +The first `step!` function just packages up the initial parameterization inside the sampler, and returns it. We implicity accept the very first parameterization. + +The other `step!` function performs the usual steps from Metropolis-Hastings. Included are several helper functions, `proposal` and `q`, which are designed to replicate the functions in the pseudocode above. + +- `proposal` generates a new proposal in the form of a `Transition`, which can be univariate if the value passed in is univariate, or it can be multivariate if the `Transition` given is multivariate. Proposals use a basic `Normal` or `MvNormal` proposal distribution. +- `q` returns the log density of one parameterization conditional on another, according to the proposal distribution. +- `step!` generates a new proposal, checks the acceptance probability, and then returns either the previous transition or the proposed transition. + +```julia +# Define a function that makes a basic proposal depending on a univariate +# parameterization or a multivariate parameterization. +propose(spl::MetropolisHastings, model::DensityModel, θ::Real) = + Transition(model, θ + rand(spl.proposal)) +propose(spl::MetropolisHastings, model::DensityModel, θ::Vector{<:Real}) = + Transition(model, θ + rand(spl.proposal)) +propose(spl::MetropolisHastings, model::DensityModel, t::Transition) = propose(spl, model, t.θ) + +# Calculates the probability `q(θ | θcond)`, using the proposal distribution `spl.proposal`. +q(spl::MetropolisHastings, θ::Real, θcond::Real) = logpdf(spl.proposal, θ - θcond) +q(spl::MetropolisHastings, θ::Vector{<:Real}, θcond::Vector{<:Real}) = logpdf(spl.proposal, θ - θcond) +q(spl::MetropolisHastings, t1::Transition, t2::Transition) = q(spl, t1.θ, t2.θ) + +# Calculate the density of the model given some parameterization. +ℓπ(model::DensityModel, θ::T) where T = model.ℓπ(θ) +ℓπ(model::DensityModel, t::Transition) = t.lp + +# Define the other step function. Returns a Transition containing +# either a new proposal (if accepted) or the previous proposal +# (if not accepted). +function step!( + rng::AbstractRNG, + model::DensityModel, + spl::MetropolisHastings, + ::Integer, + θ_prev::Transition; + kwargs... +) + # Generate a new proposal. + θ = propose(spl, model, θ_prev) + + # Calculate the log acceptance probability. + α = ℓπ(model, θ) - ℓπ(model, θ_prev) + q(spl, θ_prev, θ) - q(spl, θ, θ_prev) + + # Decide whether to return the previous θ or the new one. + if log(rand(rng)) < min(α, 0.0) + return θ + else + return θ_prev + end +end +``` + +### Chains + +The last piece in our puzzle is a `bundle_samples` function, which accepts a `Vector{T<:Transition}` and returns an `MCMCChains.Chains` struct. Fortunately, our `Transition` is incredibly simple, and we only need to build a little bit of functionality to accept custom parameter names passed in by the user. + +```julia +# A basic chains constructor that works with the Transition struct we defined. +function bundle_samples( + rng::AbstractRNG, + ℓ::DensityModel, + s::MetropolisHastings, + N::Integer, + ts::Vector{T}; + param_names=missing, + kwargs... +) where {ModelType<:AbstractModel, T<:AbstractTransition} + # Turn all the transitions into a vector-of-vectors. + vals = copy(reduce(hcat,[vcat(t.θ, t.lp) for t in ts])') + + # Check if we received any parameter names. + if ismissing(param_names) + param_names = ["Parameter $i" for i in 1:(length(first(vals))-1)] + end + + # Add the log density field to the parameter names. + push!(param_names, "lp") + + # Bundle everything up and return a Chains struct. + return Chains(vals, param_names, (internals=["lp"],)) +end +``` + +All done! + +### Testing the implementation + +Now that we have all the pieces, we should test the implementation by defining a model to calculate the mean and variance parameters of a Normal distribution. We can do this by constructing a target density function, providing a sample of data, and then running the sampler with `sample`. + +```julia +# Generate a set of data from the posterior we want to estimate. +data = rand(Normal(5, 3), 30) + +# Define the components of a basic model. +insupport(θ) = θ[2] >= 0 +dist(θ) = Normal(θ[1], θ[2]) +density(θ) = insupport(θ) ? sum(logpdf.(dist(θ), data)) : -Inf + +# Construct a DensityModel. +model = DensityModel(density) + +# Set up our sampler with initial parameters. +spl = MetropolisHastings([0.0, 0.0]) + +# Sample from the posterior. +chain = sample(model, spl, 100000; param_names=["μ", "σ"]) +``` + +If all the interface functions have been extended properly, you should get an output from `display(chain)` that looks something like this: + +``` +Object of type Chains, with data of type 100000×3×1 Array{Float64,3} + +Iterations = 1:100000 +Thinning interval = 1 +Chains = 1 +Samples per chain = 100000 +internals = lp +parameters = μ, σ + +2-element Array{ChainDataFrame,1} + +Summary Statistics + +│ Row │ parameters │ mean │ std │ naive_se │ mcse │ ess │ r_hat │ +│ │ Symbol │ Float64 │ Float64 │ Float64 │ Float64 │ Any │ Any │ +├─────┼────────────┼─────────┼──────────┼────────────┼────────────┼─────────┼─────────┤ +│ 1 │ μ │ 5.33157 │ 0.854193 │ 0.0027012 │ 0.00893069 │ 8344.75 │ 1.00009 │ +│ 2 │ σ │ 4.54992 │ 0.632916 │ 0.00200146 │ 0.00534942 │ 14260.8 │ 1.00005 │ + +Quantiles + +│ Row │ parameters │ 2.5% │ 25.0% │ 50.0% │ 75.0% │ 97.5% │ +│ │ Symbol │ Float64 │ Float64 │ Float64 │ Float64 │ Float64 │ +├─────┼────────────┼─────────┼─────────┼─────────┼─────────┼─────────┤ +│ 1 │ μ │ 3.6595 │ 4.77754 │ 5.33182 │ 5.89509 │ 6.99651 │ +│ 2 │ σ │ 3.5097 │ 4.09732 │ 4.47805 │ 4.93094 │ 5.96821 │ +``` + +It looks like we're extremely close to our true parameters of `Normal(5,3)`, though with a fairly high variance due to the low sample size. + +## Conclusion + +We've seen how to implement the sampling interface for general projects. Turing's interface methods are ever-evolving, so please open an issue at [AbstractMCMC](https://github.com/TuringLang/AbstractMCMC.jl) with feature requests or problems. \ No newline at end of file diff --git a/src/Turing.jl b/src/Turing.jl index 7c2816d6cd..4b7a7abb33 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -12,6 +12,7 @@ using Requires, Reexport, ForwardDiff using Bijectors, StatsFuns, SpecialFunctions using Statistics, LinearAlgebra, ProgressMeter using Markdown, Libtask, MacroTools +using AbstractMCMC @reexport using Distributions, MCMCChains, Libtask using Tracker: Tracker @@ -31,12 +32,6 @@ const CACHERANGES = 0b01 const DEBUG = Bool(parse(Int, get(ENV, "DEBUG_TURING", "0"))) -# Include the interface. Temporary until the interface is moved -# to MCMCChains. CSP 2019-05-12 -include("interface/Interface.jl") -using .Interface -import .Interface: AbstractSampler - """ struct Model{pvars, dvars, F, TData, TDefaults} f::F @@ -52,7 +47,7 @@ struct Model{pvars, F, TData, TDefaults -} <: Sampleable{VariateForm,ValueSupport} # May need to find better types +} <: AbstractModel f::F data::TData defaults::TDefaults diff --git a/src/inference/Inference.jl b/src/inference/Inference.jl index c06390fc60..6e04047be9 100644 --- a/src/inference/Inference.jl +++ b/src/inference/Inference.jl @@ -12,14 +12,13 @@ using ..Turing: Model, runmodel!, get_pvars, get_dvars, using ..Turing: in_pvars, in_dvars, Turing using StatsFuns: logsumexp using Random: GLOBAL_RNG, AbstractRNG -using ..Turing.Interface +using AbstractMCMC import MCMCChains: Chains import AdvancedHMC; const AHMC = AdvancedHMC import ..Turing: getspace -import Distributions: sample import ..Core: getchunksize, getADtype -import ..Interface: AbstractTransition, sample, step!, sample_init!, +import AbstractMCMC: AbstractTransition, sample, step!, sample_init!, transitions_init, sample_end!, AbstractSampler, transition_type, callback, init_callback, AbstractCallback, psample @@ -140,28 +139,28 @@ const TURING_INTERNAL_VARS = (internals = [ # Default definitions for the interface # ######################################### -function Interface.sample( +function AbstractMCMC.sample( rng::AbstractRNG, model::ModelType, alg::AlgType, N::Integer; kwargs... ) where { - ModelType<:Sampleable, + ModelType<:AbstractModel, SamplerType<:AbstractSampler, AlgType<:InferenceAlgorithm } return sample(rng, model, Sampler(alg, model), N; progress=PROGRESS[], kwargs...) end -function Interface.sample( +function AbstractMCMC.sample( model::ModelType, alg::AlgType, N::Integer; resume_from=nothing, kwargs... ) where { - ModelType<:Sampleable, + ModelType<:AbstractModel, SamplerType<:AbstractSampler, AlgType<:InferenceAlgorithm } @@ -173,21 +172,21 @@ function Interface.sample( end -function Interface.psample( +function AbstractMCMC.psample( model::ModelType, alg::AlgType, N::Integer, n_chains::Integer; kwargs... ) where { - ModelType<:Sampleable, + ModelType<:AbstractModel, SamplerType<:AbstractSampler, AlgType<:InferenceAlgorithm } return psample(GLOBAL_RNG, model, alg, N, n_chains; progress=false, kwargs...) end -function Interface.psample( +function AbstractMCMC.psample( rng::AbstractRNG, model::ModelType, alg::AlgType, @@ -195,14 +194,14 @@ function Interface.psample( n_chains::Integer; kwargs... ) where { - ModelType<:Sampleable, + ModelType<:AbstractModel, SamplerType<:AbstractSampler, AlgType<:InferenceAlgorithm } return psample(rng, model, Sampler(alg, model), N, n_chains; progress=false, kwargs...) end -function Interface.sample_init!( +function AbstractMCMC.sample_init!( ::AbstractRNG, model::Model, spl::Sampler, @@ -216,7 +215,7 @@ function Interface.sample_init!( initialize_parameters!(spl; kwargs...) end -function Interface.sample_end!( +function AbstractMCMC.sample_end!( ::AbstractRNG, ::Model, spl::AbstractSampler, @@ -379,7 +378,7 @@ function get_transition_extras(ts::Vector{T}) where T<:AbstractTransition end # Default Chains constructor. -function Chains( +function AbstractMCMC.bundle_samples( rng::AbstractRNG, model::ModelType, spl::Sampler, @@ -388,7 +387,7 @@ function Chains( discard_adapt::Bool=true, save_state=false, kwargs... -) where {ModelType<:Sampleable, T<:AbstractTransition} +) where {ModelType<:AbstractModel, T<:AbstractTransition} # Check if we have adaptation samples. if discard_adapt && :n_adapts in fieldnames(typeof(spl.alg)) ts = ts[(spl.alg.n_adapts+1):end] From bf5f2e761ba961b9da3c3fac0952107aba98e1e1 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Sun, 1 Dec 2019 19:16:47 -0800 Subject: [PATCH 2/2] Remove interface call from DynamicHMC glue. --- src/contrib/inference/dynamichmc.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/contrib/inference/dynamichmc.jl b/src/contrib/inference/dynamichmc.jl index d83a61364c..afd771be61 100644 --- a/src/contrib/inference/dynamichmc.jl +++ b/src/contrib/inference/dynamichmc.jl @@ -1,4 +1,4 @@ -using ..Turing.Interface: init_callback, NoCallback +using AbstractMCMC: init_callback, NoCallback ### ### DynamicHMC backend - https://github.com/tpapp/DynamicHMC.jl