From 5f24ae3e16cdcff488bfe522f88da5b165436eb1 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Wed, 22 Apr 2020 19:09:44 -0700 Subject: [PATCH 01/35] Add Optim dependency --- Project.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Project.toml b/Project.toml index ace95523d9..4cc7334687 100644 --- a/Project.toml +++ b/Project.toml @@ -17,6 +17,7 @@ Libtask = "6f1fad26-d15e-5dc8-ae53-837a1d7b8c9f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" +Optim = "429524aa-4258-5aef-a3af-852621145aeb" ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" @@ -41,6 +42,7 @@ ForwardDiff = "0.10.3" Libtask = "0.3.1" LogDensityProblems = "^0.9, 0.10" MCMCChains = "3.0.7" +Optim = "0.20" ProgressLogging = "0.1" Reexport = "0.2.0" Requires = "0.5, 1.0" From f8edbab0e84525627c91745180bc48f4f5a1488e Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Wed, 22 Apr 2020 19:09:56 -0700 Subject: [PATCH 02/35] Export MLE/MAP --- src/Turing.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Turing.jl b/src/Turing.jl index 7f82bb4717..0a5a816f25 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -36,6 +36,8 @@ include("inference/Inference.jl") # inference algorithms using .Inference include("variational/VariationalInference.jl") using .Variational +include("modes/ModeEstimation.jl") +using .ModeEstimation # TODO: re-design `sample` interface in MCMCChains, which unify CmdStan and Turing. 
# Related: https://github.com/TuringLang/Turing.jl/issues/746 @@ -85,7 +87,7 @@ export @model, # modelling CSMC, PG, - vi, # variational inference + vi, # variational inference ADVI, sample, # inference @@ -108,5 +110,8 @@ export @model, # modelling LogPoisson, NamedDist, filldist, - arraydist + arraydist, + + MLE, # mode estimation tools + MAP end From 7f6a03b99cd95a8d2237244f54f34364af689e49 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Wed, 22 Apr 2020 19:10:09 -0700 Subject: [PATCH 03/35] Fix stupid _params_to_array behavior --- src/inference/Inference.jl | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/inference/Inference.jl b/src/inference/Inference.jl index 4e21a9a583..83c5825308 100644 --- a/src/inference/Inference.jl +++ b/src/inference/Inference.jl @@ -265,18 +265,20 @@ end # Chain making utilities # ########################## -function _params_to_array(ts::Vector, spl::Sampler) - names_set = Set{String}() +function _params_to_array(ts::Vector) + names = Vector{String}() # Extract the parameter names and values from each transition. dicts = map(ts) do t nms, vs = flatten_namedtuple(t.θ) for nm in nms - push!(names_set, nm) + if !(nm in names) + push!(names, nm) + end end # Convert the names and values to a single dictionary. return Dict(nms[j] => vs[j] for j in 1:length(vs)) end - names = collect(names_set) + # names = collect(names_set) vals = [get(dicts[i], key, missing) for i in eachindex(dicts), (j, key) in enumerate(names)] @@ -356,7 +358,7 @@ function AbstractMCMC.bundle_samples( # Convert transitions to array format. # Also retrieve the variable names. - nms, vals = _params_to_array(ts, spl) + nms, vals = _params_to_array(ts) # Get the values of the extra parameters in each Transition struct. 
extra_params, extra_values = get_transition_extras(ts) From d682d9b95a352125c3d753307976b55ab5661af8 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Wed, 22 Apr 2020 19:10:18 -0700 Subject: [PATCH 04/35] Add MLE/MAP --- src/modes/ModeEstimation.jl | 235 ++++++++++++++++++++++++++++++++++++ 1 file changed, 235 insertions(+) create mode 100644 src/modes/ModeEstimation.jl diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl new file mode 100644 index 0000000000..721f977a58 --- /dev/null +++ b/src/modes/ModeEstimation.jl @@ -0,0 +1,235 @@ +module ModeEstimation + +using ..Turing +using ..Bijectors +using LinearAlgebra + +import ..DynamicPPL +import Optim +import NamedArrays +import ..ForwardDiff + +export MAP, MLE + +""" + ModeResult{ + V<:NamedArrays.NamedArray, + M<:NamedArrays.NamedArray, + O<:Optim.MultivariateOptimizationResults, + S<:NamedArrays.NamedArray + } + +A wrapper struct to store various results from a MAP or MLE estimation. + +Fields: + +- `values` is a vector with the resulting point estimates +- `info_matrix` is the inverse Hessian +- `optim_result` is the stored Optim.jl results +- `summary_table` is a summary table with parameters, standard errors, and + t-statistics computed from the information matrix. +- `lp` is the final likelihood. +""" +struct ModeResult{ + V<:NamedArrays.NamedArray, + M<:Union{Missing, NamedArrays.NamedArray}, + O<:Optim.MultivariateOptimizationResults, + S<:NamedArrays.NamedArray +} + values :: V + info_matrix :: M + optim_result :: O + summary_table :: S + lp :: Float64 +end + +function Base.show(io::IO, m::ModeResult) + show(io, m.summary_table) +end + +""" + make_logjoint(model::DynamicPPL.Model, ctx::DynamicPPL.AbstractContext) + +Constructs a log density function that accepts a vector `z` and returns +a tuple (-likelihood, `varinfo`). The model is run using the provided +context `ctx`. 
+""" +function make_logjoint(model::DynamicPPL.Model, ctx::DynamicPPL.AbstractContext) + # setup + varinfo_init = Turing.VarInfo(model) + spl = DynamicPPL.SampleFromPrior() + DynamicPPL.link!(varinfo_init, spl) + + function logπ(z; unlinked = false) + varinfo = DynamicPPL.VarInfo(varinfo_init, spl, z) + + unlinked && DynamicPPL.invlink!(varinfo_init, spl) + model(varinfo, spl, ctx) + unlinked && DynamicPPL.link!(varinfo_init, spl) + + return -DynamicPPL.getlogp(varinfo) + end + + return logπ +end + +""" + mode_estimation( + model::DynamicPPL.Model, + lpf; + optim_options=Optim.Options(), + kwargs... + ) + +An internal function that handles the computation of a MLE or MAP estimate. + +Arguments: + +- `model` is a `DynamicPPL.Model`. +- `lpf` is a function returned by `make_logjoint`. + +Optional arguments: + +- `optim_options` is a `Optim.Options` struct that allows you to change the number + of iterations run in an MLE estimate. + +""" +function mode_estimation( + model::DynamicPPL.Model, + lpf; + optim_options=Optim.Options(), + kwargs... +) + # Do some initialization. + b = bijector(model) + binv = inv(b) + + spl = DynamicPPL.SampleFromPrior() + vi = DynamicPPL.VarInfo(model) + init_params = model(vi, spl) + init_vals = vi[spl] + + # Construct target function. + target(x) = lpf(x) + hess_target(x) = lpf(x; unlinked=true) + + # Optimize! + M = Optim.optimize(target, init_vals, optim_options) + + # Retrieve the estimated values. + vals = binv(M.minimizer) + + # Get the VarInfo at the MLE/MAP point, and run the model to ensure + # correct dimensionality. + vi[spl] = vals + model(vi) # XXX: Is this a necessary step? + + # Make one transition to get the parameter names. + ts = [Turing.Inference.Transition(DynamicPPL.tonamedtuple(vi), DynamicPPL.getlogp(vi))] + varnames, _ = Turing.Inference._params_to_array(ts) + + # Store the parameters and their names in an array. + vmat = NamedArrays.NamedArray(vals, varnames) + + # Try to generate the information matrix. 
+ try + # Calculate Hessian and information matrix. + info = ForwardDiff.hessian(hess_target, vals) + info = inv(info) + mat = NamedArrays.NamedArray(info, (varnames, varnames)) + + # Create the standard errors. + ses = sqrt.(diag(info)) + + # Calculate t-stats. + tstat = vals ./ ses + + # Make a summary table. + stable = NamedArrays.NamedArray( + [vals ses tstat], + (varnames, ["parameter", "std_err", "tstat"])) + + # Return a wrapped-up table. + return ModeResult(vmat, mat, M, stable, M.minimum) + catch err + @warn "Could not compute Hessian matrix" err + stable = NamedArrays.NamedArray([vals repeat([missing], length(vals)) repeat([missing], length(vals))], (varnames, ["parameter", "std_err", "tstat"])) + return ModeResult(vmat, missing, M, stable, M.minimum) + end +end + +""" + MLE(model::DynamicPPL.Model; kwargs...) + +Returns a maximum likelihood estimate of the given `model`. + +Arguments: + +- `model` is a `DynamicPPL.Model`. + +Keyword arguments: + +- `optim_options` is a `Optim.Options` struct that allows you to change the number + of iterations run in an MLE estimate. + +Usage: + +```julia +using Turing + +@model function f() + m ~ Normal(0, 1) + 1.5 ~ Normal(m, 1) + 2.0 ~ Normal(m, 1) +end + +model = f() +mle_estimate = MLE(model) + +# Manually setting the optimizers settings. +mle_estimate = MLE(model, optim_options=Optim.Options(iterations=500)) +``` +""" +function MLE(model::DynamicPPL.Model; kwargs...) + lpf = make_logjoint(model, DynamicPPL.LikelihoodContext()) + return mode_estimation(model, lpf; kwargs...) +end + +""" + MAP(model::DynamicPPL.Model; kwargs...) + +Returns the maximum a posteriori estimate of the given `model`. + +Arguments: + +- `model` is a `DynamicPPL.Model`. + +Keyword arguments: + +- `optim_options` is a `Optim.Options` struct that allows you to change the number + of iterations run in an MLE estimate. 
+ +Usage: + +```julia +using Turing + +@model function f() + m ~ Normal(0, 1) + 1.5 ~ Normal(m, 1) + 2.0 ~ Normal(m, 1) +end + +model = f() +mle_estimate = MAP(model) + +# Manually setting the optimizers settings. +mle_estimate = MAP(model, optim_options=Optim.Options(iterations=500)) +``` +""" +function MAP(model::DynamicPPL.Model; kwargs...) + lpf = make_logjoint(model, DynamicPPL.DefaultContext()) + return mode_estimation(model, lpf; kwargs...) +end + +end #module \ No newline at end of file From 3611719c895ffe226847cb8adff3dbc3209d1aff Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Thu, 23 Apr 2020 06:07:43 -0700 Subject: [PATCH 05/35] Update src/modes/ModeEstimation.jl Co-Authored-By: David Widmann --- src/modes/ModeEstimation.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl index 721f977a58..80d28e3048 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -50,9 +50,9 @@ end """ make_logjoint(model::DynamicPPL.Model, ctx::DynamicPPL.AbstractContext) -Constructs a log density function that accepts a vector `z` and returns -a tuple (-likelihood, `varinfo`). The model is run using the provided -context `ctx`. +Construct a log density function. + +The `model` is run using the provided context `ctx`. """ function make_logjoint(model::DynamicPPL.Model, ctx::DynamicPPL.AbstractContext) # setup @@ -232,4 +232,4 @@ function MAP(model::DynamicPPL.Model; kwargs...) return mode_estimation(model, lpf; kwargs...) end -end #module \ No newline at end of file +end #module From 40c80dbe323be7fb1fa7c46692b722a96da40704 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Tue, 28 Apr 2020 10:23:09 -0700 Subject: [PATCH 06/35] Change optimizer. 
--- src/modes/ModeEstimation.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl index 80d28e3048..44b0f55c9f 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -114,7 +114,7 @@ function mode_estimation( hess_target(x) = lpf(x; unlinked=true) # Optimize! - M = Optim.optimize(target, init_vals, optim_options) + M = Optim.optimize(target, init_vals, Optim.BFGS(), optim_options) # Retrieve the estimated values. vals = binv(M.minimizer) From 2ba1c81fe673875f26b86e38d2006f9b79784053 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Fri, 1 May 2020 06:13:58 -0700 Subject: [PATCH 07/35] Match csp/hessian-bug --- src/inference/Inference.jl | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/inference/Inference.jl b/src/inference/Inference.jl index eb8c55f032..a0f801b17f 100644 --- a/src/inference/Inference.jl +++ b/src/inference/Inference.jl @@ -522,26 +522,19 @@ end function get_matching_type( spl::AbstractSampler, vi::VarInfo, - ::Type{<:AbstractFloat}, -) - return floatof(eltype(vi, spl)) -end -function get_matching_type( - spl::Sampler{<:Hamiltonian}, - vi::VarInfo, ::Type{<:Union{Missing, AbstractFloat}}, ) return Union{Missing, floatof(eltype(vi, spl))} end function get_matching_type( - spl::Sampler{<:Hamiltonian}, + spl::AbstractSampler, vi::VarInfo, ::Type{<:AbstractFloat}, ) return floatof(eltype(vi, spl)) end function get_matching_type( - spl::Sampler{<:Hamiltonian}, + spl::AbstractSampler, vi::VarInfo, ::Type{TV}, ) where {T, N, TV <: Array{T, N}} From dc0dd1c4a984e2ac5db853e68afadf447fe937cc Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Thu, 7 May 2020 20:08:21 -0700 Subject: [PATCH 08/35] Addressing comments, fixing bugs, adding tests --- Project.toml | 21 ++- src/Turing.jl | 7 +- src/modes/ModeEstimation.jl | 257 ++++++++++++----------------------- test/modes/ModeEstimation.jl | 59 ++++++++ test/runtests.jl | 4 + 5 
files changed, 172 insertions(+), 176 deletions(-) create mode 100644 test/modes/ModeEstimation.jl diff --git a/Project.toml b/Project.toml index e5c0b7f7a7..4f673b87a4 100644 --- a/Project.toml +++ b/Project.toml @@ -17,7 +17,6 @@ Libtask = "6f1fad26-d15e-5dc8-ae53-837a1d7b8c9f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" -Optim = "429524aa-4258-5aef-a3af-852621145aeb" ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" @@ -42,7 +41,7 @@ ForwardDiff = "0.10.3" Libtask = "0.4" LogDensityProblems = "^0.9, 0.10" MCMCChains = "3.0.7" -Optim = "0.20" +Optim = "0.20, 0.21" ProgressLogging = "0.1" Reexport = "0.2.0" Requires = "0.5, 1.0" @@ -58,6 +57,7 @@ CmdStan = "593b3428-ca2f-500c-ae53-031589ec8ddd" DynamicHMC = "bbc10e6e-7c05-544b-b16e-64fede858acb" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" Memoization = "6fafb56a-5788-4b4e-91ca-c0cea6611c73" +Optim = "429524aa-4258-5aef-a3af-852621145aeb" PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" @@ -68,4 +68,19 @@ UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["Pkg", "PDMats", "TerminalLoggers", "Test", "UnicodePlots", "StatsBase", "FiniteDifferences", "DynamicHMC", "CmdStan", "BenchmarkTools", "Zygote", "ReverseDiff", "Memoization"] +test = [ + "Pkg", + "PDMats", + "TerminalLoggers", + "Test", + "UnicodePlots", + "StatsBase", + "FiniteDifferences", + "DynamicHMC", + "CmdStan", + "BenchmarkTools", + "Zygote", + "ReverseDiff", + "Memoization", + "Optim" +] diff --git a/src/Turing.jl b/src/Turing.jl index 9b51ecbe36..beb5cd6172 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -36,8 +36,6 @@ 
include("inference/Inference.jl") # inference algorithms using .Inference include("variational/VariationalInference.jl") using .Variational -include("modes/ModeEstimation.jl") -using .ModeEstimation # TODO: re-design `sample` interface in MCMCChains, which unify CmdStan and Turing. # Related: https://github.com/TuringLang/Turing.jl/issues/746 @@ -60,6 +58,11 @@ using .ModeEstimation end end +@init @require Optim="429524aa-4258-5aef-a3af-852621145aeb" @eval begin + include("modes/ModeEstimation.jl") + export optimize +end + ########### # Exports # ########### diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl index 44b0f55c9f..903002b372 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -1,15 +1,52 @@ -module ModeEstimation - using ..Turing using ..Bijectors using LinearAlgebra import ..DynamicPPL +import ..DynamicPPL: Model, AbstractContext, VarInfo import Optim +import Optim: optimize import NamedArrays import ..ForwardDiff +import StatsBase +import Printf + +export MAP, MLE, optimize + +struct MLE end +struct MAP end + +""" + OptimLogDensity{M<:Model,C<:Context,V<:VarInfo} + +A struct that stores the log density function of a `DynamicPPL` model. +""" +struct OptimLogDensity{M<:Model,C<:AbstractContext,V<:VarInfo} + "A `DynamicPPL.Model` constructed either with the `@model` macro or manually." + model::M + "A `DynamicPPL.AbstractContext` used to evaluate the model. `LikelihoodContext` or `DefaultContext` are typical for MAP/MLE." + context::C + "A `DynamicPPL.VarInfo` struct that will be used to update model parameters." 
+ init::V +end -export MAP, MLE +function OptimLogDensity(model::Model, context::AbstractContext) + init = VarInfo(model) + DynamicPPL.link!(init, SampleFromPrior()) + return OptimLogDensity(model, context, init) +end + +function (f::OptimLogDensity)(z; unlinked::Bool = false) + spl = DynamicPPL.SampleFromPrior() + + varinfo = DynamicPPL.VarInfo(f.init, spl, z) + + unlinked && DynamicPPL.invlink!(f.init, spl) + f.model(varinfo, spl, f.context) + unlinked && DynamicPPL.link!(f.init, spl) + + return -DynamicPPL.getlogp(varinfo) +end """ ModeResult{ @@ -20,86 +57,68 @@ export MAP, MLE } A wrapper struct to store various results from a MAP or MLE estimation. - -Fields: - -- `values` is a vector with the resulting point estimates -- `info_matrix` is the inverse Hessian -- `optim_result` is the stored Optim.jl results -- `summary_table` is a summary table with parameters, standard errors, and - t-statistics computed from the information matrix. -- `lp` is the final likelihood. """ struct ModeResult{ V<:NamedArrays.NamedArray, - M<:Union{Missing, NamedArrays.NamedArray}, - O<:Optim.MultivariateOptimizationResults, - S<:NamedArrays.NamedArray -} + O<:Optim.MultivariateOptimizationResults, + M<:OptimLogDensity +} <: StatsBase.StatisticalModel + "A vector with the resulting point estimates." values :: V - info_matrix :: M + "The stored Optim.jl results." optim_result :: O - summary_table :: S + "The final log likelihood or log joint, depending on whether `MAP` or `MLE` was run." lp :: Float64 + "The evaluation function used to calculate the output." + f :: M end +############################# +# Various StatsBase methods # +############################# + function Base.show(io::IO, m::ModeResult) - show(io, m.summary_table) + println(io, Printf.@sprintf("ModeResult with minimized lp of %.2f", m.lp), "\n") + show(io, m.values) end -""" - make_logjoint(model::DynamicPPL.Model, ctx::DynamicPPL.AbstractContext) - -Construct a log density function. 
- -The `model` is run using the provided context `ctx`. -""" -function make_logjoint(model::DynamicPPL.Model, ctx::DynamicPPL.AbstractContext) - # setup - varinfo_init = Turing.VarInfo(model) - spl = DynamicPPL.SampleFromPrior() - DynamicPPL.link!(varinfo_init, spl) - - function logπ(z; unlinked = false) - varinfo = DynamicPPL.VarInfo(varinfo_init, spl, z) - - unlinked && DynamicPPL.invlink!(varinfo_init, spl) - model(varinfo, spl, ctx) - unlinked && DynamicPPL.link!(varinfo_init, spl) +function StatsBase.coeftable(m::ModeResult) + # Get columns for coeftable. + terms = StatsBase.coefnames(m) + estimates = m.values.array[:,1] + stderrors = StatsBase.stderror(m) + tstats = estimates ./ stderrors - return -DynamicPPL.getlogp(varinfo) - end - - return logπ + StatsBase.CoefTable([estimates, stderrors, tstats], ["estimate", "stderror", "tstat"], terms) end -""" - mode_estimation( - model::DynamicPPL.Model, - lpf; - optim_options=Optim.Options(), - kwargs... - ) - -An internal function that handles the computation of a MLE or MAP estimate. +function StatsBase.informationmatrix(m::ModeResult; hessian_function=ForwardDiff.hessian, kwargs...) + # Calculate Hessian and information matrix. + varnames = StatsBase.coefnames(m) + info = hessian_function(x -> m.f(x, unlinked=true), m.values.array[:, 1]) + info = inv(info) + return NamedArrays.NamedArray(info, (varnames, varnames)) +end -Arguments: +StatsBase.coef(m::ModeResult) = m.values +StatsBase.coefnames(m::ModeResult) = names(m.values)[1] +StatsBase.params(m::ModeResult) = StatsBase.coefnames(m) +StatsBase.vcov(m::ModeResult) = StatsBase.informationmatrix(m) +StatsBase.loglikelihood(m::ModeResult) = m.lp -- `model` is a `DynamicPPL.Model`. -- `lpf` is a function returned by `make_logjoint`. +#################### +# Optim.jl methods # +#################### -Optional arguments: +function Optim.optimize(model::Model, ::MLE, args...; kwargs...) 
+ return optimize(model, OptimLogDensity(model, DynamicPPL.LikelihoodContext()), args...; kwargs...) +end -- `optim_options` is a `Optim.Options` struct that allows you to change the number - of iterations run in an MLE estimate. +function Optim.optimize(model::Model, ::MAP, args...; kwargs...) + return optimize(model, OptimLogDensity(model, DynamicPPL.DefaultContext()), args...; kwargs...) +end -""" -function mode_estimation( - model::DynamicPPL.Model, - lpf; - optim_options=Optim.Options(), - kwargs... -) +function Optim.optimize(model::Model, f::OptimLogDensity, optimizer=Optim.BFGS(), args...; kwargs...) # Do some initialization. b = bijector(model) binv = inv(b) @@ -109,12 +128,8 @@ function mode_estimation( init_params = model(vi, spl) init_vals = vi[spl] - # Construct target function. - target(x) = lpf(x) - hess_target(x) = lpf(x; unlinked=true) - # Optimize! - M = Optim.optimize(target, init_vals, Optim.BFGS(), optim_options) + M = Optim.optimize(f, init_vals, args...; kwargs...) # Retrieve the estimated values. vals = binv(M.minimizer) @@ -131,105 +146,5 @@ function mode_estimation( # Store the parameters and their names in an array. vmat = NamedArrays.NamedArray(vals, varnames) - # Try to generate the information matrix. - try - # Calculate Hessian and information matrix. - info = ForwardDiff.hessian(hess_target, vals) - info = inv(info) - mat = NamedArrays.NamedArray(info, (varnames, varnames)) - - # Create the standard errors. - ses = sqrt.(diag(info)) - - # Calculate t-stats. - tstat = vals ./ ses - - # Make a summary table. - stable = NamedArrays.NamedArray( - [vals ses tstat], - (varnames, ["parameter", "std_err", "tstat"])) - - # Return a wrapped-up table. 
- return ModeResult(vmat, mat, M, stable, M.minimum) - catch err - @warn "Could not compute Hessian matrix" err - stable = NamedArrays.NamedArray([vals repeat([missing], length(vals)) repeat([missing], length(vals))], (varnames, ["parameter", "std_err", "tstat"])) - return ModeResult(vmat, missing, M, stable, M.minimum) - end -end - -""" - MLE(model::DynamicPPL.Model; kwargs...) - -Returns a maximum likelihood estimate of the given `model`. - -Arguments: - -- `model` is a `DynamicPPL.Model`. - -Keyword arguments: - -- `optim_options` is a `Optim.Options` struct that allows you to change the number - of iterations run in an MLE estimate. - -Usage: - -```julia -using Turing - -@model function f() - m ~ Normal(0, 1) - 1.5 ~ Normal(m, 1) - 2.0 ~ Normal(m, 1) -end - -model = f() -mle_estimate = MLE(model) - -# Manually setting the optimizers settings. -mle_estimate = MLE(model, optim_options=Optim.Options(iterations=500)) -``` -""" -function MLE(model::DynamicPPL.Model; kwargs...) - lpf = make_logjoint(model, DynamicPPL.LikelihoodContext()) - return mode_estimation(model, lpf; kwargs...) -end - -""" - MAP(model::DynamicPPL.Model; kwargs...) - -Returns the maximum a posteriori estimate of the given `model`. - -Arguments: - -- `model` is a `DynamicPPL.Model`. - -Keyword arguments: - -- `optim_options` is a `Optim.Options` struct that allows you to change the number - of iterations run in an MLE estimate. - -Usage: - -```julia -using Turing - -@model function f() - m ~ Normal(0, 1) - 1.5 ~ Normal(m, 1) - 2.0 ~ Normal(m, 1) -end - -model = f() -mle_estimate = MAP(model) - -# Manually setting the optimizers settings. -mle_estimate = MAP(model, optim_options=Optim.Options(iterations=500)) -``` -""" -function MAP(model::DynamicPPL.Model; kwargs...) - lpf = make_logjoint(model, DynamicPPL.DefaultContext()) - return mode_estimation(model, lpf; kwargs...) 
-end - -end #module + return ModeResult(vmat, M, -M.minimum, f) +end \ No newline at end of file diff --git a/test/modes/ModeEstimation.jl b/test/modes/ModeEstimation.jl new file mode 100644 index 0000000000..79eb44e7f7 --- /dev/null +++ b/test/modes/ModeEstimation.jl @@ -0,0 +1,59 @@ +using Turing +using Optim +using Test +using StatsBase +using NamedArrays +using ReverseDiff +using Random + +dir = splitdir(splitdir(pathof(Turing))[1])[1] +include(dir*"/test/test_utils/AllUtils.jl") + +@testset "ModeEstimation.jl" begin + @testset "MLE" begin + Random.seed!(222) + + m1 = optimize(gdemo_default, MLE()) + m2 = optimize(gdemo_default, MLE(), NelderMead()) + m3 = optimize(gdemo_default, MLE(), Newton()) + + @test all(isapprox.(m1.values.array - [0.0625031, 1.75], 0.0, atol=0.01)) + @test all(isapprox.(m2.values.array - [0.0625031, 1.75], 0.0, atol=0.01)) + @test all(isapprox.(m3.values.array - [0.0625031, 1.75], 0.0, atol=0.01)) + end + @testset "MAP" begin + Random.seed!(222) + + m1 = optimize(gdemo_default, MAP()) + m2 = optimize(gdemo_default, MAP(), NelderMead()) + m3 = optimize(gdemo_default, MAP(), Newton()) + + @test all(isapprox.(m1.values.array - [49/24, 7/6], 0.0, atol=0.01)) + @test all(isapprox.(m2.values.array - [49/24, 7/6], 0.0, atol=0.01)) + @test all(isapprox.(m3.values.array - [49/24, 7/6], 0.0, atol=0.01)) + end + + @testset "StatsBase integration" begin + Random.seed!(54321) + mle_est = optimize(gdemo_default, MLE()) + + @test coefnames(mle_est) == ["s", "m"] + + diffs = coef(mle_est).array - [0.0625031; 1.75001] + @test all(isapprox.(diffs, 0.0, atol=0.1)) + + infomat = [0.003907027690416608 4.157954948417027e-7; 4.157954948417027e-7 0.03125155528962335] + @test all(isapprox.(infomat - informationmatrix(mle_est), 0.0, atol=0.01)) + + ctable = coeftable(mle_est) + @test ctable isa StatsBase.CoefTable + + s = stderror(mle_est).array + @test all(isapprox.(s - [0.06250415643292194, 0.17677963626053916], 0.0, atol=0.01)) + + @test coefnames(mle_est) == 
params(mle_est) + @test vcov(mle_est) == informationmatrix(mle_est) + + @test isapprox(loglikelihood(mle_est), -0.0652883561466624, atol=0.01) + end +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 5776651bb6..3223d62077 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -53,4 +53,8 @@ include("test_utils/AllUtils.jl") @testset "utilities" begin # include("utilities/stan-interface.jl") end + + @testset "modes" begin + include("modes/ModeEstimation.jl") + end end From dca46e85dbb159dd706759b2870506476ebc1542 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Thu, 7 May 2020 20:11:16 -0700 Subject: [PATCH 09/35] Removed extraneous model call --- src/modes/ModeEstimation.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl index 903002b372..a93c11e94d 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -137,7 +137,6 @@ function Optim.optimize(model::Model, f::OptimLogDensity, optimizer=Optim.BFGS() # Get the VarInfo at the MLE/MAP point, and run the model to ensure # correct dimensionality. vi[spl] = vals - model(vi) # XXX: Is this a necessary step? # Make one transition to get the parameter names. 
ts = [Turing.Inference.Transition(DynamicPPL.tonamedtuple(vi), DynamicPPL.getlogp(vi))] From e616eb87ac43ce1df84ff2fa86aeef588661922d Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Thu, 7 May 2020 20:16:57 -0700 Subject: [PATCH 10/35] Add docstringsd --- src/modes/ModeEstimation.jl | 56 +++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl index a93c11e94d..86cf899482 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -30,12 +30,23 @@ struct OptimLogDensity{M<:Model,C<:AbstractContext,V<:VarInfo} init::V end +""" + OptimLogDensity(model::Model, context::AbstractContext) + +Create a callable `OptimLogDensity` struct that evaluates a model using the given `context`. +""" function OptimLogDensity(model::Model, context::AbstractContext) init = VarInfo(model) DynamicPPL.link!(init, SampleFromPrior()) return OptimLogDensity(model, context, init) end +""" + (f::OptimLogDensity)(z; unlinked::Bool = false) + +Evaluate the log joint (with `DefaultContext`) or log likelihood (with `LikelihoodContext`) +at the array `z`. If `unlinked=true`, no change of variables will occur. +""" function (f::OptimLogDensity)(z; unlinked::Bool = false) spl = DynamicPPL.SampleFromPrior() @@ -110,14 +121,59 @@ StatsBase.loglikelihood(m::ModeResult) = m.lp # Optim.jl methods # #################### +""" + Optim.optimize(model::Model, ::MLE, args...; kwargs...) + +Compute a maximum likelihood estimate of the `model`. + +Example: + +```julia +@model function f(x) + m ~ Normal(0, 1) + x ~ Normal(m, 1) +end + +model = f(1.5) +mle = optimize(model, MLE()) + +# Use a different optimizer +mle = optimize(model, MLE(), NelderMeade()) +``` +""" function Optim.optimize(model::Model, ::MLE, args...; kwargs...) return optimize(model, OptimLogDensity(model, DynamicPPL.LikelihoodContext()), args...; kwargs...) end +""" + Optim.optimize(model::Model, ::MAP, args...; kwargs...) 
+ +Compute a maximum a posterior estimate of the `model`. + +Example: + +```julia +@model function f(x) + m ~ Normal(0, 1) + x ~ Normal(m, 1) +end + +model = f(1.5) +map_est = optimize(model, MAP()) + +# Use a different optimizer +map_est = optimize(model, MAP(), NelderMeade()) +``` +""" function Optim.optimize(model::Model, ::MAP, args...; kwargs...) return optimize(model, OptimLogDensity(model, DynamicPPL.DefaultContext()), args...; kwargs...) end +""" + Optim.optimize(model::Model, f::OptimLogDensity, optimizer=Optim.BFGS(), args...; kwargs...) + +Estimate a mode, i.e., compute a MLE or MAP estimate. +""" function Optim.optimize(model::Model, f::OptimLogDensity, optimizer=Optim.BFGS(), args...; kwargs...) # Do some initialization. b = bijector(model) From f9fa51f96b50bcfecefdac4fb0ba5361c18caefd Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Fri, 8 May 2020 05:30:55 -0700 Subject: [PATCH 11/35] Update src/modes/ModeEstimation.jl Co-authored-by: Martin Trapp --- src/modes/ModeEstimation.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl index 86cf899482..e9691b14b0 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -174,7 +174,7 @@ end Estimate a mode, i.e., compute a MLE or MAP estimate. """ -function Optim.optimize(model::Model, f::OptimLogDensity, optimizer=Optim.BFGS(), args...; kwargs...) +function Optim.optimize(model::Model, f::OptimLogDensity, optimizer=Optim.LBFGS(), args...; kwargs...) # Do some initialization. b = bijector(model) binv = inv(b) @@ -202,4 +202,4 @@ function Optim.optimize(model::Model, f::OptimLogDensity, optimizer=Optim.BFGS() vmat = NamedArrays.NamedArray(vals, varnames) return ModeResult(vmat, M, -M.minimum, f) -end \ No newline at end of file +end From 3fc51d991f79188c6ce93bfcdef45e8911f3a999 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Sun, 10 May 2020 16:51:07 -0700 Subject: [PATCH 12/35] Minor corrections. 
--- src/modes/ModeEstimation.jl | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl index 86cf899482..c146d379d7 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -52,11 +52,16 @@ function (f::OptimLogDensity)(z; unlinked::Bool = false) varinfo = DynamicPPL.VarInfo(f.init, spl, z) - unlinked && DynamicPPL.invlink!(f.init, spl) - f.model(varinfo, spl, f.context) - unlinked && DynamicPPL.link!(f.init, spl) - - return -DynamicPPL.getlogp(varinfo) + if unlinked + DynamicPPL.invlink!(f.init, spl) + f.model(varinfo, spl, f.context) + lp = -DynamicPPL.getlogp(varinfo) + DynamicPPL.link!(f.init, spl) + return lp + else + f.model(varinfo, spl, f.context) + return -DynamicPPL.getlogp(varinfo) + end end """ @@ -126,7 +131,7 @@ StatsBase.loglikelihood(m::ModeResult) = m.lp Compute a maximum likelihood estimate of the `model`. -Example: +# Examples ```julia @model function f(x) @@ -150,7 +155,7 @@ end Compute a maximum a posterior estimate of the `model`. -Example: +# Examples ```julia @model function f(x) @@ -170,11 +175,11 @@ function Optim.optimize(model::Model, ::MAP, args...; kwargs...) end """ - Optim.optimize(model::Model, f::OptimLogDensity, optimizer=Optim.BFGS(), args...; kwargs...) + Optim.optimize(model::Model, f::OptimLogDensity, optimizer=Optim.LBFGS(), args...; kwargs...) Estimate a mode, i.e., compute a MLE or MAP estimate. """ -function Optim.optimize(model::Model, f::OptimLogDensity, optimizer=Optim.BFGS(), args...; kwargs...) +function Optim.optimize(model::Model, f::OptimLogDensity, optimizer=Optim.LBFGS(), args...; kwargs...) # Do some initialization. 
b = bijector(model) binv = inv(b) From b1a48613ebc574b86eb2174d8a5f2442669a0207 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Tue, 12 May 2020 10:22:22 -0700 Subject: [PATCH 13/35] Add NamedArrays to extras and compat --- Project.toml | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/Project.toml b/Project.toml index 4f673b87a4..16cc07b859 100644 --- a/Project.toml +++ b/Project.toml @@ -57,6 +57,7 @@ CmdStan = "593b3428-ca2f-500c-ae53-031589ec8ddd" DynamicHMC = "bbc10e6e-7c05-544b-b16e-64fede858acb" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" Memoization = "6fafb56a-5788-4b4e-91ca-c0cea6611c73" +NamedArrays = "86f7a689-2022-50b4-a561-43c23ac3c673" Optim = "429524aa-4258-5aef-a3af-852621145aeb" PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" @@ -68,19 +69,4 @@ UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = [ - "Pkg", - "PDMats", - "TerminalLoggers", - "Test", - "UnicodePlots", - "StatsBase", - "FiniteDifferences", - "DynamicHMC", - "CmdStan", - "BenchmarkTools", - "Zygote", - "ReverseDiff", - "Memoization", - "Optim" -] +test = ["Pkg", "PDMats", "TerminalLoggers", "Test", "UnicodePlots", "StatsBase", "FiniteDifferences", "DynamicHMC", "CmdStan", "BenchmarkTools", "Zygote", "ReverseDiff", "Memoization", "Optim", "NamedArrays"] From 4373c8829624acabab41969e60c1ac8fa9d0d90a Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Tue, 12 May 2020 10:31:04 -0700 Subject: [PATCH 14/35] Fix dependencies --- Project.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 16cc07b859..982962fcd1 100644 --- a/Project.toml +++ b/Project.toml @@ -17,6 +17,9 @@ Libtask = "6f1fad26-d15e-5dc8-ae53-837a1d7b8c9f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" MCMCChains = 
"c7f686f2-ff18-58e9-bc7b-31028e88f75d" +NamedArrays = "86f7a689-2022-50b4-a561-43c23ac3c673" +Optim = "429524aa-4258-5aef-a3af-852621145aeb" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" @@ -57,8 +60,6 @@ CmdStan = "593b3428-ca2f-500c-ae53-031589ec8ddd" DynamicHMC = "bbc10e6e-7c05-544b-b16e-64fede858acb" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" Memoization = "6fafb56a-5788-4b4e-91ca-c0cea6611c73" -NamedArrays = "86f7a689-2022-50b4-a561-43c23ac3c673" -Optim = "429524aa-4258-5aef-a3af-852621145aeb" PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" From 0a769b772e5e379270f55678af25f57f5242a559 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Mon, 18 May 2020 20:29:31 -0700 Subject: [PATCH 15/35] Update tests & address comments --- src/modes/ModeEstimation.jl | 178 ++++++++++++++++++++++++++++------- test/modes/ModeEstimation.jl | 58 ++++++++++-- 2 files changed, 195 insertions(+), 41 deletions(-) diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl index f3e037fdbd..cd1a78eafb 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -2,8 +2,11 @@ using ..Turing using ..Bijectors using LinearAlgebra +import ..AbstractMCMC: AbstractSampler import ..DynamicPPL -import ..DynamicPPL: Model, AbstractContext, VarInfo +import ..DynamicPPL: Model, AbstractContext, VarInfo, AbstractContext, VarName, + _getindex, getsym, getfield, settrans!, setorder!, + get_and_set_val!, istrans, tilde, dot_tilde import Optim import Optim: optimize import NamedArrays @@ -16,6 +19,101 @@ export MAP, MLE, optimize struct MLE end struct MAP end +""" + OptimizationContext{C<:AbstractContext} <: AbstractContext + +The `OptimizationContext` transforms variables to their constrained space, 
but +does not use the density with respect to the transformation. This context is +intended to allow an optimizer to sample in R^n freely. +""" +struct OptimizationContext{C<:AbstractContext} <: AbstractContext + context::C +end + +# assume +function DynamicPPL.tilde(ctx::OptimizationContext{<:LikelihoodContext}, spl, dist, vn::VarName, inds, vi) + if haskey(vi, vn) + # Always overwrite the parameters with new ones for `SampleFromUniform`. + if spl isa SampleFromUniform || is_flagged(vi, vn, "del") + unset_flag!(vi, vn, "del") + r = init(dist, spl) + vi[vn] = vectorize(dist, r) + settrans!(vi, false, vn) + setorder!(vi, vn, get_num_produce(vi)) + else + r = vi[vn] + end + else + r = init(dist, spl) + push!(vi, vn, r, dist, spl) + settrans!(vi, false, vn) + end + return r, 0 +end + +function DynamicPPL.tilde(ctx::OptimizationContext, spl, dist, vn::VarName, inds, vi) + if haskey(vi, vn) + # Always overwrite the parameters with new ones for `SampleFromUniform`. + if spl isa SampleFromUniform || is_flagged(vi, vn, "del") + unset_flag!(vi, vn, "del") + r = init(dist, spl) + vi[vn] = vectorize(dist, r) + settrans!(vi, false, vn) + setorder!(vi, vn, get_num_produce(vi)) + else + r = vi[vn] + end + else + r = init(dist, spl) + push!(vi, vn, r, dist, spl) + settrans!(vi, false, vn) + end + return r, Distributions.logpdf(dist, r) +end + +# observe +function DynamicPPL.tilde(ctx::OptimizationContext{<:PriorContext}, sampler, right, left, vi) + return 0 +end + +function DynamicPPL.tilde(ctx::OptimizationContext, sampler, dist, value, vi) + return Distributions.logpdf(dist, value) +end + +# dot assume +function DynamicPPL.dot_tilde(ctx::OptimizationContext{<:LikelihoodContext}, sampler, right, left, vn::VarName, _, vi) + vns, dist = get_vns_and_dist(right, left, vn) + r = get_and_set_val!(vi, vns, dist, sampler) + return r, 0 +end + +function DynamicPPL.dot_tilde(ctx::OptimizationContext, sampler, right, left, vn::VarName, _, vi) + vns, dist = get_vns_and_dist(right, left, vn) 
+ r = get_and_set_val!(vi, vns, dist, sampler) + lp = sum(logpdf(dist, r)) + return r, lp +end + +# dot observe +function DynamicPPL.dot_tilde(ctx::OptimizationContext{<:PriorContext}, sampler, right, left, vn, _, vi) + return 0 +end + +function DynamicPPL.dot_tilde(ctx::OptimizationContext{<:PriorContext}, sampler, right, left, vi) + return 0 +end + +function DynamicPPL.dot_tilde(ctx::OptimizationContext, sampler, right, left, vn, _, vi) + vns, dist = get_vns_and_dist(right, left, vn) + r = get_and_set_val!(vi, vns, dist, sampler) + lp = sum(logpdf(dist, r)) + return lp +end + +function DynamicPPL.dot_tilde(ctx::OptimizationContext, sampler, dists, value, vi) + return sum(Distributions.logpdf.(dists, value)) +end + """ OptimLogDensity{M<:Model,C<:Context,V<:VarInfo} @@ -27,7 +125,9 @@ struct OptimLogDensity{M<:Model,C<:AbstractContext,V<:VarInfo} "A `DynamicPPL.AbstractContext` used to evaluate the model. `LikelihoodContext` or `DefaultContext` are typical for MAP/MLE." context::C "A `DynamicPPL.VarInfo` struct that will be used to update model parameters." - init::V + vi::V + "Whether the to evaluate the model with transformed variables." + linked::Bool end """ @@ -35,10 +135,10 @@ end Create a callable `OptimLogDensity` struct that evaluates a model using the given `context`. """ -function OptimLogDensity(model::Model, context::AbstractContext) - init = VarInfo(model) - DynamicPPL.link!(init, SampleFromPrior()) - return OptimLogDensity(model, context, init) +function OptimLogDensity(model::Model, context::AbstractContext, linked::Bool) + init = VarInfo(model, context) + linked && DynamicPPL.link!(init, DynamicPPL.SampleFromPrior()) + return OptimLogDensity(model, context, init, linked) end """ @@ -47,21 +147,33 @@ end Evaluate the log joint (with `DefaultContext`) or log likelihood (with `LikelihoodContext`) at the array `z`. If `unlinked=true`, no change of variables will occur. 
""" -function (f::OptimLogDensity)(z; unlinked::Bool = false) +function (f::OptimLogDensity)(z; linked::Bool = true) spl = DynamicPPL.SampleFromPrior() - varinfo = DynamicPPL.VarInfo(f.init, spl, z) + varinfo = DynamicPPL.VarInfo(f.vi, spl, z) + f.model(varinfo, spl, f.context) + return -DynamicPPL.getlogp(varinfo) +end - if unlinked - DynamicPPL.invlink!(f.init, spl) - f.model(varinfo, spl, f.context) - lp = -DynamicPPL.getlogp(varinfo) - DynamicPPL.link!(f.init, spl) - return lp - else - f.model(varinfo, spl, f.context) - return -DynamicPPL.getlogp(varinfo) - end +""" + unlink(f::OptimLogDensity) + +Generate an unlinked (with no variable transformions) version of an existing `OptimLogDensity`. +""" +function unlink(f::OptimLogDensity) + init = VarInfo(f.model, f.context) + return OptimLogDensity(f.model, f.context, init, false) +end + +""" + link(f::OptimLogDensity) + +Generate an linked (with variable transformions) version of an existing `OptimLogDensity`. +""" +function link(f::OptimLogDensity) + init = VarInfo(f.model, f.context) + DynamicPPL.link!(init, DynamicPPL.SampleFromPrior()) + return OptimLogDensity(f.model, f.context, init, false) end """ @@ -88,7 +200,6 @@ struct ModeResult{ "The evaluation function used to calculate the output." f :: M end - ############################# # Various StatsBase methods # ############################# @@ -111,8 +222,8 @@ end function StatsBase.informationmatrix(m::ModeResult; hessian_function=ForwardDiff.hessian, kwargs...) # Calculate Hessian and information matrix. varnames = StatsBase.coefnames(m) - info = hessian_function(x -> m.f(x, unlinked=true), m.values.array[:, 1]) - info = inv(info) + f = unlink(m.f) + info = inv(hessian_function(x -> f(x), m.values.array[:, 1])) return NamedArrays.NamedArray(info, (varnames, varnames)) end @@ -147,7 +258,8 @@ mle = optimize(model, MLE(), NelderMeade()) ``` """ function Optim.optimize(model::Model, ::MLE, args...; kwargs...) 
- return optimize(model, OptimLogDensity(model, DynamicPPL.LikelihoodContext()), args...; kwargs...) + ctx = OptimizationContext(DynamicPPL.LikelihoodContext()) + return optimize(model, OptimLogDensity(model, ctx, true), args...; kwargs...) end """ @@ -171,7 +283,8 @@ map_est = optimize(model, MAP(), NelderMeade()) ``` """ function Optim.optimize(model::Model, ::MAP, args...; kwargs...) - return optimize(model, OptimLogDensity(model, DynamicPPL.DefaultContext()), args...; kwargs...) + ctx = OptimizationContext(DynamicPPL.DefaultContext()) + return optimize(model, OptimLogDensity(model, ctx, true), args...; kwargs...) end """ @@ -181,26 +294,21 @@ Estimate a mode, i.e., compute a MLE or MAP estimate. """ function Optim.optimize(model::Model, f::OptimLogDensity, optimizer=Optim.LBFGS(), args...; kwargs...) # Do some initialization. - b = bijector(model) - binv = inv(b) - spl = DynamicPPL.SampleFromPrior() - vi = DynamicPPL.VarInfo(model) - init_params = model(vi, spl) - init_vals = vi[spl] + init_params = model(f.vi, spl) + init_vals = f.vi[spl] # Optimize! - M = Optim.optimize(f, init_vals, args...; kwargs...) - - # Retrieve the estimated values. - vals = binv(M.minimizer) + M = Optim.optimize(f, init_vals, optimizer, args...; kwargs...) # Get the VarInfo at the MLE/MAP point, and run the model to ensure # correct dimensionality. - vi[spl] = vals + f.vi[spl] = M.minimizer + invlink!(f.vi, spl) + vals = f.vi[spl] # Make one transition to get the parameter names. - ts = [Turing.Inference.Transition(DynamicPPL.tonamedtuple(vi), DynamicPPL.getlogp(vi))] + ts = [Turing.Inference.Transition(DynamicPPL.tonamedtuple(f.vi), DynamicPPL.getlogp(f.vi))] varnames, _ = Turing.Inference._params_to_array(ts) # Store the parameters and their names in an array. 
diff --git a/test/modes/ModeEstimation.jl b/test/modes/ModeEstimation.jl index 79eb44e7f7..607b95031f 100644 --- a/test/modes/ModeEstimation.jl +++ b/test/modes/ModeEstimation.jl @@ -5,6 +5,7 @@ using StatsBase using NamedArrays using ReverseDiff using Random +using LinearAlgebra dir = splitdir(splitdir(pathof(Turing))[1])[1] include(dir*"/test/test_utils/AllUtils.jl") @@ -17,10 +18,12 @@ include(dir*"/test/test_utils/AllUtils.jl") m2 = optimize(gdemo_default, MLE(), NelderMead()) m3 = optimize(gdemo_default, MLE(), Newton()) - @test all(isapprox.(m1.values.array - [0.0625031, 1.75], 0.0, atol=0.01)) - @test all(isapprox.(m2.values.array - [0.0625031, 1.75], 0.0, atol=0.01)) - @test all(isapprox.(m3.values.array - [0.0625031, 1.75], 0.0, atol=0.01)) + true_value = [0.0625031, 1.75] + @test all(isapprox.(m1.values.array - true_value, 0.0, atol=0.01)) + @test all(isapprox.(m2.values.array - true_value, 0.0, atol=0.01)) + @test all(isapprox.(m3.values.array - true_value, 0.0, atol=0.01)) end + @testset "MAP" begin Random.seed!(222) @@ -28,9 +31,10 @@ include(dir*"/test/test_utils/AllUtils.jl") m2 = optimize(gdemo_default, MAP(), NelderMead()) m3 = optimize(gdemo_default, MAP(), Newton()) - @test all(isapprox.(m1.values.array - [49/24, 7/6], 0.0, atol=0.01)) - @test all(isapprox.(m2.values.array - [49/24, 7/6], 0.0, atol=0.01)) - @test all(isapprox.(m3.values.array - [49/24, 7/6], 0.0, atol=0.01)) + true_value = [0.0625031, 1.75] + @test all(isapprox.(m1.values.array - true_value, 0.0, atol=0.01)) + @test all(isapprox.(m2.values.array - true_value, 0.0, atol=0.01)) + @test all(isapprox.(m3.values.array - true_value, 0.0, atol=0.01)) end @testset "StatsBase integration" begin @@ -56,4 +60,46 @@ include(dir*"/test/test_utils/AllUtils.jl") @test isapprox(loglikelihood(mle_est), -0.0652883561466624, atol=0.01) end + + @testset "Linear regression test" begin + @model function regtest(x, y) + beta ~ MvNormal(2,1) + mu = x*beta + y ~ MvNormal(mu, 1.0) + end + + 
Random.seed!(987) + true_beta = [1.0, -2.2] + x = rand(40, 2) + y = x*true_beta + + model = regtest(x, y) + mle = optimize(model, MLE()) + + vcmat = inv(x'x) + vcmat_mle = informationmatrix(mle).array + + @test isapprox(mle.values.array, true_beta) + @test isapprox(vcmat, vcmat_mle) + end + + @testset "Dot tilde test" begin + @model function dot_gdemo(x) + s ~ InverseGamma(2,3) + m ~ Normal(0, sqrt(s)) + + x .~ Normal(m, sqrt(s)) + end + + model_dot = dot_gdemo([1.5, 2.0]) + + mle1 = optimize(gdemo_default, MLE()) + mle2 = optimize(model_dot, MLE()) + + map1 = optimize(gdemo_default, MAP()) + map2 = optimize(model_dot, MAP()) + + @test isapprox(mle1.values.array, mle2.values.array) + @test isapprox(map1.values.array, map2.values.array) + end end \ No newline at end of file From e660fb596c1d82c291093153028c931cbe572012 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Mon, 18 May 2020 20:32:04 -0700 Subject: [PATCH 16/35] Correct Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 982962fcd1..9724e6290c 100644 --- a/Project.toml +++ b/Project.toml @@ -70,4 +70,4 @@ UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["Pkg", "PDMats", "TerminalLoggers", "Test", "UnicodePlots", "StatsBase", "FiniteDifferences", "DynamicHMC", "CmdStan", "BenchmarkTools", "Zygote", "ReverseDiff", "Memoization", "Optim", "NamedArrays"] +test = ["Pkg", "PDMats", "TerminalLoggers", "Test", "UnicodePlots", "StatsBase", "FiniteDifferences", "DynamicHMC", "CmdStan", "BenchmarkTools", "Zygote", "ReverseDiff", "Memoization"] From 4fc1d73ef839e7d210d084f91c338bd220658a60 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Mon, 18 May 2020 20:46:09 -0700 Subject: [PATCH 17/35] Correct imports --- src/modes/ModeEstimation.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl 
index cd1a78eafb..c93284e7e1 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -160,7 +160,7 @@ end Generate an unlinked (with no variable transformions) version of an existing `OptimLogDensity`. """ -function unlink(f::OptimLogDensity) +function Bijectors.unlink(f::OptimLogDensity) init = VarInfo(f.model, f.context) return OptimLogDensity(f.model, f.context, init, false) end @@ -170,7 +170,7 @@ end Generate an linked (with variable transformions) version of an existing `OptimLogDensity`. """ -function link(f::OptimLogDensity) +function Bijectors.link(f::OptimLogDensity) init = VarInfo(f.model, f.context) DynamicPPL.link!(init, DynamicPPL.SampleFromPrior()) return OptimLogDensity(f.model, f.context, init, false) From cb56cd2de7b9f398d5619d78369cd7ee3a2510fb Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Mon, 18 May 2020 21:37:23 -0700 Subject: [PATCH 18/35] Renaming invlink --- src/modes/ModeEstimation.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl index c93284e7e1..b510a98d0b 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -160,7 +160,7 @@ end Generate an unlinked (with no variable transformions) version of an existing `OptimLogDensity`. """ -function Bijectors.unlink(f::OptimLogDensity) +function Bijectors.invlink(f::OptimLogDensity) init = VarInfo(f.model, f.context) return OptimLogDensity(f.model, f.context, init, false) end @@ -222,7 +222,7 @@ end function StatsBase.informationmatrix(m::ModeResult; hessian_function=ForwardDiff.hessian, kwargs...) # Calculate Hessian and information matrix. 
varnames = StatsBase.coefnames(m) - f = unlink(m.f) + f = invlink(m.f) info = inv(hessian_function(x -> f(x), m.values.array[:, 1])) return NamedArrays.NamedArray(info, (varnames, varnames)) end From fab5b2d32ee74ba9ba2f51eb1c151985ec1849ef Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Tue, 19 May 2020 20:00:04 -0700 Subject: [PATCH 19/35] Address comments --- Project.toml | 4 +- src/Turing.jl | 2 +- src/modes/ModeEstimation.jl | 111 ++++++++++------------------------- test/modes/ModeEstimation.jl | 2 +- 4 files changed, 34 insertions(+), 85 deletions(-) diff --git a/Project.toml b/Project.toml index 9724e6290c..b1b5a86629 100644 --- a/Project.toml +++ b/Project.toml @@ -17,8 +17,6 @@ Libtask = "6f1fad26-d15e-5dc8-ae53-837a1d7b8c9f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" -NamedArrays = "86f7a689-2022-50b4-a561-43c23ac3c673" -Optim = "429524aa-4258-5aef-a3af-852621145aeb" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" @@ -60,6 +58,8 @@ CmdStan = "593b3428-ca2f-500c-ae53-031589ec8ddd" DynamicHMC = "bbc10e6e-7c05-544b-b16e-64fede858acb" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" Memoization = "6fafb56a-5788-4b4e-91ca-c0cea6611c73" +NamedArrays = "86f7a689-2022-50b4-a561-43c23ac3c673" +Optim = "429524aa-4258-5aef-a3af-852621145aeb" PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" diff --git a/src/Turing.jl b/src/Turing.jl index beb5cd6172..0bbca7445f 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -60,7 +60,7 @@ end @init @require Optim="429524aa-4258-5aef-a3af-852621145aeb" @eval begin include("modes/ModeEstimation.jl") - export optimize + export MAP, MLE, optimize end ########### diff --git 
a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl index b510a98d0b..d61e126244 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -14,8 +14,6 @@ import ..ForwardDiff import StatsBase import Printf -export MAP, MLE, optimize - struct MLE end struct MAP end @@ -32,42 +30,12 @@ end # assume function DynamicPPL.tilde(ctx::OptimizationContext{<:LikelihoodContext}, spl, dist, vn::VarName, inds, vi) - if haskey(vi, vn) - # Always overwrite the parameters with new ones for `SampleFromUniform`. - if spl isa SampleFromUniform || is_flagged(vi, vn, "del") - unset_flag!(vi, vn, "del") - r = init(dist, spl) - vi[vn] = vectorize(dist, r) - settrans!(vi, false, vn) - setorder!(vi, vn, get_num_produce(vi)) - else - r = vi[vn] - end - else - r = init(dist, spl) - push!(vi, vn, r, dist, spl) - settrans!(vi, false, vn) - end + r = vi[vn] return r, 0 end function DynamicPPL.tilde(ctx::OptimizationContext, spl, dist, vn::VarName, inds, vi) - if haskey(vi, vn) - # Always overwrite the parameters with new ones for `SampleFromUniform`. 
- if spl isa SampleFromUniform || is_flagged(vi, vn, "del") - unset_flag!(vi, vn, "del") - r = init(dist, spl) - vi[vn] = vectorize(dist, r) - settrans!(vi, false, vn) - setorder!(vi, vn, get_num_produce(vi)) - else - r = vi[vn] - end - else - r = init(dist, spl) - push!(vi, vn, r, dist, spl) - settrans!(vi, false, vn) - end + r = vi[vn] return r, Distributions.logpdf(dist, r) end @@ -83,15 +51,13 @@ end # dot assume function DynamicPPL.dot_tilde(ctx::OptimizationContext{<:LikelihoodContext}, sampler, right, left, vn::VarName, _, vi) vns, dist = get_vns_and_dist(right, left, vn) - r = get_and_set_val!(vi, vns, dist, sampler) - return r, 0 + return vi[vn], 0 end function DynamicPPL.dot_tilde(ctx::OptimizationContext, sampler, right, left, vn::VarName, _, vi) vns, dist = get_vns_and_dist(right, left, vn) - r = get_and_set_val!(vi, vns, dist, sampler) - lp = sum(logpdf(dist, r)) - return r, lp + r = vi[vn] + return r, loglikelihood(dist, r) end # dot observe @@ -106,8 +72,7 @@ end function DynamicPPL.dot_tilde(ctx::OptimizationContext, sampler, right, left, vn, _, vi) vns, dist = get_vns_and_dist(right, left, vn) r = get_and_set_val!(vi, vns, dist, sampler) - lp = sum(logpdf(dist, r)) - return lp + return loglikelihood(dist, r) end function DynamicPPL.dot_tilde(ctx::OptimizationContext, sampler, dists, value, vi) @@ -126,8 +91,6 @@ struct OptimLogDensity{M<:Model,C<:AbstractContext,V<:VarInfo} context::C "A `DynamicPPL.VarInfo` struct that will be used to update model parameters." vi::V - "Whether the to evaluate the model with transformed variables." - linked::Bool end """ @@ -135,19 +98,19 @@ end Create a callable `OptimLogDensity` struct that evaluates a model using the given `context`. 
""" -function OptimLogDensity(model::Model, context::AbstractContext, linked::Bool) - init = VarInfo(model, context) - linked && DynamicPPL.link!(init, DynamicPPL.SampleFromPrior()) - return OptimLogDensity(model, context, init, linked) +function OptimLogDensity(model::Model, context::AbstractContext) + init = VarInfo(model) + DynamicPPL.link!(init, DynamicPPL.SampleFromPrior()) + return OptimLogDensity(model, context, init) end """ - (f::OptimLogDensity)(z; unlinked::Bool = false) + (f::OptimLogDensity)(z) Evaluate the log joint (with `DefaultContext`) or log likelihood (with `LikelihoodContext`) -at the array `z`. If `unlinked=true`, no change of variables will occur. +at the array `z`. """ -function (f::OptimLogDensity)(z; linked::Bool = true) +function (f::OptimLogDensity)(z) spl = DynamicPPL.SampleFromPrior() varinfo = DynamicPPL.VarInfo(f.vi, spl, z) @@ -155,27 +118,6 @@ function (f::OptimLogDensity)(z; linked::Bool = true) return -DynamicPPL.getlogp(varinfo) end -""" - unlink(f::OptimLogDensity) - -Generate an unlinked (with no variable transformions) version of an existing `OptimLogDensity`. -""" -function Bijectors.invlink(f::OptimLogDensity) - init = VarInfo(f.model, f.context) - return OptimLogDensity(f.model, f.context, init, false) -end - -""" - link(f::OptimLogDensity) - -Generate an linked (with variable transformions) version of an existing `OptimLogDensity`. 
-""" -function Bijectors.link(f::OptimLogDensity) - init = VarInfo(f.model, f.context) - DynamicPPL.link!(init, DynamicPPL.SampleFromPrior()) - return OptimLogDensity(f.model, f.context, init, false) -end - """ ModeResult{ V<:NamedArrays.NamedArray, @@ -204,11 +146,19 @@ end # Various StatsBase methods # ############################# -function Base.show(io::IO, m::ModeResult) - println(io, Printf.@sprintf("ModeResult with minimized lp of %.2f", m.lp), "\n") + + +function Base.show(io::IO, ::MIME"text/plain", m::ModeResult) + print(io, "ModeResult with minimized lp of ") + Printf.@printf(io, "%.2f", m.lp) + println(io) show(io, m.values) end +function Base.show(io::IO, m::ModeResult) + show(io, m.values.array) +end + function StatsBase.coeftable(m::ModeResult) # Get columns for coeftable. terms = StatsBase.coefnames(m) @@ -222,8 +172,7 @@ end function StatsBase.informationmatrix(m::ModeResult; hessian_function=ForwardDiff.hessian, kwargs...) # Calculate Hessian and information matrix. varnames = StatsBase.coefnames(m) - f = invlink(m.f) - info = inv(hessian_function(x -> f(x), m.values.array[:, 1])) + info = inv(hessian_function(m.f, m.values.array[:, 1])) return NamedArrays.NamedArray(info, (varnames, varnames)) end @@ -244,7 +193,7 @@ Compute a maximum likelihood estimate of the `model`. # Examples -```julia +```julia-repl @model function f(x) m ~ Normal(0, 1) x ~ Normal(m, 1) @@ -254,12 +203,12 @@ model = f(1.5) mle = optimize(model, MLE()) # Use a different optimizer -mle = optimize(model, MLE(), NelderMeade()) +mle = optimize(model, MLE(), NelderMead()) ``` """ function Optim.optimize(model::Model, ::MLE, args...; kwargs...) ctx = OptimizationContext(DynamicPPL.LikelihoodContext()) - return optimize(model, OptimLogDensity(model, ctx, true), args...; kwargs...) + return optimize(model, OptimLogDensity(model, ctx), args...; kwargs...) end """ @@ -269,7 +218,7 @@ Compute a maximum a posterior estimate of the `model`. 
# Examples -```julia +```julia-repl @model function f(x) m ~ Normal(0, 1) x ~ Normal(m, 1) @@ -279,12 +228,12 @@ model = f(1.5) map_est = optimize(model, MAP()) # Use a different optimizer -map_est = optimize(model, MAP(), NelderMeade()) +map_est = optimize(model, MAP(), NelderMead()) ``` """ function Optim.optimize(model::Model, ::MAP, args...; kwargs...) ctx = OptimizationContext(DynamicPPL.DefaultContext()) - return optimize(model, OptimLogDensity(model, ctx, true), args...; kwargs...) + return optimize(model, OptimLogDensity(model, ctx), args...; kwargs...) end """ diff --git a/test/modes/ModeEstimation.jl b/test/modes/ModeEstimation.jl index 607b95031f..60d38ea58c 100644 --- a/test/modes/ModeEstimation.jl +++ b/test/modes/ModeEstimation.jl @@ -31,7 +31,7 @@ include(dir*"/test/test_utils/AllUtils.jl") m2 = optimize(gdemo_default, MAP(), NelderMead()) m3 = optimize(gdemo_default, MAP(), Newton()) - true_value = [0.0625031, 1.75] + true_value = [49 / 54, 7 / 6] @test all(isapprox.(m1.values.array - true_value, 0.0, atol=0.01)) @test all(isapprox.(m2.values.array - true_value, 0.0, atol=0.01)) @test all(isapprox.(m3.values.array - true_value, 0.0, atol=0.01)) From 1f2e8cb8c6f688a4fd7dbf7910218f4296563873 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Tue, 19 May 2020 20:31:37 -0700 Subject: [PATCH 20/35] Remove Optim from compat --- Project.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/Project.toml b/Project.toml index b1b5a86629..141b4bcc69 100644 --- a/Project.toml +++ b/Project.toml @@ -42,7 +42,6 @@ ForwardDiff = "0.10.3" Libtask = "0.4" LogDensityProblems = "^0.9, 0.10" MCMCChains = "3.0.7" -Optim = "0.20, 0.21" ProgressLogging = "0.1" Reexport = "0.2.0" Requires = "0.5, 1.0" From d97f2dc94f7a0520cf7fef71fde7b56a3ba1dc7d Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Tue, 19 May 2020 21:04:55 -0700 Subject: [PATCH 21/35] Minor correction --- Project.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Project.toml 
b/Project.toml index 141b4bcc69..3886aa8f0a 100644 --- a/Project.toml +++ b/Project.toml @@ -42,6 +42,7 @@ ForwardDiff = "0.10.3" Libtask = "0.4" LogDensityProblems = "^0.9, 0.10" MCMCChains = "3.0.7" +Optim = "0.20, 0.21" ProgressLogging = "0.1" Reexport = "0.2.0" Requires = "0.5, 1.0" @@ -69,4 +70,4 @@ UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["Pkg", "PDMats", "TerminalLoggers", "Test", "UnicodePlots", "StatsBase", "FiniteDifferences", "DynamicHMC", "CmdStan", "BenchmarkTools", "Zygote", "ReverseDiff", "Memoization"] +test = ["Pkg", "PDMats", "TerminalLoggers", "Test", "UnicodePlots", "StatsBase", "FiniteDifferences", "DynamicHMC", "CmdStan", "BenchmarkTools", "Zygote", "ReverseDiff", "Memoization", "Optim", "NamedArrays"] From 16bbdf6cc6a569375803a6e59a4337592fbaa5ed Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Wed, 20 May 2020 09:14:54 -0700 Subject: [PATCH 22/35] Update src/modes/ModeEstimation.jl Co-authored-by: David Widmann --- src/modes/ModeEstimation.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl index d61e126244..f1539700c0 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -50,7 +50,6 @@ end # dot assume function DynamicPPL.dot_tilde(ctx::OptimizationContext{<:LikelihoodContext}, sampler, right, left, vn::VarName, _, vi) - vns, dist = get_vns_and_dist(right, left, vn) return vi[vn], 0 end From 403ddcdc744c682feba50ac53ebaffe568096b79 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Wed, 20 May 2020 09:15:06 -0700 Subject: [PATCH 23/35] Update src/modes/ModeEstimation.jl Co-authored-by: David Widmann --- src/modes/ModeEstimation.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl index f1539700c0..238ea87d0e 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -25,7 +25,7 @@ 
does not use the density with respect to the transformation. This context is intended to allow an optimizer to sample in R^n freely. """ struct OptimizationContext{C<:AbstractContext} <: AbstractContext - context::C + context::C end # assume From 58fd2cf63890367136a7fd91b80837ef4a27c880 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Wed, 20 May 2020 09:15:19 -0700 Subject: [PATCH 24/35] Update src/modes/ModeEstimation.jl Co-authored-by: David Widmann --- src/modes/ModeEstimation.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl index 238ea87d0e..ab0bab1a35 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -31,7 +31,7 @@ end # assume function DynamicPPL.tilde(ctx::OptimizationContext{<:LikelihoodContext}, spl, dist, vn::VarName, inds, vi) r = vi[vn] - return r, 0 + return r, 0 end function DynamicPPL.tilde(ctx::OptimizationContext, spl, dist, vn::VarName, inds, vi) From 3eb52515749592bab25c984696f12e2bb65f1239 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Wed, 20 May 2020 09:18:10 -0700 Subject: [PATCH 25/35] Use getval --- src/modes/ModeEstimation.jl | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl index d61e126244..e1e5590295 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -51,12 +51,13 @@ end # dot assume function DynamicPPL.dot_tilde(ctx::OptimizationContext{<:LikelihoodContext}, sampler, right, left, vn::VarName, _, vi) vns, dist = get_vns_and_dist(right, left, vn) - return vi[vn], 0 + r = getval(vi, vns) + return r, 0 end function DynamicPPL.dot_tilde(ctx::OptimizationContext, sampler, right, left, vn::VarName, _, vi) vns, dist = get_vns_and_dist(right, left, vn) - r = vi[vn] + r = getval(vi, vns) return r, loglikelihood(dist, r) end @@ -71,7 +72,7 @@ end function DynamicPPL.dot_tilde(ctx::OptimizationContext, 
sampler, right, left, vn, _, vi) vns, dist = get_vns_and_dist(right, left, vn) - r = get_and_set_val!(vi, vns, dist, sampler) + r = getval(vi, vns) return loglikelihood(dist, r) end @@ -79,6 +80,22 @@ function DynamicPPL.dot_tilde(ctx::OptimizationContext, sampler, dists, value, v return sum(Distributions.logpdf.(dists, value)) end +function getval( + vi, + vns::AbstractVector{<:VarName}, +) + r = vi[vns] + return r +end + +function getval( + vi, + vns::AbstractArray{<:VarName}, +) + r = reshape(vi[vec(vns)], size(vns)) + return r +end + """ OptimLogDensity{M<:Model,C<:Context,V<:VarInfo} From 0809329001546957b6a6679e64f4a0e26e6b3559 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Wed, 20 May 2020 09:19:56 -0700 Subject: [PATCH 26/35] Update src/modes/ModeEstimation.jl Co-authored-by: David Widmann --- src/modes/ModeEstimation.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl index 7da9c93ad0..86d3b1eca9 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -116,9 +116,9 @@ end Create a callable `OptimLogDensity` struct that evaluates a model using the given `context`. 
""" function OptimLogDensity(model::Model, context::AbstractContext) - init = VarInfo(model) - DynamicPPL.link!(init, DynamicPPL.SampleFromPrior()) - return OptimLogDensity(model, context, init) + init = VarInfo(model) + DynamicPPL.link!(init, DynamicPPL.SampleFromPrior()) + return OptimLogDensity(model, context, init) end """ From 065bc184a67310ed6dd71a30b13ff01060b136a4 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Wed, 20 May 2020 09:21:31 -0700 Subject: [PATCH 27/35] Tidying, fixing tests --- src/modes/ModeEstimation.jl | 1 - test/modes/ModeEstimation.jl | 34 ++++++++++++++++++---------------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl index 7da9c93ad0..7f513a42aa 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -261,7 +261,6 @@ Estimate a mode, i.e., compute a MLE or MAP estimate. function Optim.optimize(model::Model, f::OptimLogDensity, optimizer=Optim.LBFGS(), args...; kwargs...) # Do some initialization. spl = DynamicPPL.SampleFromPrior() - init_params = model(f.vi, spl) init_vals = f.vi[spl] # Optimize! 
diff --git a/test/modes/ModeEstimation.jl b/test/modes/ModeEstimation.jl index 60d38ea58c..a80f6e57ba 100644 --- a/test/modes/ModeEstimation.jl +++ b/test/modes/ModeEstimation.jl @@ -84,22 +84,24 @@ include(dir*"/test/test_utils/AllUtils.jl") end @testset "Dot tilde test" begin - @model function dot_gdemo(x) - s ~ InverseGamma(2,3) - m ~ Normal(0, sqrt(s)) - - x .~ Normal(m, sqrt(s)) + if VERSION >= v"1.0" + @model function dot_gdemo(x) + s ~ InverseGamma(2,3) + m ~ Normal(0, sqrt(s)) + + x .~ Normal(m, sqrt(s)) + end + + model_dot = dot_gdemo([1.5, 2.0]) + + mle1 = optimize(gdemo_default, MLE()) + mle2 = optimize(model_dot, MLE()) + + map1 = optimize(gdemo_default, MAP()) + map2 = optimize(model_dot, MAP()) + + @test isapprox(mle1.values.array, mle2.values.array) + @test isapprox(map1.values.array, map2.values.array) end - - model_dot = dot_gdemo([1.5, 2.0]) - - mle1 = optimize(gdemo_default, MLE()) - mle2 = optimize(model_dot, MLE()) - - map1 = optimize(gdemo_default, MAP()) - map2 = optimize(model_dot, MAP()) - - @test isapprox(mle1.values.array, mle2.values.array) - @test isapprox(map1.values.array, map2.values.array) end end \ No newline at end of file From 1e5aea809b9993b32931eb672cddbb99d197db9e Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Wed, 20 May 2020 11:23:08 -0700 Subject: [PATCH 28/35] Replaced >= with >, because I am a fool --- test/modes/ModeEstimation.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/modes/ModeEstimation.jl b/test/modes/ModeEstimation.jl index a80f6e57ba..ce8f97575e 100644 --- a/test/modes/ModeEstimation.jl +++ b/test/modes/ModeEstimation.jl @@ -84,7 +84,7 @@ include(dir*"/test/test_utils/AllUtils.jl") end @testset "Dot tilde test" begin - if VERSION >= v"1.0" + if VERSION > v"1.0" @model function dot_gdemo(x) s ~ InverseGamma(2,3) m ~ Normal(0, sqrt(s)) From 199c00c9b213c66b446e43b0d3095c3fef5fc600 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Wed, 20 May 2020 13:17:27 -0700 Subject: [PATCH 
29/35] Use function notation for .~ --- test/modes/ModeEstimation.jl | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/test/modes/ModeEstimation.jl b/test/modes/ModeEstimation.jl index ce8f97575e..016f1b8a6c 100644 --- a/test/modes/ModeEstimation.jl +++ b/test/modes/ModeEstimation.jl @@ -84,24 +84,22 @@ include(dir*"/test/test_utils/AllUtils.jl") end @testset "Dot tilde test" begin - if VERSION > v"1.0" - @model function dot_gdemo(x) - s ~ InverseGamma(2,3) - m ~ Normal(0, sqrt(s)) - - x .~ Normal(m, sqrt(s)) - end - - model_dot = dot_gdemo([1.5, 2.0]) - - mle1 = optimize(gdemo_default, MLE()) - mle2 = optimize(model_dot, MLE()) - - map1 = optimize(gdemo_default, MAP()) - map2 = optimize(model_dot, MAP()) - - @test isapprox(mle1.values.array, mle2.values.array) - @test isapprox(map1.values.array, map2.values.array) + @model function dot_gdemo(x) + s ~ InverseGamma(2,3) + m ~ Normal(0, sqrt(s)) + + (.~)(x, Normal(m, sqrt(s))) end + + model_dot = dot_gdemo([1.5, 2.0]) + + mle1 = optimize(gdemo_default, MLE()) + mle2 = optimize(model_dot, MLE()) + + map1 = optimize(gdemo_default, MAP()) + map2 = optimize(model_dot, MAP()) + + @test isapprox(mle1.values.array, mle2.values.array) + @test isapprox(map1.values.array, map2.values.array) end end \ No newline at end of file From bd0c1bb5daf74cd3b943c1b5867f475eebac7458 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Wed, 20 May 2020 15:44:17 -0700 Subject: [PATCH 30/35] Link the model vi after extracting optimized vals --- src/modes/ModeEstimation.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl index 0b34c994d4..237bf38afd 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -271,6 +271,7 @@ function Optim.optimize(model::Model, f::OptimLogDensity, optimizer=Optim.LBFGS( f.vi[spl] = M.minimizer invlink!(f.vi, spl) vals = f.vi[spl] + link!(f.vi, spl) # Make one transition to get the 
parameter names. ts = [Turing.Inference.Transition(DynamicPPL.tonamedtuple(f.vi), DynamicPPL.getlogp(f.vi))] From 05ae0d4fc5011cf450bc19f0a68e22eea926d1d6 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Wed, 20 May 2020 16:21:46 -0700 Subject: [PATCH 31/35] Make sure linking status is right for Hessian --- src/modes/ModeEstimation.jl | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl index 237bf38afd..14cb4c6841 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -188,8 +188,25 @@ end function StatsBase.informationmatrix(m::ModeResult; hessian_function=ForwardDiff.hessian, kwargs...) # Calculate Hessian and information matrix. + + # Convert the values to their unconstrained states to make sure the + # Hessian is computed with respect to the untransformed parameters. + spl = DynamicPPL.SampleFromPrior() + + # NOTE: This should be converted to islinked(vi, spl) after + # https://github.com/TuringLang/DynamicPPL.jl/pull/124 goes through. + vns = DynamicPPL._getvns(m.f.vi, spl) + + linked = DynamicPPL._islinked(m.f.vi, vns) + linked && invlink!(m.f.vi, spl) + + # Calculate the Hessian. varnames = StatsBase.coefnames(m) info = inv(hessian_function(m.f, m.values.array[:, 1])) + + # Link it back if we invlinked it. + linked && link!(m.f.vi, spl) + return NamedArrays.NamedArray(info, (varnames, varnames)) end @@ -255,7 +272,7 @@ end """ Optim.optimize(model::Model, f::OptimLogDensity, optimizer=Optim.LBFGS(), args...; kwargs...) - + Estimate a mode, i.e., compute a MLE or MAP estimate. """ function Optim.optimize(model::Model, f::OptimLogDensity, optimizer=Optim.LBFGS(), args...; kwargs...)
From 345776cf23601fdfc90a131f4bd05a641f040b35 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Thu, 21 May 2020 06:00:01 -0700 Subject: [PATCH 32/35] Update src/Turing.jl Co-authored-by: David Widmann --- src/Turing.jl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Turing.jl b/src/Turing.jl index 0bbca7445f..35fab3da37 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -115,8 +115,5 @@ export @model, # modelling LogPoisson, NamedDist, filldist, - arraydist, - - MLE, # mode estimation tools - MAP + arraydist end From a2b159ca37c1d001fdaf869052fbbbe88557e77c Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Thu, 21 May 2020 06:02:02 -0700 Subject: [PATCH 33/35] Update src/modes/ModeEstimation.jl Co-authored-by: David Widmann --- src/modes/ModeEstimation.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl index 14cb4c6841..d0f199c05c 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -7,9 +7,9 @@ import ..DynamicPPL import ..DynamicPPL: Model, AbstractContext, VarInfo, AbstractContext, VarName, _getindex, getsym, getfield, settrans!, setorder!, get_and_set_val!, istrans, tilde, dot_tilde -import Optim -import Optim: optimize -import NamedArrays +import .Optim +import .Optim: optimize +import .NamedArrays import ..ForwardDiff import StatsBase import Printf From 856f282725cc77ce20425a62425ec9797ea7e5e2 Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Thu, 21 May 2020 06:14:11 -0700 Subject: [PATCH 34/35] Changer NamedArrays to dependency --- Project.toml | 5 +++-- src/modes/ModeEstimation.jl | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index 3886aa8f0a..f441602174 100644 --- a/Project.toml +++ b/Project.toml @@ -17,6 +17,7 @@ Libtask = "6f1fad26-d15e-5dc8-ae53-837a1d7b8c9f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogDensityProblems = 
"6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" +NamedArrays = "86f7a689-2022-50b4-a561-43c23ac3c673" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" @@ -42,6 +43,7 @@ ForwardDiff = "0.10.3" Libtask = "0.4" LogDensityProblems = "^0.9, 0.10" MCMCChains = "3.0.7" +NamedArrays = "0.9" Optim = "0.20, 0.21" ProgressLogging = "0.1" Reexport = "0.2.0" @@ -58,7 +60,6 @@ CmdStan = "593b3428-ca2f-500c-ae53-031589ec8ddd" DynamicHMC = "bbc10e6e-7c05-544b-b16e-64fede858acb" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" Memoization = "6fafb56a-5788-4b4e-91ca-c0cea6611c73" -NamedArrays = "86f7a689-2022-50b4-a561-43c23ac3c673" Optim = "429524aa-4258-5aef-a3af-852621145aeb" PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" @@ -70,4 +71,4 @@ UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["Pkg", "PDMats", "TerminalLoggers", "Test", "UnicodePlots", "StatsBase", "FiniteDifferences", "DynamicHMC", "CmdStan", "BenchmarkTools", "Zygote", "ReverseDiff", "Memoization", "Optim", "NamedArrays"] +test = ["Pkg", "PDMats", "TerminalLoggers", "Test", "UnicodePlots", "StatsBase", "FiniteDifferences", "DynamicHMC", "CmdStan", "BenchmarkTools", "Zygote", "ReverseDiff", "Memoization", "Optim"] diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl index d0f199c05c..fb0f65aa79 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -9,8 +9,8 @@ import ..DynamicPPL: Model, AbstractContext, VarInfo, AbstractContext, VarName, get_and_set_val!, istrans, tilde, dot_tilde import .Optim import .Optim: optimize -import .NamedArrays import ..ForwardDiff +import NamedArrays import StatsBase import Printf From 8cd87aaec559d44e0e3c3da685a82669e9d4a47e Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: 
Thu, 21 May 2020 10:04:23 -0700 Subject: [PATCH 35/35] Add warning if no convergence occurred, adapt to DynamicPPL master --- src/modes/ModeEstimation.jl | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/modes/ModeEstimation.jl b/src/modes/ModeEstimation.jl index fb0f65aa79..00f8c505e3 100644 --- a/src/modes/ModeEstimation.jl +++ b/src/modes/ModeEstimation.jl @@ -6,7 +6,7 @@ import ..AbstractMCMC: AbstractSampler import ..DynamicPPL import ..DynamicPPL: Model, AbstractContext, VarInfo, AbstractContext, VarName, _getindex, getsym, getfield, settrans!, setorder!, - get_and_set_val!, istrans, tilde, dot_tilde + get_and_set_val!, istrans, tilde, dot_tilde, get_vns_and_dist import .Optim import .Optim: optimize import ..ForwardDiff @@ -29,6 +29,10 @@ struct OptimizationContext{C<:AbstractContext} <: AbstractContext end # assume +function DynamicPPL.tilde(rng, ctx::OptimizationContext, spl, dist, vn::VarName, inds, vi) + return DynamicPPL.tilde(ctx, spl, dist, vn, inds, vi) +end + function DynamicPPL.tilde(ctx::OptimizationContext{<:LikelihoodContext}, spl, dist, vn::VarName, inds, vi) r = vi[vn] return r, 0 @@ -39,7 +43,12 @@ function DynamicPPL.tilde(ctx::OptimizationContext, spl, dist, vn::VarName, inds return r, Distributions.logpdf(dist, r) end + # observe +function DynamicPPL.tilde(rng, ctx::OptimizationContext, sampler, right, left, vi) + return DynamicPPL.tilde(ctx, sampler, right, left, vi) +end + function DynamicPPL.tilde(ctx::OptimizationContext{<:PriorContext}, sampler, right, left, vi) return 0 end @@ -49,6 +58,10 @@ function DynamicPPL.tilde(ctx::OptimizationContext, sampler, dist, value, vi) end # dot assume +function DynamicPPL.dot_tilde(rng, ctx::OptimizationContext, sampler, right, left, vn::VarName, inds, vi) + return DynamicPPL.dot_tilde(ctx, sampler, right, left, vn, inds, vi) +end + function DynamicPPL.dot_tilde(ctx::OptimizationContext{<:LikelihoodContext}, sampler, right, left, vn::VarName, _, vi) vns, 
dist = get_vns_and_dist(right, left, vn) r = getval(vi, vns) @@ -202,7 +215,8 @@ function StatsBase.informationmatrix(m::ModeResult; hessian_function=ForwardDiff # Calculate the Hessian. varnames = StatsBase.coefnames(m) - info = inv(hessian_function(m.f, m.values.array[:, 1])) + H = hessian_function(m.f, m.values.array[:, 1]) + info = inv(H) # Link it back if we invlinked it. linked && link!(m.f.vi, spl) @@ -283,6 +297,11 @@ function Optim.optimize(model::Model, f::OptimLogDensity, optimizer=Optim.LBFGS( # Optimize! M = Optim.optimize(f, init_vals, optimizer, args...; kwargs...) + # Warn the user if the optimization did not converge. + if !Optim.converged(M) + @warn "Optimization did not converge! You may need to correct your model or adjust the Optim parameters." + end + # Get the VarInfo at the MLE/MAP point, and run the model to ensure # correct dimensionality. f.vi[spl] = M.minimizer