From ac71f5061ad69c5de3bad64b23751e62a7e6db33 Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 13:35:52 +0100
Subject: [PATCH 01/55] Rename last to last!

---
 src/core/varinfo.jl              | 7 +------
 src/samplers/support/hmc_core.jl | 4 +++-
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/core/varinfo.jl b/src/core/varinfo.jl
index d84d887a17..fb67637b91 100644
--- a/src/core/varinfo.jl
+++ b/src/core/varinfo.jl
@@ -73,7 +73,6 @@ setval!(vi::VarInfo, val::Any, view::Vector{UnitRange}) = map(v->vi.vals[end][v]
 getall(vi::VarInfo)            = vi.vals[end]
 setall!(vi::VarInfo, val::Any) = vi.vals[end] = val
-
 getsym(vi::VarInfo, vn::VarName) = vi.vns[getidx(vi, vn)].sym
 
 getdist(vi::VarInfo, vn::VarName) = vi.dists[getidx(vi, vn)]
@@ -201,11 +200,7 @@ end
 #################################
 
 expand!(vi::VarInfo) = push!(vi.vals, deepcopy(vi.vals[end]))
-last(_vi::VarInfo) = begin
-  vi = deepcopy(_vi)
-  splice!(vi.vals, 1:length(vi.vals)-1)
-  vi
-end
+last!(vi::VarInfo) = splice!(vi.vals, 1:length(vi.vals)-1)
 
 # Get all indices of variables belonging to gid or 0
 getidcs(vi::VarInfo) = getidcs(vi, nothing)
diff --git a/src/samplers/support/hmc_core.jl b/src/samplers/support/hmc_core.jl
index 17e706bcf8..40a49548da 100644
--- a/src/samplers/support/hmc_core.jl
+++ b/src/samplers/support/hmc_core.jl
@@ -48,8 +48,10 @@ function leapfrog(_vi::VarInfo, _p::Vector, τ::Int, ϵ::Float64, model::Functio
     τ_valid += 1
   end
 
+  last!(vi)
+
   # Return updated θ and momentum
-  last(vi), p, τ_valid
+  vi, p, τ_valid
 end
 
 # Compute Hamiltonian

From 557df1ad77cf89491748668d524d82bd768a764d Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 13:36:37 +0100
Subject: [PATCH 02/55] Remove old comments

---
 src/core/varinfo.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/core/varinfo.jl b/src/core/varinfo.jl
index fb67637b91..d2325a0442 100644
--- a/src/core/varinfo.jl
+++ b/src/core/varinfo.jl
@@ -209,7 +209,6 @@ getidcs(vi::VarInfo, spl::Sampler) = begin
   # NOTE: 0b00 is the sanity flag for
   #   |\____ getidcs   (mask = 0b10)
   #   \_____ getranges (mask = 0b01)
-  # TODO: set these as constants
   if ~haskey(spl.info, :cache_updated) spl.info[:cache_updated] = CACHERESET end
   if haskey(spl.info, :idcs) && (spl.info[:cache_updated] & CACHEIDCS) > 0
     spl.info[:idcs]

From f4ebf85d47b4a5ecd9a756dd34c2b9175fa1383e Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 14:21:55 +0100
Subject: [PATCH 03/55] Improve transformation interface

---
 src/core/varinfo.jl     | 9 +++++----
 src/samplers/hmc.jl     | 2 +-
 src/samplers/sampler.jl | 2 +-
 test/utility.jl         | 2 +-
 4 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/core/varinfo.jl b/src/core/varinfo.jl
index d2325a0442..c041a38d4d 100644
--- a/src/core/varinfo.jl
+++ b/src/core/varinfo.jl
@@ -81,7 +81,8 @@ getgid(vi::VarInfo, vn::VarName) = vi.gids[getidx(vi, vn)]
 
 setgid!(vi::VarInfo, gid::Int, vn::VarName) = vi.gids[getidx(vi, vn)] = gid
 
-istransformed(vi::VarInfo, vn::VarName) = vi.trans[getidx(vi, vn)]
+istrans(vi::VarInfo, vn::VarName)                = vi.trans[getidx(vi, vn)]
+settrans!(vi::VarInfo, trans::Bool, vn::VarName) = vi.trans[getidx(vi, vn)] = trans
 
 isempty(vi::VarInfo) = isempty(vi.idcs)
 
@@ -92,7 +93,7 @@ function link(_vi::VarInfo, spl::Sampler)
   for vn in gvns
     dist = getdist(vi, vn)
     setval!(vi, vectorize(dist, link(dist, reconstruct(dist, getval(vi, vn)))), vn)
-    vi.trans[getidx(vi, vn)] = true
+    settrans!(vi, true, vn)
   end
   vi
 end
@@ -104,7 +105,7 @@ function invlink(_vi::VarInfo, spl::Sampler)
   for vn in gvns
     dist = getdist(vi, vn)
     setval!(vi, vectorize(dist, invlink(dist, reconstruct(dist, getval(vi, vn)))), vn)
-    vi.trans[getidx(vi, vn)] = false
+    settrans!(vi, false, vn)
   end
   vi
 end
@@ -127,7 +128,7 @@ Base.getindex(vi::VarInfo, vn::VarName) = begin
   @assert haskey(vi, vn) "[Turing] attempted to replay nonexistent variables in VarInfo"
   dist = getdist(vi, vn)
   r = reconstruct(dist, getval(vi, vn))
-  r = istransformed(vi, vn) ? invlink(dist, r) : r
+  r = istrans(vi, vn) ? invlink(dist, r) : r
 end
 
 # Base.setindex!(vi::VarInfo, r, vn::VarName) = begin
diff --git a/src/samplers/hmc.jl b/src/samplers/hmc.jl
index 3ba5684248..750a2c6cdd 100644
--- a/src/samplers/hmc.jl
+++ b/src/samplers/hmc.jl
@@ -101,7 +101,7 @@ function assume{T<:Hamiltonian}(spl::Sampler{T}, dist::Distribution, vn::VarName
   dprintln(2, "assuming...")
   updategid!(vi, vn, spl)
   r = vi[vn]
-  vi.logp += logpdf(dist, r, istransformed(vi, vn))
+  vi.logp += logpdf(dist, r, istrans(vi, vn))
   r
 end
diff --git a/src/samplers/sampler.jl b/src/samplers/sampler.jl
index 8d5f419a98..40b2a267fd 100644
--- a/src/samplers/sampler.jl
+++ b/src/samplers/sampler.jl
@@ -33,7 +33,7 @@ assume(spl::Void, dist::Distribution, vn::VarName, vi::VarInfo) = begin
     r = rand(dist)
     push!(vi, vn, r, dist, 0)
   end
-  vi.logp += logpdf(dist, r, istransformed(vi, vn))
+  vi.logp += logpdf(dist, r, istrans(vi, vn))
   r
 end
diff --git a/test/utility.jl b/test/utility.jl
index 0c95ae359d..ef514a3578 100644
--- a/test/utility.jl
+++ b/test/utility.jl
@@ -13,7 +13,7 @@ randr(vi::VarInfo, vn::VarName, dist::Distribution, count::Bool) = begin
   else
     if count checkindex(vn, vi) end
     r = vi[vn]
-    vi.logp += logpdf(dist, r, istransformed(vi, vn))
+    vi.logp += logpdf(dist, r, istrans(vi, vn))
     r
   end
 end
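Background for the transformation interface above: `link` maps a constrained support onto all of R so that HMC proposals cannot step outside the support, and `invlink` is its inverse; `istrans`/`settrans!` record per variable which parameterisation `vi.vals` currently holds, so `getindex` knows whether to `invlink` before returning a value. A minimal sketch of the pair for a (0, 1)-supported distribution (the logit map; illustrative, not the Turing API):

```
using Distributions

d  = Beta(2, 3)
x  = rand(d)             # x lives in (0, 1)
y  = log(x / (1 - x))    # link:    constrained -> unconstrained
x2 = 1 / (1 + exp(-y))   # invlink: unconstrained -> constrained; x2 ≈ x
```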
From 2daf6cc5a95135a11da17b0f202e5ba4e5b09d43 Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 14:27:36 +0100
Subject: [PATCH 04/55] Make VarInfo.trans a vector-of-vectors

---
 src/core/varinfo.jl | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/core/varinfo.jl b/src/core/varinfo.jl
index c041a38d4d..ed6bee21a2 100644
--- a/src/core/varinfo.jl
+++ b/src/core/varinfo.jl
@@ -38,7 +38,7 @@ type VarInfo
   vals        :: Vector{Vector{Real}}
   dists       :: Vector{Distribution}
   gids        :: Vector{Int}   # group ids
-  trans       :: Vector{Bool}
+  trans       :: Vector{Vector{Bool}}
   logp        :: Real          # NOTE: logp should be a vector.
   logw        :: Real          # NOTE: importance weight when sampling from the prior.
   index       :: Int           # index of current randomness
@@ -50,7 +50,7 @@ type VarInfo
     Vector{Vector{Real}}(),
     Vector{Distribution}(),
     Vector{Int}(),
-    Vector{Bool}(),
+    Vector{Vector{Bool}}(),
     0.0,0.0,
     0,
     0
@@ -81,8 +81,8 @@ getgid(vi::VarInfo, vn::VarName) = vi.gids[getidx(vi, vn)]
 
 setgid!(vi::VarInfo, gid::Int, vn::VarName) = vi.gids[getidx(vi, vn)] = gid
 
-istrans(vi::VarInfo, vn::VarName)                = vi.trans[getidx(vi, vn)]
-settrans!(vi::VarInfo, trans::Bool, vn::VarName) = vi.trans[getidx(vi, vn)] = trans
+istrans(vi::VarInfo, vn::VarName)                = vi.trans[end][getidx(vi, vn)]
+settrans!(vi::VarInfo, trans::Bool, vn::VarName) = vi.trans[end][getidx(vi, vn)] = trans
 
 isempty(vi::VarInfo) = isempty(vi.idcs)
 
@@ -175,6 +175,7 @@ push!(vi::VarInfo, vn::VarName, r::Any, dist::Distribution, gid::Int) = begin
   @assert ~(vn in vns(vi)) "[push!] attempt to add an existing variable $(sym(vn)) ($(vn)) to VarInfo (keys=$(keys(vi))) with dist=$dist, gid=$gid"
 
   if isempty(vi.vals)  push!(vi.vals, Vector{Real}()) end
+  if isempty(vi.trans) push!(vi.trans, Vector{Bool}()) end
 
   val = vectorize(dist, r)
@@ -185,7 +186,7 @@ push!(vi::VarInfo, vn::VarName, r::Any, dist::Distribution, gid::Int) = begin
   append!(vi.vals[end], val)
   push!(vi.dists, dist)
   push!(vi.gids, gid)
-  push!(vi.trans, false)
+  push!(vi.trans[end], false)
 
   vi
 end

From 0b201d23adbf73a60232e9e9544351cf06336238 Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 14:32:08 +0100
Subject: [PATCH 05/55] Remove deepcopy from link/invlink

---
 src/core/varinfo.jl | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/core/varinfo.jl b/src/core/varinfo.jl
index ed6bee21a2..f7e9fc3223 100644
--- a/src/core/varinfo.jl
+++ b/src/core/varinfo.jl
@@ -87,26 +87,28 @@ settrans!(vi::VarInfo, trans::Bool, vn::VarName) = vi.trans[end][getidx(vi, vn)]
 isempty(vi::VarInfo) = isempty(vi.idcs)
 
 # X -> R for all variables associated with given sampler
-function link(_vi::VarInfo, spl::Sampler)
-  vi = deepcopy(_vi)
+function link(vi::VarInfo, spl::Sampler)
+  expand!(vi)
   gvns = getvns(vi, spl)
   for vn in gvns
     dist = getdist(vi, vn)
     setval!(vi, vectorize(dist, link(dist, reconstruct(dist, getval(vi, vn)))), vn)
     settrans!(vi, true, vn)
   end
+  last!(vi)
   vi
 end
 
 # R -> X for all variables associated with given sampler
-function invlink(_vi::VarInfo, spl::Sampler)
-  vi = deepcopy(_vi)
+function invlink(vi::VarInfo, spl::Sampler)
+  expand!(vi)
   gvns = getvns(vi, spl)
   for vn in gvns
     dist = getdist(vi, vn)
     setval!(vi, vectorize(dist, invlink(dist, reconstruct(dist, getval(vi, vn)))), vn)
     settrans!(vi, false, vn)
   end
+  last!(vi)
   vi
 end
@@ -201,8 +203,15 @@ end
 # Utility functions for VarInfo #
 #################################
 
-expand!(vi::VarInfo) = push!(vi.vals, deepcopy(vi.vals[end]))
-last!(vi::VarInfo) = splice!(vi.vals, 1:length(vi.vals)-1)
+expand!(vi::VarInfo) = begin
+  push!(vi.vals, deepcopy(vi.vals[end]))
+  push!(vi.trans, deepcopy(vi.trans[end]))
+end
+
+last!(vi::VarInfo) = begin
+  splice!(vi.vals, 1:length(vi.vals)-1)
+  splice!(vi.trans, 1:length(vi.trans)-1)
+end
 
 # Get all indices of variables belonging to gid or 0
 getidcs(vi::VarInfo) = getidcs(vi, nothing)

From 478d9fd0c1b5fd43ef102e900b2a46b556b8d787 Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 14:42:19 +0100
Subject: [PATCH 06/55] Use realpart in expand

---
 src/core/varinfo.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/core/varinfo.jl b/src/core/varinfo.jl
index f7e9fc3223..969716417c 100644
--- a/src/core/varinfo.jl
+++ b/src/core/varinfo.jl
@@ -204,11 +204,12 @@ end
 #################################
 
 expand!(vi::VarInfo) = begin
-  push!(vi.vals, deepcopy(vi.vals[end]))
+  push!(vi.vals, realpart(vi.vals[end]))
   push!(vi.trans, deepcopy(vi.trans[end]))
 end
 
 last!(vi::VarInfo) = begin
+  # QUESTION: is splice! slow?
   splice!(vi.vals, 1:length(vi.vals)-1)
   splice!(vi.trans, 1:length(vi.trans)-1)
 end

From 9d1c21193b0d22dde1b8c6ae9a75772ea57ad4d3 Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 15:13:38 +0100
Subject: [PATCH 07/55] Improve last!

---
 src/core/varinfo.jl | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/core/varinfo.jl b/src/core/varinfo.jl
index 969716417c..ebbb59c8dd 100644
--- a/src/core/varinfo.jl
+++ b/src/core/varinfo.jl
@@ -209,9 +209,8 @@ expand!(vi::VarInfo) = begin
 end
 
 last!(vi::VarInfo) = begin
-  # QUESTION: is splice! slow?
-  splice!(vi.vals, 1:length(vi.vals)-1)
-  splice!(vi.trans, 1:length(vi.trans)-1)
+  vi.vals  = [vi.vals[end]]
+  vi.trans = [vi.trans[end]]
 end
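The `expand!`/`last!` pair that patches 05-07 generalise treats `vi.vals` as a small stack of candidate value vectors: `expand!` pushes a working copy of the newest state, and `last!` commits by discarding everything underneath it. The bookkeeping in miniature, on a bare vector-of-vectors (illustrative only):

```
vals = [[1.0, 2.0]]            # one committed state
push!(vals, copy(vals[end]))   # expand!: push a working copy
vals[end][1] = 42.0            # mutate the copy, not the committed state
vals = vals[end:end]           # last!: keep only the newest copy
```

Patch 07's `vi.vals = [vi.vals[end]]` avoids `splice!` shifting elements; it simply rebinds the field to a fresh one-element vector.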
From efe151a88dfa223be13bc1d92d086e017a4f9339 Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 16:36:21 +0100
Subject: [PATCH 08/55] Make VarInfo.logp into a vector

---
 src/core/ad.jl                   |  4 +--
 src/core/varinfo.jl              | 57 ++++++++++++++++++++------------
 src/samplers/hmc.jl              |  4 +--
 src/samplers/is.jl               |  2 +-
 src/samplers/pgibbs.jl           |  4 +--
 src/samplers/sampler.jl          |  8 ++---
 src/samplers/support/helper.jl   |  2 +-
 src/samplers/support/hmc_core.jl | 14 ++++----
 test/utility.jl                  |  4 +--
 9 files changed, 57 insertions(+), 42 deletions(-)

diff --git a/src/core/ad.jl b/src/core/ad.jl
index 88b8aa55e4..1db3a0f3eb 100644
--- a/src/core/ad.jl
+++ b/src/core/ad.jl
@@ -17,7 +17,7 @@ gradient(_vi::VarInfo, model::Function, spl::Union{Void, Sampler}) = begin
   f(x::Vector) = begin
     vi[spl] = x
-    -runmodel(model, vi, spl).logp
+    -getlogp(runmodel(model, vi, spl))
   end
 
   g = x -> ForwardDiff.gradient(f, x::Vector,
@@ -77,7 +77,7 @@ gradient2(_vi::VarInfo, model::Function, spl::Union{Void, Sampler}) = begin
     vi = runmodel(model, vi, spl)
     # Collect gradient
     dprintln(4, "collect gradients from logp...")
-    append!(grad, collect(dualpart(-vi.logp)))
+    append!(grad, collect(dualpart(-getlogp(vi))))
   end
 
   grad
diff --git a/src/core/varinfo.jl b/src/core/varinfo.jl
index ebbb59c8dd..b3be33d53a 100644
--- a/src/core/varinfo.jl
+++ b/src/core/varinfo.jl
@@ -37,24 +37,30 @@ type VarInfo
   ranges      :: Vector{UnitRange{Int}}
   vals        :: Vector{Vector{Real}}
   dists       :: Vector{Distribution}
-  gids        :: Vector{Int}   # group ids
+  gids        :: Vector{Int}
   trans       :: Vector{Vector{Bool}}
-  logp        :: Real          # NOTE: logp should be a vector.
+  logp        :: Vector{Real}
   logw        :: Real          # NOTE: importance weight when sampling from the prior.
   index       :: Int           # index of current randomness
   num_produce :: Int           # num of produce calls from trace, each produce corresponds to an observe.
 
-  VarInfo() = new(
-    Dict{VarName, Int}(),
-    Vector{VarName}(),
-    Vector{UnitRange{Int}}(),
-    Vector{Vector{Real}}(),
-    Vector{Distribution}(),
-    Vector{Int}(),
-    Vector{Vector{Bool}}(),
-    0.0,0.0,
-    0,
-    0
-  )
+  VarInfo() = begin
+    vals  = Vector{Vector{Real}}(); push!(vals, Vector{Real}())
+    trans = Vector{Vector{Bool}}(); push!(trans, Vector{Bool}())
+    logp  = Vector{Real}();         push!(logp, zero(Real))
+
+    new(
+      Dict{VarName, Int}(),
+      Vector{VarName}(),
+      Vector{UnitRange{Int}}(),
+      vals,
+      Vector{Distribution}(),
+      Vector{Int}(),
+      trans, logp,
+      zero(Real),
+      0,
+      0
+    )
+  end
 end
 
 typealias VarView Union{Int,UnitRange,Vector{Int},Vector{UnitRange}}
@@ -81,6 +90,10 @@ setgid!(vi::VarInfo, gid::Int, vn::VarName) = vi.gids[getidx(vi, vn)] = gid
 istrans(vi::VarInfo, vn::VarName)                = vi.trans[end][getidx(vi, vn)]
 settrans!(vi::VarInfo, trans::Bool, vn::VarName) = vi.trans[end][getidx(vi, vn)] = trans
 
+getlogp(vi::VarInfo)              = vi.logp[end]
+setlogp!(vi::VarInfo, logp::Real) = vi.logp[end] = logp
+acclogp!(vi::VarInfo, logp::Real) = vi.logp[end] += logp
+
 isempty(vi::VarInfo) = isempty(vi.idcs)
@@ -117,7 +127,7 @@ function cleandual!(vi::VarInfo)
     range = getrange(vi, vn)
     vi[range] = realpart(vi[range])
   end
-  vi.logp = realpart(vi.logp)
+  setlogp!(vi, realpart(getlogp(vi)))
   vi.logw = realpart(vi.logw)
 end
@@ -176,9 +186,6 @@ push!(vi::VarInfo, vn::VarName, r::Any, dist::Distribution, gid::Int) = begin
   @assert ~(vn in vns(vi)) "[push!] attempt to add an existing variable $(sym(vn)) ($(vn)) to VarInfo (keys=$(keys(vi))) with dist=$dist, gid=$gid"
 
-  if isempty(vi.vals)  push!(vi.vals, Vector{Real}()) end
-  if isempty(vi.trans) push!(vi.trans, Vector{Bool}()) end
-
   val = vectorize(dist, r)
@@ -204,13 +211,21 @@ end
 #################################
 
 expand!(vi::VarInfo) = begin
-  push!(vi.vals, realpart(vi.vals[end]))
+  push!(vi.vals, realpart(vi.vals[end])); vi.vals[end], vi.vals[end-1] = vi.vals[end-1], vi.vals[end]
   push!(vi.trans, deepcopy(vi.trans[end]))
+  push!(vi.logp, zero(Real))
+end
+
+shrink!(vi::VarInfo) = begin
+  pop!(vi.vals)
+  pop!(vi.trans)
+  pop!(vi.logp)
 end
 
 last!(vi::VarInfo) = begin
-  vi.vals  = [vi.vals[end]]
-  vi.trans = [vi.trans[end]]
+  vi.vals  = vi.vals[end:end]
+  vi.trans = vi.trans[end:end]
+  vi.logp  = vi.logp[end:end]
 end
diff --git a/src/samplers/hmc.jl b/src/samplers/hmc.jl
index 750a2c6cdd..2b32acc4d7 100644
--- a/src/samplers/hmc.jl
+++ b/src/samplers/hmc.jl
@@ -101,11 +101,11 @@ function assume{T<:Hamiltonian}(spl::Sampler{T}, dist::Distribution, vn::VarName
   dprintln(2, "assuming...")
   updategid!(vi, vn, spl)
   r = vi[vn]
-  vi.logp += logpdf(dist, r, istrans(vi, vn))
+  acclogp!(vi, logpdf(dist, r, istrans(vi, vn)))
   r
 end
 
 function observe{T<:Hamiltonian}(spl::Sampler{T}, d::Distribution, value::Any, vi::VarInfo)
   dprintln(2, "observing...")
-  vi.logp += logpdf(d, map(x -> Dual(x), value))
+  acclogp!(vi, logpdf(d, map(x -> Dual(x), value)))
 end
diff --git a/src/samplers/is.jl b/src/samplers/is.jl
index 4a1dadd3da..fe2337582a 100644
--- a/src/samplers/is.jl
+++ b/src/samplers/is.jl
@@ -57,4 +57,4 @@ assume(spl::Sampler{IS}, dist::Distribution, vn::VarName, vi::VarInfo) = begin
 end
 
 observe(spl::Sampler{IS}, dist::Distribution, value::Any, vi::VarInfo) =
-  vi.logp += logpdf(dist, value)
+  acclogp!(vi, logpdf(dist, value))
diff --git a/src/samplers/pgibbs.jl b/src/samplers/pgibbs.jl
index 11669cfd98..1357c7303a 100644
--- a/src/samplers/pgibbs.jl
+++ b/src/samplers/pgibbs.jl
@@ -50,7 +50,7 @@ step(model::Function, spl::Sampler{PG}, vi::VarInfo, _::Bool) = step(model, spl,
 step(model::Function, spl::Sampler{PG}, vi::VarInfo) = begin
   particles = ParticleContainer{TraceR}(model)
 
-  vi.index = 0; vi.num_produce = 0;  # We need this line cause fork2 deepcopy `vi`.
+  vi.index = 0; vi.num_produce = 0;  # We need this line because fork2 deepcopies `vi`.
   ref_particle = isempty(vi) ?
                  nothing :
                  fork2(TraceR(model, spl, vi))
@@ -121,6 +121,6 @@ end
 observe{T<:Union{PG,SMC}}(spl::Sampler{T}, dist::Distribution, value, vi) = begin
   lp = logpdf(dist, value)
-  vi.logp += lp
+  acclogp!(vi, lp)
   produce(lp)
 end
diff --git a/src/samplers/sampler.jl b/src/samplers/sampler.jl
index 40b2a267fd..2feaffc52b 100644
--- a/src/samplers/sampler.jl
+++ b/src/samplers/sampler.jl
@@ -33,12 +33,12 @@ assume(spl::Void, dist::Distribution, vn::VarName, vi::VarInfo) = begin
     r = rand(dist)
     push!(vi, vn, r, dist, 0)
   end
-  vi.logp += logpdf(dist, r, istrans(vi, vn))
+  acclogp!(vi, logpdf(dist, r, istrans(vi, vn)))
   r
 end
 
-observe(spl::Void, d::Distribution, value::Any, vi::VarInfo) = begin
-  lp = logpdf(d, value)
+observe(spl::Void, dist::Distribution, value::Any, vi::VarInfo) = begin
+  lp = logpdf(dist, value)
   vi.logw += lp
-  vi.logp += lp
+  acclogp!(vi, lp)
 end
diff --git a/src/samplers/support/helper.jl b/src/samplers/support/helper.jl
index a4b7b24d55..2ab376a9db 100644
--- a/src/samplers/support/helper.jl
+++ b/src/samplers/support/helper.jl
@@ -41,6 +41,6 @@ Sample(vi::VarInfo) = begin
     value[sym(vn)] = realpart(vi[vn])
   end
   # NOTE: do we need to check if lp is 0?
-  value[:lp] = realpart(vi.logp)
+  value[:lp] = realpart(getlogp(vi))
   Sample(0.0, value)
 end
diff --git a/src/samplers/support/hmc_core.jl b/src/samplers/support/hmc_core.jl
index 40a49548da..98ac90bc63 100644
--- a/src/samplers/support/hmc_core.jl
+++ b/src/samplers/support/hmc_core.jl
@@ -2,12 +2,13 @@ global Δ_max = 1000
 setchunksize(chunk_size::Int) = global CHUNKSIZE = chunk_size
 
-function runmodel(model::Function, _vi::VarInfo, spl::Union{Void,Sampler})
+function runmodel(model::Function, vi::VarInfo, spl::Union{Void,Sampler})
   dprintln(4, "run model...")
-  vi = deepcopy(_vi)
-  vi.logp = 0.0
+  expand!(vi)
   vi.index = 0
-  model(vi=vi, sampler=spl) # run model\
+  vi = model(vi=vi, sampler=spl) # run model
+  last!(vi)
+  vi
 end
@@ -40,8 +41,7 @@ function leapfrog(_vi::VarInfo, _p::Vector, τ::Int, ϵ::Float64, model::Functio
     grad = gradient2(vi, model, spl)
 
     # Verify gradients; reject if gradients is NaN or Inf
-    # verifygrad(grad) || (pop!(vi.vals); pop!(vi.logp); p = p_old; break)
-    verifygrad(grad) || (pop!(vi.vals); p = p_old; break)
+    verifygrad(grad) || (shrink!(vi); p = p_old; break)
 
     p -= ϵ * grad / 2
@@ -58,7 +58,7 @@ end
 function find_H(p::Vector, model::Function, _vi::VarInfo, spl::Sampler)
   vi = deepcopy(_vi)
   vi = runmodel(model, vi, spl)
-  H = dot(p, p) / 2 + realpart(-vi.logp)
+  H = dot(p, p) / 2 + realpart(-getlogp(vi))
   if isnan(H) H = Inf else H end
 end
diff --git a/test/utility.jl b/test/utility.jl
index ef514a3578..4a683a32f2 100644
--- a/test/utility.jl
+++ b/test/utility.jl
@@ -2,7 +2,7 @@
 # TODO: can we somehow update the tests so that we can remove these two functions below?
 
 using Turing
-using Turing: checkindex, setval!, updategid!, vectorize, CACHERESET, VarInfo, VarName, Sampler
+using Turing: checkindex, setval!, updategid!, acclogp!, vectorize, CACHERESET, VarInfo, VarName, Sampler
 
 randr(vi::VarInfo, vn::VarName, dist::Distribution, count::Bool) = begin
   vi.index = count ? vi.index + 1 : vi.index
@@ -13,7 +13,7 @@ randr(vi::VarInfo, vn::VarName, dist::Distribution, count::Bool) = begin
   else
     if count checkindex(vn, vi) end
     r = vi[vn]
-    vi.logp += logpdf(dist, r, istrans(vi, vn))
+    acclogp!(vi, logpdf(dist, r, istrans(vi, vn)))
     r
   end
 end

From 817ffc3ff2a5f875f661cf9515a2dde4aaf5910b Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 17:10:42 +0100
Subject: [PATCH 09/55] Remove deepcopy from ad.jl

---
 src/core/ad.jl                   | 7 ++++---
 src/samplers/support/hmc_core.jl | 1 +
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/core/ad.jl b/src/core/ad.jl
index 1db3a0f3eb..ab49cde553 100644
--- a/src/core/ad.jl
+++ b/src/core/ad.jl
@@ -10,7 +10,7 @@ grad = gradient(vi, model, spl)
 end
 ```
 """
-gradient(_vi::VarInfo, model::Function) = gradient(_vi, model, nothing)
+gradient(vi::VarInfo, model::Function) = gradient(vi, model, nothing)
 
 gradient(_vi::VarInfo, model::Function, spl::Union{Void, Sampler}) = begin
   vi = deepcopy(_vi)
@@ -26,9 +26,10 @@ end
-gradient2(_vi::VarInfo, model::Function, spl::Union{Void, Sampler}) = begin
+gradient2(vi::VarInfo, model::Function, spl::Union{Void, Sampler}) = begin
   # Initialisation
-  vi = deepcopy(_vi); grad = Vector{Float64}()
+  expand!(vi)
+  grad = Vector{Float64}()
 
   # Split keys(vi) into chunks,
   dprintln(4, "making chunks...")
diff --git a/src/samplers/support/hmc_core.jl b/src/samplers/support/hmc_core.jl
index 98ac90bc63..8f1185ef4f 100644
--- a/src/samplers/support/hmc_core.jl
+++ b/src/samplers/support/hmc_core.jl
@@ -57,6 +57,7 @@ end
 # Compute Hamiltonian
 function find_H(p::Vector, model::Function, _vi::VarInfo, spl::Sampler)
   vi = deepcopy(_vi)
+  # TODO: remove below by making sure that the logp is always updated
   vi = runmodel(model, vi, spl)
   H = dot(p, p) / 2 + realpart(-getlogp(vi))
   if isnan(H) H = Inf else H end
 end

From 17388125eaa0a20bb99af697b89824f2a5591447 Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 17:23:47 +0100
Subject: [PATCH 10/55] Remove duplicated expand from ad.jl

---
 src/core/ad.jl | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/core/ad.jl b/src/core/ad.jl
index ab49cde553..70eb7d48e1 100644
--- a/src/core/ad.jl
+++ b/src/core/ad.jl
@@ -28,7 +28,6 @@ end
 gradient2(vi::VarInfo, model::Function, spl::Union{Void, Sampler}) = begin
   # Initialisation
-  expand!(vi)
   grad = Vector{Float64}()
 
   # Split keys(vi) into chunks,
@@ -52,8 +51,8 @@ gradient2(vi::VarInfo, model::Function, spl::Union{Void, Sampler}) = begin
   # Chunk-wise forward AD
   for (key_chunk, prior_dim) in prior_key_chunks
-    expand!(vi)  # NOTE: we don't have to call last in the end
-                 #       because we don't return the amended VarInfo
+    expand!(vi)
+
     # Set dual part correspondingly
     dprintln(4, "set dual...")
     dps = zeros(prior_dim)
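After patch 08, the running log-density lives in `vi.logp[end]`, one slot per `expand!` level, and all samplers go through `getlogp`/`setlogp!`/`acclogp!` instead of touching the field. The access pattern on a bare vector (illustrative, not the Turing API):

```
logp = [0.0]                 # one accumulator per expand! level
logp[end] = 0.0              # setlogp!: reset before re-running the model
logp[end] += -0.5 * 1.2^2    # acclogp!: each assume/observe adds its term
logp[end] += -0.5 * 0.4^2
current = logp[end]          # getlogp
```

Funnelling every update through these accessors is what lets the following patches keep changing the representation without touching each sampler again.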
From ab444cb0a607803953afb79adecb59240f0bc4a8 Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 21:02:28 +0100
Subject: [PATCH 11/55] Remove expand in runmodel

---
 src/core/ad.jl                   |  6 +++---
 src/samplers/support/hmc_core.jl | 17 +++++++----------
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/src/core/ad.jl b/src/core/ad.jl
index 70eb7d48e1..a9a12cda77 100644
--- a/src/core/ad.jl
+++ b/src/core/ad.jl
@@ -10,7 +10,7 @@ grad = gradient(vi, model, spl)
 end
 ```
 """
-gradient(vi::VarInfo, model::Function) = gradient(vi, model, nothing)
+gradient(vi::VarInfo, model::Function) = gradient2(vi, model, nothing)
 
 gradient(_vi::VarInfo, model::Function, spl::Union{Void, Sampler}) = begin
   vi = deepcopy(_vi)
@@ -51,8 +51,8 @@ gradient2(vi::VarInfo, model::Function, spl::Union{Void, Sampler}) = begin
   # Chunk-wise forward AD
   for (key_chunk, prior_dim) in prior_key_chunks
-    expand!(vi)
-
+    expand!(vi)  # NOTE: place where calling gradient should
+                 #       be responsible for clean up the vals
     # Set dual part correspondingly
     dprintln(4, "set dual...")
     dps = zeros(prior_dim)
diff --git a/src/samplers/support/hmc_core.jl b/src/samplers/support/hmc_core.jl
index 8f1185ef4f..17e7e920ed 100644
--- a/src/samplers/support/hmc_core.jl
+++ b/src/samplers/support/hmc_core.jl
@@ -2,12 +2,13 @@ global Δ_max = 1000
 setchunksize(chunk_size::Int) = global CHUNKSIZE = chunk_size
 
-function runmodel(model::Function, vi::VarInfo, spl::Union{Void,Sampler})
+runmodel(model::Function, vi::VarInfo, spl::Union{Void,Sampler}) = begin
   dprintln(4, "run model...")
-  expand!(vi)
   vi.index = 0
-  vi = model(vi=vi, sampler=spl) # run model
-  last!(vi)
-  vi
+  model(vi=vi, sampler=spl) # run model
 end
 
-function sample_momentum(vi::VarInfo, spl::Sampler)
+sample_momentum(vi::VarInfo, spl::Sampler) = begin
   dprintln(2, "sampling momentum...")
   randn(length(getranges(vi, spl)))
 end
 
 # Leapfrog step
-function leapfrog(_vi::VarInfo, _p::Vector, τ::Int, ϵ::Float64, model::Function, spl::Sampler)
+leapfrog(_vi::VarInfo, _p::Vector, τ::Int, ϵ::Float64, model::Function, spl::Sampler) = begin
 
   vi = deepcopy(_vi)
   p = deepcopy(_p)
@@ -55,15 +52,15 @@ end
 # Compute Hamiltonian
-function find_H(p::Vector, model::Function, _vi::VarInfo, spl::Sampler)
-  vi = deepcopy(_vi)
+find_H(p::Vector, model::Function, vi::VarInfo, spl::Sampler) = begin
+  expand!(vi)
   # TODO: remove below by making sure that the logp is always updated
   vi = runmodel(model, vi, spl)
   H = dot(p, p) / 2 + realpart(-getlogp(vi))
   if isnan(H) H = Inf else H end
 end
 
-function find_good_eps{T}(model::Function, spl::Sampler{T}, vi::VarInfo)
+find_good_eps{T}(model::Function, spl::Sampler{T}, vi::VarInfo) = begin
   ϵ, p = 1.0, sample_momentum(vi, spl)    # set initial epsilon and momentums
   log_p_r_Θ = -find_H(p, model, vi, spl)  # calculate p(Θ, r) = exp(-H(Θ, r))

From 6b78abd713de5c52ab1b30b9fa13516ad476d03b Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 21:19:24 +0100
Subject: [PATCH 12/55] Rename variables in ad.jl

---
 src/core/ad.jl | 47 ++++++++++++++++++++++++++---------------------
 1 file changed, 26 insertions(+), 21 deletions(-)

diff --git a/src/core/ad.jl b/src/core/ad.jl
index a9a12cda77..dca68a4b43 100644
--- a/src/core/ad.jl
+++ b/src/core/ad.jl
@@ -32,46 +32,51 @@ gradient2(vi::VarInfo, model::Function, spl::Union{Void, Sampler}) = begin
   # Split keys(vi) into chunks,
   dprintln(4, "making chunks...")
-  prior_key_chunks = []; key_chunk = []; prior_dim = 0
+  vn_chunks = []; vn_chunk = []; chunk_dim = 0
 
-  gkeys = getvns(vi, spl)
-  for k in gkeys
+  vns_all = getvns(vi, spl)
+  for k in vns_all
     l = length(getrange(vi, k))   # dimension for the current variable
-    if prior_dim + l > CHUNKSIZE
-      push!(prior_key_chunks,     # store the previous chunk
-            (key_chunk, prior_dim))
-      key_chunk = []              # initialise a new chunk
-      prior_dim = 0               # reset dimension counter
+    if chunk_dim + l > CHUNKSIZE
+      push!(vn_chunks,            # store the previous chunk
+            (vn_chunk, chunk_dim))
+      vn_chunk = []               # initialise a new chunk
+      chunk_dim = 0               # reset dimension counter
     end
-    push!(key_chunk, k)           # put the current variable into the current chunk
-    prior_dim += l                # update dimension counter
+    push!(vn_chunk, k)            # put the current variable into the current chunk
+    chunk_dim += l                # update dimension counter
   end
-  push!(prior_key_chunks,         # push the last chunk
-        (key_chunk, prior_dim))
+  push!(vn_chunks,                # push the last chunk
+        (vn_chunk, chunk_dim))
 
   # Chunk-wise forward AD
-  for (key_chunk, prior_dim) in prior_key_chunks
+  for (vn_chunk, chunk_dim) in vn_chunks
     expand!(vi)  # NOTE: place where calling gradient should
                  #       be responsible for clean up the vals
     # Set dual part correspondingly
     dprintln(4, "set dual...")
-    dps = zeros(prior_dim)
-    prior_count = 1
-    for k in gkeys
+    dps = zeros(chunk_dim)
+    dim_count = 1
+
+    # vns = filter(vn -> vn in vn_chunk, vns_all)
+    # ranges = union(map(vn -> getrange(vi, vn), vns)...)
+    # vi[ranges] =
+
+    for k in vns_all
       l = length(getrange(vi, k))
       reals = realpart(getval(vi, k))
       range = getrange(vi, k)
-      if k in key_chunk           # for each variable to compute gradient in this round
+      if k in vn_chunk            # for each variable to compute gradient in this round
        dprintln(5, "making dual...")
         for i = 1:l
-          dps[prior_count] = 1    # set dual part
+          dps[dim_count] = 1      # set dual part
           vi[range[i]] = ForwardDiff.Dual(reals[i], dps...)
-          dps[prior_count] = 0    # reset dual part
-          prior_count += 1        # count
+          dps[dim_count] = 0      # reset dual part
+          dim_count += 1          # count
         end
         dprintln(5, "make dual done")
       else                        # for other variables (no gradient in this round)
-        vi[range] = map(r -> Dual{prior_dim, Float64}(r), reals)
+        vi[range] = map(r -> Dual{chunk_dim, Float64}(r), reals)
       end
     end
     vi = runmodel(model, vi, spl)
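The chunking in `gradient2` exists because forward-mode AD carries one dual component per seeded input dimension, so seeding everything at once makes every intermediate value as wide as the full parameter vector. Splitting `getvns(vi, spl)` into chunks of at most `CHUNKSIZE` dimensions and re-running the model once per chunk bounds that width, at the cost of extra model runs. Modern ForwardDiff exposes the same trade-off directly; a sketch for comparison (current ForwardDiff API, not what this code calls):

```
using ForwardDiff

f(x) = sum(abs2, x) / 2   # stand-in for the negative log-density
x = randn(10)
cfg = ForwardDiff.GradientConfig(f, x, ForwardDiff.Chunk{4}())
g = ForwardDiff.gradient(f, x, cfg)   # 10 dims computed in ceil(10/4) = 3 passes
```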
From e47fb3e36d91475fbbe3ddb7777862b07cdd7316 Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 21:40:48 +0100
Subject: [PATCH 13/55] Give up improving AD

---
 src/core/ad.jl | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/core/ad.jl b/src/core/ad.jl
index dca68a4b43..cff59731df 100644
--- a/src/core/ad.jl
+++ b/src/core/ad.jl
@@ -53,15 +53,12 @@ gradient2(vi::VarInfo, model::Function, spl::Union{Void, Sampler}) = begin
   for (vn_chunk, chunk_dim) in vn_chunks
     expand!(vi)  # NOTE: place where calling gradient should
                  #       be responsible for clean up the vals
+
     # Set dual part correspondingly
     dprintln(4, "set dual...")
     dps = zeros(chunk_dim)
-    dim_count = 1
-
-    # vns = filter(vn -> vn in vn_chunk, vns_all)
-    # ranges = union(map(vn -> getrange(vi, vn), vns)...)
-    # vi[ranges] =
+    dim_count = 1
     for k in vns_all
       l = length(getrange(vi, k))
       reals = realpart(getval(vi, k))

From 04dfeffb39feee4a30d9ad1835800e7788fd44dc Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 22:11:32 +0100
Subject: [PATCH 14/55] Change link and invlink to in-place

---
 src/core/varinfo.jl              | 20 ++++++--------------
 src/samplers/hmc.jl              |  2 +-
 src/samplers/hmcda.jl            |  6 +++---
 src/samplers/nuts.jl             |  8 ++++----
 src/samplers/pgibbs.jl           |  7 ++-----
 src/samplers/support/hmc_core.jl |  7 ++++---
 6 files changed, 20 insertions(+), 30 deletions(-)

diff --git a/src/core/varinfo.jl b/src/core/varinfo.jl
index b3be33d53a..6c15dd7188 100644
--- a/src/core/varinfo.jl
+++ b/src/core/varinfo.jl
@@ -97,30 +97,22 @@ acclogp!(vi::VarInfo, logp::Real) = vi.logp[end] += logp
 isempty(vi::VarInfo) = isempty(vi.idcs)
 
 # X -> R for all variables associated with given sampler
-function link(vi::VarInfo, spl::Sampler)
-  expand!(vi)
-  gvns = getvns(vi, spl)
-  for vn in gvns
+link!(vi::VarInfo, spl::Sampler) =
+  for vn in getvns(vi, spl)
     dist = getdist(vi, vn)
     setval!(vi, vectorize(dist, link(dist, reconstruct(dist, getval(vi, vn)))), vn)
     settrans!(vi, true, vn)
+    setlogp!(vi, zero(Real))
   end
-  last!(vi)
-  vi
-end
 
 # R -> X for all variables associated with given sampler
-function invlink(vi::VarInfo, spl::Sampler)
-  expand!(vi)
-  gvns = getvns(vi, spl)
-  for vn in gvns
+invlink!(vi::VarInfo, spl::Sampler) =
+  for vn in getvns(vi, spl)
     dist = getdist(vi, vn)
     setval!(vi, vectorize(dist, invlink(dist, reconstruct(dist, getval(vi, vn)))), vn)
     settrans!(vi, false, vn)
+    setlogp!(vi, zero(Real))
   end
-  last!(vi)
-  vi
-end
 
 function cleandual!(vi::VarInfo)
   for vn in keys(vi)
diff --git a/src/samplers/hmc.jl b/src/samplers/hmc.jl
index 2b32acc4d7..352d4cb8ba 100644
--- a/src/samplers/hmc.jl
+++ b/src/samplers/hmc.jl
@@ -74,7 +74,7 @@ function sample{T<:Hamiltonian}(model::Function, alg::T, chunk_size::Int)
   accept_num = 0    # record the accept number
 
   vi = model()
-  if spl.alg.gid == 0 vi = link(vi, spl) end
+  if spl.alg.gid == 0 link!(vi, spl) end
 
   # HMC steps
   spl.info[:progress] = ProgressMeter.Progress(n, 1, "[$alg_str] Sampling...", 0)
diff --git a/src/samplers/hmcda.jl b/src/samplers/hmcda.jl
index 1fe08440af..baf048e293 100644
--- a/src/samplers/hmcda.jl
+++ b/src/samplers/hmcda.jl
@@ -54,7 +54,7 @@ function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool)
     if spl.alg.delta > 0
 
       dprintln(3, "X -> R...")
-      if spl.alg.gid != 0 vi = link(vi, spl) end
+      if spl.alg.gid != 0 link!(vi, spl) end
 
       # Heuristically find optimal ϵ
       ϵ = find_good_eps(model, spl, vi)
@@ -89,7 +89,7 @@ function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool)
     p = sample_momentum(vi, spl)
 
     dprintln(3, "X -> R...")
-    if spl.alg.gid != 0 vi = link(vi, spl) end
+    if spl.alg.gid != 0 link!(vi, spl) end
 
     dprintln(2, "recording old H...")
     oldH = find_H(p, model, vi, spl)
@@ -126,7 +126,7 @@ function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool)
     end
 
     dprintln(3, "R -> X...")
-    if spl.alg.gid != 0 vi = invlink(vi, spl); cleandual!(vi) end
+    if spl.alg.gid != 0 invlink!(vi, spl); cleandual!(vi) end
 
     dprintln(2, "decide whether to accept...")
     if rand() < α      # accepted
diff --git a/src/samplers/nuts.jl b/src/samplers/nuts.jl
index d6ff9c0ae4..1605174687 100644
--- a/src/samplers/nuts.jl
+++ b/src/samplers/nuts.jl
@@ -47,14 +47,14 @@ function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool)
     vi_0 = deepcopy(vi)
 
     dprintln(3, "X -> R...")
-    if spl.alg.gid != 0 vi = link(vi, spl) end
+    if spl.alg.gid != 0 link!(vi, spl) end
 
     # Heuristically find optimal ϵ
     # ϵ_bar, ϵ = find_good_eps(model, spl, vi)
     ϵ = find_good_eps(model, spl, vi)
 
     dprintln(3, "R -> X...")
-    if spl.alg.gid != 0 vi = invlink(vi, spl) end
+    if spl.alg.gid != 0 invlink!(vi, spl) end
 
     spl.info[:ϵ] = ϵ
     spl.info[:μ] = log(10 * ϵ)
@@ -78,7 +78,7 @@ function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool)
     p = sample_momentum(vi, spl)
 
     dprintln(3, "X -> R...")
-    if spl.alg.gid != 0 vi = link(vi, spl) end
+    if spl.alg.gid != 0 link!(vi, spl) end
 
     dprintln(2, "recording old H...")
     H0 = find_H(p, model, vi, spl)
@@ -112,7 +112,7 @@ function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool)
     end
 
     dprintln(3, "R -> X...")
-    if spl.alg.gid != 0 vi = invlink(vi, spl); cleandual!(vi) end
+    if spl.alg.gid != 0 invlink!(vi, spl); cleandual!(vi) end
 
     # Use Dual Averaging to adapt ϵ
     m = spl.info[:m] += 1
diff --git a/src/samplers/pgibbs.jl b/src/samplers/pgibbs.jl
index 1357c7303a..1f866a5035 100644
--- a/src/samplers/pgibbs.jl
+++ b/src/samplers/pgibbs.jl
@@ -119,8 +119,5 @@ assume{T<:Union{PG,SMC}}(spl::Sampler{T}, dist::Distribution, vn::VarName, _::Va
   end
 end
 
-observe{T<:Union{PG,SMC}}(spl::Sampler{T}, dist::Distribution, value, vi) = begin
-  lp = logpdf(dist, value)
-  acclogp!(vi, lp)
-  produce(lp)
-end
+observe{T<:Union{PG,SMC}}(spl::Sampler{T}, dist::Distribution, value, vi) =
+  produce(logpdf(dist, value))
diff --git a/src/samplers/support/hmc_core.jl b/src/samplers/support/hmc_core.jl
index 17e7e920ed..09cfd5f640 100644
--- a/src/samplers/support/hmc_core.jl
+++ b/src/samplers/support/hmc_core.jl
@@ -53,9 +53,10 @@ end
 # Compute Hamiltonian
 find_H(p::Vector, model::Function, vi::VarInfo, spl::Sampler) = begin
-  expand!(vi)
-  # TODO: remove below by making sure that the logp is always updated
-  vi = runmodel(model, vi, spl)
+  # NOTE: getlogp(vi) = 0 means the current vals[end] hasn't been used at all.
+  #       This can be a result of link/invlink (where expand! is used)
+  if getlogp(vi) == 0 vi = runmodel(model, vi, spl) end
+
   H = dot(p, p) / 2 + realpart(-getlogp(vi))
   if isnan(H) H = Inf else H end
 end

From 029a3f754076f15ad971c352e3cffb10b98d67d0 Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 22:14:41 +0100
Subject: [PATCH 15/55] Fix HMC accept rate log

---
 src/samplers/hmc.jl | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/samplers/hmc.jl b/src/samplers/hmc.jl
index 352d4cb8ba..d964ee89f2 100644
--- a/src/samplers/hmc.jl
+++ b/src/samplers/hmc.jl
@@ -71,7 +71,6 @@ function sample{T<:Hamiltonian}(model::Function, alg::T, chunk_size::Int)
   for i = 1:n
     samples[i] = Sample(weight, Dict{Symbol, Any}())
   end
-  accept_num = 0    # record the accept number
 
   vi = model()
   if spl.alg.gid == 0 link!(vi, spl) end
@@ -90,7 +89,7 @@ function sample{T<:Hamiltonian}(model::Function, alg::T, chunk_size::Int)
   end
 
   if ~isa(alg, NUTS)  # accept rate for NUTS is meaningless - so no printing
-    accept_rate = accept_num / n  # calculate the accept rate
+    accept_rate = sum(spl.info[:accept_his]) / n  # calculate the accept rate
     println("[$alg_str] Done with accept rate = $accept_rate.")
   end
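Patch 15's fix: `accept_num` appears never to have been incremented after earlier refactors, so the reported rate was always zero. `step` already pushes a `Bool` per iteration into `spl.info[:accept_his]`, and since `true` counts as 1, summing that vector gives the number of accepted steps:

```
accept_his = Bool[]
push!(accept_his, true); push!(accept_his, false); push!(accept_his, true)
accept_rate = sum(accept_his) / length(accept_his)   # 2/3
```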
From e1a43a41950add251774bb30b00d045df771178a Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 22:21:31 +0100
Subject: [PATCH 16/55] Remove expand in ad.jl

---
 src/core/ad.jl                   | 8 +++-----
 src/samplers/support/hmc_core.jl | 1 +
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/core/ad.jl b/src/core/ad.jl
index cff59731df..f1aa301bec 100644
--- a/src/core/ad.jl
+++ b/src/core/ad.jl
@@ -51,10 +51,7 @@ gradient2(vi::VarInfo, model::Function, spl::Union{Void, Sampler}) = begin
   # Chunk-wise forward AD
   for (vn_chunk, chunk_dim) in vn_chunks
-    expand!(vi)  # NOTE: place where calling gradient should
-                 #       be responsible for clean up the vals
-
-    # Set dual part correspondingly
+    # 1. Set dual part correspondingly
     dprintln(4, "set dual...")
     dps = zeros(chunk_dim)
@@ -76,8 +73,9 @@ gradient2(vi::VarInfo, model::Function, spl::Union{Void, Sampler}) = begin
       end
     end
+
+    # 2. Run model and collect gradient
     vi = runmodel(model, vi, spl)
-    # Collect gradient
     dprintln(4, "collect gradients from logp...")
     append!(grad, collect(dualpart(-getlogp(vi))))
   end
diff --git a/src/samplers/support/hmc_core.jl b/src/samplers/support/hmc_core.jl
index 09cfd5f640..8870d0bd8f 100644
--- a/src/samplers/support/hmc_core.jl
+++ b/src/samplers/support/hmc_core.jl
@@ -5,6 +5,7 @@ setchunksize(chunk_size::Int) = global CHUNKSIZE = chunk_size
 runmodel(model::Function, vi::VarInfo, spl::Union{Void,Sampler}) = begin
   dprintln(4, "run model...")
   vi.index = 0
+  setlogp!(vi, zero(Real))
   model(vi=vi, sampler=spl) # run model
 end

From 59b7521d64e0be0acd4fc51c4a6d506d552ef32b Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 22:29:44 +0100
Subject: [PATCH 17/55] Remove deepcopy from leapfrog

---
 src/samplers/support/hmc_core.jl | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/samplers/support/hmc_core.jl b/src/samplers/support/hmc_core.jl
index 8870d0bd8f..830556e7c6 100644
--- a/src/samplers/support/hmc_core.jl
+++ b/src/samplers/support/hmc_core.jl
@@ -15,10 +15,7 @@ sample_momentum(vi::VarInfo, spl::Sampler) = begin
 end
 
 # Leapfrog step
-leapfrog(_vi::VarInfo, _p::Vector, τ::Int, ϵ::Float64, model::Function, spl::Sampler) = begin
-
-  vi = deepcopy(_vi)
-  p = deepcopy(_p)
+leapfrog(vi::VarInfo, p::Vector, τ::Int, ϵ::Float64, model::Function, spl::Sampler) = begin
 
   dprintln(3, "first gradient...")
   grad = gradient2(vi, model, spl)
@@ -73,14 +70,14 @@ find_good_eps{T}(model::Function, spl::Sampler{T}, vi::VarInfo) = begin
   #   vi_prime, p_prime, τ_valid = leapfrog(vi, p, 1, ϵ, model, spl)
   # end
   # ϵ_bar = ϵ
-  vi_prime, p_prime, τ = leapfrog(vi, p, 1, ϵ, model, spl)
+  vi_prime, p_prime, τ = leapfrog(deepcopy(vi), deepcopy(p), 1, ϵ, model, spl)
   log_p_r_Θ′ = τ == 0 ? -Inf : -find_H(p_prime, model, vi_prime, spl)  # calculate new p(Θ, p)
 
   # Heuristically find optimal ϵ
   a = 2.0 * (log_p_r_Θ′ - log_p_r_Θ > log(0.5) ? 1 : 0) - 1
   while (exp(log_p_r_Θ′ - log_p_r_Θ))^a > 2.0^(-a)
     ϵ = 2.0^a * ϵ
-    vi_prime, p_prime, τ = leapfrog(vi, p, 1, ϵ, model, spl)
+    vi_prime, p_prime, τ = leapfrog(deepcopy(vi), deepcopy(p), 1, ϵ, model, spl)
     log_p_r_Θ′ = τ == 0 ? -Inf : -find_H(p_prime, model, vi_prime, spl)
     dprintln(1, "a = $a, log_p_r_Θ′ = $log_p_r_Θ′")
   end

From 2d3daf9ae921218b08c4aab99cc72b1c6d44b4a7 Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 22:37:27 +0100
Subject: [PATCH 18/55] Reduce deepcopy in HMCDA and NUTS

---
 src/samplers/hmcda.jl | 14 ++++----------
 src/samplers/nuts.jl  | 16 ++++------------
 2 files changed, 8 insertions(+), 22 deletions(-)

diff --git a/src/samplers/hmcda.jl b/src/samplers/hmcda.jl
index baf048e293..1db7a947ea 100644
--- a/src/samplers/hmcda.jl
+++ b/src/samplers/hmcda.jl
@@ -49,16 +49,10 @@ end
 
 function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool)
   if is_first
-    vi_0 = deepcopy(vi)
-
     if spl.alg.delta > 0
-
-      dprintln(3, "X -> R...")
-      if spl.alg.gid != 0 link!(vi, spl) end
-
-      # Heuristically find optimal ϵ
-      ϵ = find_good_eps(model, spl, vi)
-
+      if spl.alg.gid != 0 link!(vi, spl) end      # X -> R
+      ϵ = find_good_eps(model, spl, vi)           # heuristically find optimal ϵ
+      if spl.alg.gid != 0 invlink!(vi, spl) end   # R -> X
     else
       ϵ = spl.info[:ϵ][end]
     end
@@ -71,7 +65,7 @@ function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool)
 
     push!(spl.info[:accept_his], true)
 
-    vi_0
+    vi
   else
     dprintln(2, "recording old θ...")
     old_vi = deepcopy(vi)
diff --git a/src/samplers/nuts.jl b/src/samplers/nuts.jl
index 1605174687..6efe74dc39 100644
--- a/src/samplers/nuts.jl
+++ b/src/samplers/nuts.jl
@@ -44,17 +44,9 @@ end
 
 function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool)
   if is_first
-    vi_0 = deepcopy(vi)
-
-    dprintln(3, "X -> R...")
-    if spl.alg.gid != 0 link!(vi, spl) end
-
-    # Heuristically find optimal ϵ
-    # ϵ_bar, ϵ = find_good_eps(model, spl, vi)
-    ϵ = find_good_eps(model, spl, vi)
-
-    dprintln(3, "R -> X...")
-    if spl.alg.gid != 0 invlink!(vi, spl) end
+    if spl.alg.gid != 0 link!(vi, spl) end      # X -> R
+    ϵ = find_good_eps(model, spl, vi)           # heuristically find optimal ϵ
+    if spl.alg.gid != 0 invlink!(vi, spl) end   # R -> X
 
     spl.info[:ϵ] = ϵ
     spl.info[:μ] = log(10 * ϵ)
@@ -65,7 +57,7 @@ function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool)
 
     push!(spl.info[:accept_his], true)
 
-    vi_0
+    vi
   else
     # Set parameters
     δ = spl.alg.delta

From 6b0c2ac80f60edd8bb68012c73fdd4c41136860a Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 22:39:42 +0100
Subject: [PATCH 19/55] Fix benchmark path bug

---
 benchmarks/benchmark.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/benchmarks/benchmark.jl b/benchmarks/benchmark.jl
index 56d4a52759..e6e343a3cb 100644
--- a/benchmarks/benchmark.jl
+++ b/benchmarks/benchmark.jl
@@ -6,14 +6,14 @@ using Stan
 CONFIG = Dict(
   "model-list" => [
     "gdemo-geweke",
-    "MoC",
+    # "MoC",
     #"normal-loc",
     "normal-mixture",
-    "simplegauss",
+    "gdemo",
     "gauss",
     "bernoulli",
     #"negative-binomial",
-    "lda"
+    # "lda"
   ],
 
   "test-level" => 2  # 1 = model lang, 2 = whole interface

From d3cf32b26b5fc66a67b7e196deb05ed183d5a085 Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 23:17:09 +0100
Subject: [PATCH 20/55] Fix model bug

---
 example-models/benchmarks/gauss.model.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/example-models/benchmarks/gauss.model.jl b/example-models/benchmarks/gauss.model.jl
index 19e41180a1..de82e9483a 100644
--- a/example-models/benchmarks/gauss.model.jl
+++ b/example-models/benchmarks/gauss.model.jl
@@ -1,6 +1,6 @@
 @model gaussmodel(x) = begin
   N = length(x)
-  lam = tzeros(Float64,N)
+  lam = tzeros(Real,N)
   mu ~ Normal(0, sqrt(1000));
   for i = 1:N
     lam[i] ~ Gamma(1, 1)
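For reference while reading the leapfrog refactors (patches 17 and 22-26): underneath the `VarInfo` plumbing sits the standard leapfrog integrator, which is a half momentum step, a full position step, and another half momentum step, where `grad` is the gradient of the negative log-density. A self-contained sketch against a standard-normal target (illustrative; the real code threads `vi`, `model` and `spl` through instead of a closure):

```
function leapfrog_plain(θ, p, τ, ϵ, ∇U)
    for _ in 1:τ
        p -= ϵ / 2 * ∇U(θ)   # half step for momentum
        θ += ϵ * p           # full step for position
        p -= ϵ / 2 * ∇U(θ)   # half step for momentum
    end
    θ, p
end

∇U(θ) = θ   # standard normal: U(θ) = θ'θ/2
θ1, p1 = leapfrog_plain(randn(2), randn(2), 10, 0.1, ∇U)
```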
From 1fc34dc644830d34573c9f2daaaf1b96da0e6217 Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 23:23:20 +0100
Subject: [PATCH 21/55] Improve inv/link

---
 src/core/varinfo.jl   | 10 ++++++----
 src/samplers/hmcda.jl |  4 ++--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/core/varinfo.jl b/src/core/varinfo.jl
index 6c15dd7188..9210e8c1ac 100644
--- a/src/core/varinfo.jl
+++ b/src/core/varinfo.jl
@@ -97,22 +97,24 @@ acclogp!(vi::VarInfo, logp::Real) = vi.logp[end] += logp
 isempty(vi::VarInfo) = isempty(vi.idcs)
 
 # X -> R for all variables associated with given sampler
-link!(vi::VarInfo, spl::Sampler) =
+link!(vi::VarInfo, spl::Sampler) = begin
   for vn in getvns(vi, spl)
     dist = getdist(vi, vn)
     setval!(vi, vectorize(dist, link(dist, reconstruct(dist, getval(vi, vn)))), vn)
     settrans!(vi, true, vn)
-    setlogp!(vi, zero(Real))
   end
+  setlogp!(vi, zero(Real))
+end
 
 # R -> X for all variables associated with given sampler
-invlink!(vi::VarInfo, spl::Sampler) =
+invlink!(vi::VarInfo, spl::Sampler) = begin
   for vn in getvns(vi, spl)
     dist = getdist(vi, vn)
     setval!(vi, vectorize(dist, invlink(dist, reconstruct(dist, getval(vi, vn)))), vn)
     settrans!(vi, false, vn)
-    setlogp!(vi, zero(Real))
   end
+  setlogp!(vi, zero(Real))
+end
diff --git a/src/samplers/hmcda.jl b/src/samplers/hmcda.jl
index 1db7a947ea..c1a070da63 100644
--- a/src/samplers/hmcda.jl
+++ b/src/samplers/hmcda.jl
@@ -125,10 +125,10 @@ function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool)
     dprintln(2, "decide whether to accept...")
     if rand() < α      # accepted
       push!(spl.info[:accept_his], true)
-      vi
     else               # rejected
       push!(spl.info[:accept_his], false)
-      old_vi
+      vi = old_vi
     end
+
+    vi
   end
 end

From 91e5f43af8fdc7adf9c67c03b09300098e240e56 Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 23:37:15 +0100
Subject: [PATCH 22/55] Remove deepcopy from hmcda

---
 src/samplers/hmcda.jl            |  5 +++--
 src/samplers/support/hmc_core.jl | 20 ++++++++++----------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/samplers/hmcda.jl b/src/samplers/hmcda.jl
index c1a070da63..89808bc3b4 100644
--- a/src/samplers/hmcda.jl
+++ b/src/samplers/hmcda.jl
@@ -68,7 +68,7 @@ function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool)
     vi
   else
     dprintln(2, "recording old θ...")
-    old_vi = deepcopy(vi)
+    old_θ = vi[spl]
 
     # Set parameters
     δ = spl.alg.delta
@@ -127,7 +127,8 @@ function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool)
       push!(spl.info[:accept_his], true)
     else               # rejected
       push!(spl.info[:accept_his], false)
-      vi = old_vi
+      vi[spl] = old_θ
+      setlogp!(vi, zero(Real))
     end
 
     vi
diff --git a/src/samplers/support/hmc_core.jl b/src/samplers/support/hmc_core.jl
index 830556e7c6..ed333ffefa 100644
--- a/src/samplers/support/hmc_core.jl
+++ b/src/samplers/support/hmc_core.jl
@@ -15,6 +15,12 @@ sample_momentum(vi::VarInfo, spl::Sampler) = begin
 end
 
 # Leapfrog step
+leapfrog(vi::VarInfo, p::Vector, τ::Int, ϵ::Float64, model::Function, spl::Sampler, θ::Vector) = begin
+  vi[spl] = θ
+  setlogp!(vi, zero(Real))
+  leapfrog(vi, p, τ, ϵ, model, spl)
+end
+
 leapfrog(vi::VarInfo, p::Vector, τ::Int, ϵ::Float64, model::Function, spl::Sampler) = begin
 
   dprintln(3, "first gradient...")
@@ -23,7 +29,7 @@ leapfrog(vi::VarInfo, p::Vector, τ::Int, ϵ::Float64, model::Function, spl::Sam
   verifygrad(grad) || (return vi, p, 0)
 
   dprintln(2, "leapfrog stepping...")
-  τ_valid = 0; p_old = deepcopy(p)
+  τ_valid = 0; p_old = copy(p)
   for t in 1:τ  # do 'leapfrog' for each var
     p_old[1:end] = p[1:end]
@@ -63,21 +69,15 @@ find_good_eps{T}(model::Function, spl::Sampler{T}, vi::VarInfo) = begin
   ϵ, p = 1.0, sample_momentum(vi, spl)    # set initial epsilon and momentums
   log_p_r_Θ = -find_H(p, model, vi, spl)  # calculate p(Θ, r) = exp(-H(Θ, r))
 
-  # # Make a leapfrog step until accept
-  # vi_prime, p_prime, τ_valid = leapfrog(vi, p, 1, ϵ, model, spl)
-  # while τ_valid == 0
-  #   ϵ *= 0.5
-  #   vi_prime, p_prime, τ_valid = leapfrog(vi, p, 1, ϵ, model, spl)
-  # end
-  # ϵ_bar = ϵ
-  vi_prime, p_prime, τ = leapfrog(deepcopy(vi), deepcopy(p), 1, ϵ, model, spl)
+  θ = vi[spl]
+  vi_prime, p_prime, τ = leapfrog(vi, copy(p), 1, ϵ, model, spl, θ)
   log_p_r_Θ′ = τ == 0 ? -Inf : -find_H(p_prime, model, vi_prime, spl)  # calculate new p(Θ, p)
 
   # Heuristically find optimal ϵ
   a = 2.0 * (log_p_r_Θ′ - log_p_r_Θ > log(0.5) ? 1 : 0) - 1
   while (exp(log_p_r_Θ′ - log_p_r_Θ))^a > 2.0^(-a)
     ϵ = 2.0^a * ϵ
-    vi_prime, p_prime, τ = leapfrog(deepcopy(vi), deepcopy(p), 1, ϵ, model, spl)
+    vi_prime, p_prime, τ = leapfrog(vi, copy(p), 1, ϵ, model, spl, θ)
     log_p_r_Θ′ = τ == 0 ? -Inf : -find_H(p_prime, model, vi_prime, spl)
     dprintln(1, "a = $a, log_p_r_Θ′ = $log_p_r_Θ′")
   end

From a08f29ce8bbf9d60c39f836cc605017220f3e6bc Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 23:47:47 +0100
Subject: [PATCH 23/55] Fix model name

---
 benchmarks/bernoulli-stan.run.jl | 2 +-
 benchmarks/bernoulli.run.jl      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/benchmarks/bernoulli-stan.run.jl b/benchmarks/bernoulli-stan.run.jl
index 74bd76c902..14eb1ccc92 100644
--- a/benchmarks/bernoulli-stan.run.jl
+++ b/benchmarks/bernoulli-stan.run.jl
@@ -4,7 +4,7 @@ using Stan
 
 include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl")
 include(Pkg.dir("Turing")*"/example-models/stan-models/bernoulli-stan.data.jl")
-include(Pkg.dir("Turing")*"/example-models/stan-models/bernoulli-stan..model.jl")
+include(Pkg.dir("Turing")*"/example-models/stan-models/bernoulli-stan.model.jl")
 
 stan_model_name = "bernoulli"
 berstan = Stanmodel(name=stan_model_name, model=berstanmodel, nchains=1);
diff --git a/benchmarks/bernoulli.run.jl b/benchmarks/bernoulli.run.jl
index 3776955a81..f88097ef3d 100644
--- a/benchmarks/bernoulli.run.jl
+++ b/benchmarks/bernoulli.run.jl
@@ -4,7 +4,7 @@ using Stan
 
 include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl")
 include(Pkg.dir("Turing")*"/example-models/stan-models/bernoulli.data.jl")
-include(Pkg.dir("Turing")*"/example-models/stan-models/bernoulli..model.jl")
+include(Pkg.dir("Turing")*"/example-models/stan-models/bernoulli.model.jl")
 
 bench_res = tbenchmark("HMC(1000, 0.25, 5)", "bermodel", "berdata")
 logd = build_logd("Bernoulli Model", bench_res...)
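Patch 22's `old_θ = vi[spl]` replaces a full `deepcopy(vi)` with the snapshot-and-restore pattern: for Metropolis rejection only the flattened parameter vector has to be saved. In miniature, on plain arrays (illustrative):

```
θ = randn(3)
θ_old = copy(θ)           # cheap snapshot instead of deepcopying all state
θ = θ + 0.1 * randn(3)    # some proposal
logα = -0.5               # stand-in log acceptance ratio
if log(rand()) >= logα    # rejected
    θ = θ_old             # restore the snapshot
end
```

The accompanying `setlogp!(vi, zero(Real))` on rejection forces `find_H` to recompute the density for the restored values; patch 28 later caches `old_logp` so even that recomputation is skipped.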
From 99a8be4859f81130d206633be08374563e690d0c Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Thu, 11 May 2017 23:48:39 +0100
Subject: [PATCH 24/55] Remove expand! in leapfrog

---
 src/samplers/support/hmc_core.jl | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/samplers/support/hmc_core.jl b/src/samplers/support/hmc_core.jl
index ed333ffefa..8036210bc0 100644
--- a/src/samplers/support/hmc_core.jl
+++ b/src/samplers/support/hmc_core.jl
@@ -29,28 +29,24 @@ leapfrog(vi::VarInfo, p::Vector, τ::Int, ϵ::Float64, model::Function, spl::Sam
   verifygrad(grad) || (return vi, p, 0)
 
   dprintln(2, "leapfrog stepping...")
-  τ_valid = 0; p_old = copy(p)
+  τ_valid = 0;
   for t in 1:τ  # do 'leapfrog' for each var
-    p_old[1:end] = p[1:end]
+    p_old = copy(p); θ_old = vi[spl]
 
     p -= ϵ * grad / 2
 
-    expand!(vi)
-
     vi[spl] += ϵ * p  # full step for state
 
     grad = gradient2(vi, model, spl)
 
     # Verify gradients; reject if gradients is NaN or Inf
-    verifygrad(grad) || (shrink!(vi); p = p_old; break)
+    verifygrad(grad) || (vi[spl] = θ_old; p = p_old; break)
 
     p -= ϵ * grad / 2
 
     τ_valid += 1
   end
 
-  last!(vi)
-
   # Return updated θ and momentum
   vi, p, τ_valid
 end

From 1854e01f67a3dcb795bb295c46f147317d1b6a7b Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Fri, 12 May 2017 15:39:15 +0100
Subject: [PATCH 25/55] Pass vi into recursion

---
 src/samplers/nuts.jl             | 19 +++++++------
 src/samplers/support/hmc_core.jl | 48 ++++++++++++++++++++++++++------
 test/nuts.jl/nuts.jl             |  2 +-
 3 files changed, 50 insertions(+), 19 deletions(-)

diff --git a/src/samplers/nuts.jl b/src/samplers/nuts.jl
index 6efe74dc39..a4a9120ce0 100644
--- a/src/samplers/nuts.jl
+++ b/src/samplers/nuts.jl
@@ -78,16 +78,16 @@ function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool)
     dprintln(3, "sample slice variable u")
     logu = log(rand()) + (-H0)
 
-    θm, θp, rm, rp, j, vi_new, n, s = deepcopy(vi), deepcopy(vi), deepcopy(p), deepcopy(p), 0, deepcopy(vi), 1, 1
+    θm, θp, rm, rp, j, n, s = deepcopy(vi), deepcopy(vi), deepcopy(p), deepcopy(p), 0, 1, 1
     local α, n_α
     while s == 1 && j <= 2
       v_j = rand([-1, 1])  # Note: this variable actually does not depend on j;
                            #       it is set as `v_j` just to be consistent with the paper
       if v_j == -1
-        θm, rm, _, _, θ′, n′, s′, α, n_α = build_tree(θm, rm, logu, v_j, j, ϵ, H0, model, spl)
+        θm, rm, _, _, θ′, n′, s′, α, n_α = build_tree(θm, rm, logu, v_j, j, ϵ, H0, model, spl, vi)
       else
-        _, _, θp, rp, θ′, n′, s′, α, n_α = build_tree(θp, rp, logu, v_j, j, ϵ, H0, model, spl)
+        _, _, θp, rp, θ′, n′, s′, α, n_α = build_tree(θp, rp, logu, v_j, j, ϵ, H0, model, spl, vi)
       end
 
       haskey(spl.info, :progress) && ProgressMeter.update!(spl.info[:progress],
                                                            spl.info[:progress].counter;
                                                            showvalues = [(:ϵ, ϵ), (:tree_depth, j)])
 
      if s′ == 1 && rand() < min(1, n′ / n)
-        vi_new = deepcopy(θ′)
+        vi = deepcopy(θ′)
       end
 
       n = n + n′
@@ -121,11 +121,12 @@ function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool)
 
     push!(spl.info[:accept_his], true)
 
-    vi_new
+    vi
   end
 end
 
-function build_tree(θ::VarInfo, r::Vector, logu::Float64, v::Int, j::Int, ϵ::Float64, H0::Float64, model::Function, spl::Sampler)
+function build_tree(θ::VarInfo, r::Vector, logu::Float64, v::Int, j::Int, ϵ::Float64, H0::Float64,
+                    model::Function, spl::Sampler, vi::VarInfo)
   doc"""
     - θ   : model parameter
     - r   : momentum variable
@@ -147,13 +148,13 @@ function build_tree(θ::VarInfo, r::Vector, logu::Float64, v::Int, j::Int, ϵ::F
     θ′, r′, deepcopy(θ′), deepcopy(r′), deepcopy(θ′), n′, s′, α′, 1
   else
     # Recursion - build the left and right subtrees.
-    θm, rm, θp, rp, θ′, n′, s′, α′, n′_α = build_tree(θ, r, logu, v, j - 1, ϵ, H0, model, spl)
+    θm, rm, θp, rp, θ′, n′, s′, α′, n′_α = build_tree(θ, r, logu, v, j - 1, ϵ, H0, model, spl, vi)
 
     if s′ == 1
       if v == -1
-        θm, rm, _, _, θ′′, n′′, s′′, α′′, n′′_α = build_tree(θm, rm, logu, v, j - 1, ϵ, H0, model, spl)
+        θm, rm, _, _, θ′′, n′′, s′′, α′′, n′′_α = build_tree(θm, rm, logu, v, j - 1, ϵ, H0, model, spl, vi)
       else
-        _, _, θp, rp, θ′′, n′′, s′′, α′′, n′′_α = build_tree(θp, rp, logu, v, j - 1, ϵ, H0, model, spl)
+        _, _, θp, rp, θ′′, n′′, s′′, α′′, n′′_α = build_tree(θp, rp, logu, v, j - 1, ϵ, H0, model, spl, vi)
       end
       if rand() < n′′ / (n′ + n′′) θ′ = deepcopy(θ′′) end
       α′ = α′ + α′′
diff --git a/src/samplers/support/hmc_core.jl b/src/samplers/support/hmc_core.jl
index 8036210bc0..cc9ebf3900 100644
--- a/src/samplers/support/hmc_core.jl
+++ b/src/samplers/support/hmc_core.jl
@@ -15,10 +15,34 @@ sample_momentum(vi::VarInfo, spl::Sampler) = begin
 end
 
 # Leapfrog step
-leapfrog(vi::VarInfo, p::Vector, τ::Int, ϵ::Float64, model::Function, spl::Sampler, θ::Vector) = begin
+leapfrog2(θ::Vector, p::Vector, τ::Int, ϵ::Float64, model::Function, spl::Sampler, vi::VarInfo) = begin
+
   vi[spl] = θ
-  setlogp!(vi, zero(Real))
-  leapfrog(vi, p, τ, ϵ, model, spl)
+  grad = gradient2(vi, model, spl)
+  verifygrad(grad) || (return vi, p, 0)
+
+  τ_valid = 0
+  for t in 1:τ
+    p_old = copy(p); θ_old = θ
+
+    p -= ϵ * grad / 2
+
+    θ += ϵ * p  # full step for state
+
+    vi[spl] = θ
+    grad = gradient2(vi, model, spl)
+
+    # Verify gradients; reject if gradients is NaN or Inf
+    verifygrad(grad) || (θ = θ_old; p = p_old; break)
+
+    p -= ϵ * grad / 2
+
+    τ_valid += 1
+  end
+
+  # Return updated θ and momentum
+  θ, p, τ_valid
 end
 
 leapfrog(vi::VarInfo, p::Vector, τ::Int, ϵ::Float64, model::Function, spl::Sampler) = begin
@@ -46,11 +70,11 @@ leapfrog(vi::VarInfo, p::Vector, τ::Int, ϵ::Float64, model::Function, spl::Sam
   dprintln(3, "first gradient...")
   grad = gradient2(vi, model, spl)
-  # Verify gradients; reject if gradients is NaN or Inf
+  # Verify gradients; reject if gradients is NaN or Inf.
   verifygrad(grad) || (return vi, p, 0)
 
   dprintln(2, "leapfrog stepping...")
-  τ_valid = 0; p_old = copy(p)
+  τ_valid = 0
   for t in 1:τ  # do 'leapfrog' for each var
@@ -90,15 +114,19 @@ find_good_eps{T}(model::Function, spl::Sampler{T}, vi::VarInfo) = begin
   θ = vi[spl]
-  vi_prime, p_prime, τ = leapfrog(vi, copy(p), 1, ϵ, model, spl, θ)
-  log_p_r_Θ′ = τ == 0 ? -Inf : -find_H(p_prime, model, vi_prime, spl)  # calculate new p(Θ, p)
+  θ_prime, p_prime, τ = leapfrog2(θ, copy(p), 1, ϵ, model, spl, vi)
+  vi[spl] = θ_prime
+  setlogp!(vi, zero(Real))
+  log_p_r_Θ′ = τ == 0 ? -Inf : -find_H(p_prime, model, vi, spl)  # calculate new p(Θ, p)
 
   # Heuristically find optimal ϵ
   a = 2.0 * (log_p_r_Θ′ - log_p_r_Θ > log(0.5) ? 1 : 0) - 1
   while (exp(log_p_r_Θ′ - log_p_r_Θ))^a > 2.0^(-a)
     ϵ = 2.0^a * ϵ
-    vi_prime, p_prime, τ = leapfrog(vi, copy(p), 1, ϵ, model, spl, θ)
-    log_p_r_Θ′ = τ == 0 ? -Inf : -find_H(p_prime, model, vi_prime, spl)
+    θ_prime, p_prime, τ = leapfrog2(θ, copy(p), 1, ϵ, model, spl, vi)
+    vi[spl] = θ_prime
+    setlogp!(vi, zero(Real))
+    log_p_r_Θ′ = τ == 0 ? -Inf : -find_H(p_prime, model, vi, spl)
     dprintln(1, "a = $a, log_p_r_Θ′ = $log_p_r_Θ′")
   end
 
   println("\r[$T] found initial ϵ: ", ϵ)
+  vi[spl] = θ
+  setlogp!(vi, zero(Real))
 
   ϵ
 end
diff --git a/test/nuts.jl/nuts.jl b/test/nuts.jl/nuts.jl
index 06d983bcf7..37cc59692d 100644
--- a/test/nuts.jl/nuts.jl
+++ b/test/nuts.jl/nuts.jl
@@ -8,7 +8,7 @@ using Distributions, Turing
   return s, m
 end
 
-alg = NUTS(1000, 200, 0.65)
+alg = NUTS(2500, 500, 0.65)
 res = sample(gdemo([1.5, 2.0]), alg)
 check_numerical(res, [:s, :m], [49/24, 7/6])

From d5cd278ef301bb2d82a4b73b169b185fadb6795c Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Fri, 12 May 2017 15:43:38 +0100
Subject: [PATCH 26/55] Reorder params

---
 src/samplers/support/hmc_core.jl | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/samplers/support/hmc_core.jl b/src/samplers/support/hmc_core.jl
index cc9ebf3900..99e54b6310 100644
--- a/src/samplers/support/hmc_core.jl
+++ b/src/samplers/support/hmc_core.jl
@@ -15,7 +15,8 @@ sample_momentum(vi::VarInfo, spl::Sampler) = begin
 end
 
 # Leapfrog step
-leapfrog2(θ::Vector, p::Vector, τ::Int, ϵ::Float64, model::Function, spl::Sampler, vi::VarInfo) = begin
+leapfrog2(θ::Vector, p::Vector, τ::Int, ϵ::Float64,
+          model::Function, vi::VarInfo, spl::Sampler) = begin
 
   vi[spl] = θ
   grad = gradient2(vi, model, spl)
-  verifygrad(grad) || (return vi, p, 0)
+  verifygrad(grad) || (return θ, p, 0)
 
   τ_valid = 0
   for t in 1:τ
     p_old = copy(p); θ_old = θ
 
     p -= ϵ * grad / 2
-
     θ += ϵ * p  # full step for state
 
     vi[spl] = θ
     grad = gradient2(vi, model, spl)
-
-    # Verify gradients; reject if gradients is NaN or Inf
     verifygrad(grad) || (θ = θ_old; p = p_old; break)
 
     p -= ϵ * grad / 2
 
     τ_valid += 1
   end
 
-  # Return updated θ and momentum
   θ, p, τ_valid
 end
@@ -114,7 +110,7 @@ find_good_eps{T}(model::Function, spl::Sampler{T}, vi::VarInfo) = begin
   θ = vi[spl]
-  θ_prime, p_prime, τ = leapfrog2(θ, copy(p), 1, ϵ, model, spl, vi)
+  θ_prime, p_prime, τ = leapfrog2(θ, copy(p), 1, ϵ, model, vi, spl)
   vi[spl] = θ_prime
   setlogp!(vi, zero(Real))
   log_p_r_Θ′ = τ == 0 ? -Inf : -find_H(p_prime, model, vi, spl)  # calculate new p(Θ, p)
 
   # Heuristically find optimal ϵ
   a = 2.0 * (log_p_r_Θ′ - log_p_r_Θ > log(0.5) ? 1 : 0) - 1
   while (exp(log_p_r_Θ′ - log_p_r_Θ))^a > 2.0^(-a)
     ϵ = 2.0^a * ϵ
-    θ_prime, p_prime, τ = leapfrog2(θ, copy(p), 1, ϵ, model, spl, vi)
+    θ_prime, p_prime, τ = leapfrog2(θ, copy(p), 1, ϵ, model, vi, spl)
     log_p_r_Θ′ = τ == 0 ? -Inf : -find_H(p_prime, model, vi, spl)
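`find_good_eps` implements the FindReasonableEpsilon heuristic from Hoffman & Gelman's NUTS paper: take one leapfrog step, check whether its acceptance probability is above 1/2, and keep doubling (or halving) ϵ until that threshold is crossed. A runnable sketch with a stand-in for the one-step log-density (illustrative):

```
onestep_logp(ϵ) = -0.5 * ϵ^2   # stand-in for -H after one step of size ϵ
ϵ = 1.0
logp0 = onestep_logp(0.0)
a = 2.0 * (onestep_logp(ϵ) - logp0 > log(0.5) ? 1 : 0) - 1
while (exp(onestep_logp(ϵ) - logp0))^a > 2.0^(-a)
    ϵ = 2.0^a * ϵ              # double (a = 1) or halve (a = -1)
end
ϵ                              # 2.0 for this stand-in
```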
-Inf : -find_H(p_prime, model, vi, spl) From 0fdc4be5dfa26ec45d3fb1f9ae82d546522ecb6a Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 12 May 2017 15:54:05 +0100 Subject: [PATCH 27/55] Use new leapfrog in HMCDA --- src/samplers/hmcda.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/samplers/hmcda.jl b/src/samplers/hmcda.jl index 89808bc3b4..1940555989 100644 --- a/src/samplers/hmcda.jl +++ b/src/samplers/hmcda.jl @@ -90,9 +90,11 @@ function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool) τ = max(1, round(Int, λ / ϵ)) dprintln(2, "leapfrog for $τ steps with step size $ϵ") - vi, p, τ_valid = leapfrog(vi, p, τ, ϵ, model, spl) + θ, p, τ_valid = leapfrog2(old_θ, p, τ, ϵ, model, vi, spl) dprintln(2, "computing new H...") + vi[spl] = θ + setlogp!(vi, zero(Real)) H = τ_valid == 0 ? Inf : find_H(p, model, vi, spl) dprintln(2, "computing ΔH...") From 47f6342e413d3410274648abb130b99a5006975a Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 12 May 2017 16:35:30 +0100 Subject: [PATCH 28/55] Improve HMCDA --- src/samplers/hmc.jl | 5 ++++- src/samplers/hmcda.jl | 29 +++++++++++++++-------------- src/samplers/nuts.jl | 2 +- src/samplers/support/hmc_core.jl | 23 +++++++++++------------ test/compiler.jl/noreturn.jl | 1 + 5 files changed, 32 insertions(+), 28 deletions(-) diff --git a/src/samplers/hmc.jl b/src/samplers/hmc.jl index d964ee89f2..ecd965b236 100644 --- a/src/samplers/hmc.jl +++ b/src/samplers/hmc.jl @@ -73,7 +73,10 @@ function sample{T<:Hamiltonian}(model::Function, alg::T, chunk_size::Int) end vi = model() - if spl.alg.gid == 0 link!(vi, spl) end + if spl.alg.gid == 0 + link!(vi, spl) + runmodel(model, vi, spl) + end # HMC steps spl.info[:progress] = ProgressMeter.Progress(n, 1, "[$alg_str] Sampling...", 0) diff --git a/src/samplers/hmcda.jl b/src/samplers/hmcda.jl index 1940555989..d5025a648f 100644 --- a/src/samplers/hmcda.jl +++ b/src/samplers/hmcda.jl @@ -51,7 +51,7 @@ function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool) if is_first if spl.alg.delta > 0 if spl.alg.gid != 0 link!(vi, spl) end # X -> R - ϵ = find_good_eps(model, spl, vi) # heuristically find optimal ϵ + ϵ = find_good_eps(model, vi, spl) # heuristically find optimal ϵ if spl.alg.gid != 0 invlink!(vi, spl) end # R -> X else ϵ = spl.info[:ϵ][end] @@ -67,9 +67,6 @@ function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool) vi else - dprintln(2, "recording old θ...") - old_θ = vi[spl] - # Set parameters δ = spl.alg.delta λ = spl.alg.lambda @@ -82,19 +79,22 @@ function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool) dprintln(2, "sampling momentum...") p = sample_momentum(vi, spl) - dprintln(3, "X -> R...") - if spl.alg.gid != 0 link!(vi, spl) end + if spl.alg.gid != 0 + link!(vi, spl) + runmodel(model, vi, spl) + end - dprintln(2, "recording old H...") - oldH = find_H(p, model, vi, spl) + oldH = find_H(p, getlogp(vi)) + + dprintln(2, "recording old θ...") + old_θ = vi[spl] + old_logp = getlogp(vi) τ = max(1, round(Int, λ / ϵ)) dprintln(2, "leapfrog for $τ steps with step size $ϵ") θ, p, τ_valid = leapfrog2(old_θ, p, τ, ϵ, model, vi, spl) dprintln(2, "computing new H...") - vi[spl] = θ - setlogp!(vi, zero(Real)) H = τ_valid == 0 ? 
Inf : find_H(p, model, vi, spl)

  dprintln(2, "computing ΔH...")
@@ -121,17 +121,18 @@ function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool)
     push!(spl.info[:ϵ], spl.info[:ϵ_bar])
   end

-  dprintln(3, "R -> X...")
-  if spl.alg.gid != 0 invlink!(vi, spl); cleandual!(vi) end
-
   dprintln(2, "decide whether to accept...")
   if rand() < α      # accepted
     push!(spl.info[:accept_his], true)
   else               # rejected
     push!(spl.info[:accept_his], false)
     vi[spl] = old_θ
-    setlogp!(vi, zero(Real))
+    setlogp!(vi, old_logp)
   end
+
+  dprintln(3, "R -> X...")
+  if spl.alg.gid != 0 invlink!(vi, spl); cleandual!(vi) end
+
   vi
 end
end

diff --git a/src/samplers/nuts.jl b/src/samplers/nuts.jl
index a4a9120ce0..11badf4652 100644
--- a/src/samplers/nuts.jl
+++ b/src/samplers/nuts.jl
@@ -45,7 +45,7 @@ end
 function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool)
   if is_first
     if spl.alg.gid != 0 link!(vi, spl) end      # X -> R
-    ϵ = find_good_eps(model, spl, vi)           # heuristically find optimal ϵ
+    ϵ = find_good_eps(model, vi, spl)           # heuristically find optimal ϵ
     if spl.alg.gid != 0 invlink!(vi, spl) end   # R -> X

     spl.info[:ϵ] = ϵ

diff --git a/src/samplers/support/hmc_core.jl b/src/samplers/support/hmc_core.jl
index 99e54b6310..9003c682b1 100644
--- a/src/samplers/support/hmc_core.jl
+++ b/src/samplers/support/hmc_core.jl
@@ -20,18 +20,18 @@ leapfrog2(θ::Vector, p::Vector, τ::Int, ϵ::Float64,
   vi[spl] = θ
   grad = gradient2(vi, model, spl)
-  verifygrad(grad) || (return vi, p, 0)
+  verifygrad(grad) || (return θ, p, 0)

   τ_valid = 0
   for t in 1:τ
-    p_old = copy(p); θ_old = θ
+    p_old = copy(p); θ_old = θ; old_logp = getlogp(vi)

     p -= ϵ * grad / 2
     θ += ϵ * p # full step for state

     vi[spl] = θ
     grad = gradient2(vi, model, spl)
-    verifygrad(grad) || (θ = θ_old; p = p_old; break)
+    verifygrad(grad) || (vi[spl] = θ_old; setlogp!(vi, old_logp); θ = θ_old; p = p_old; break)

     p -= ϵ * grad / 2

     τ_valid += 1
   end
@@ -71,6 +71,11 @@ leapfrog(vi::VarInfo, p::Vector, τ::Int, ϵ::Float64, model::Function, spl::Sam
   vi, p, τ_valid
 end

+find_H(p::Vector, logp::Real) = begin
+  H = dot(p, p) / 2 + realpart(-logp)
+  if isnan(H) H = Inf else H end
+end
+
 # Compute Hamiltonian
 find_H(p::Vector, model::Function, vi::VarInfo, spl::Sampler) = begin
   # NOTE: getlogp(vi) = 0 means the current vals[end] hasn't been used at all.
@@ -81,29 +86,23 @@ find_H(p::Vector, model::Function, vi::VarInfo, spl::Sampler) = begin
 find_good_eps{T}(model::Function, vi::VarInfo, spl::Sampler{T}) = begin
   ϵ, p = 1.0, sample_momentum(vi, spl)    # set initial epsilon and momentums
   log_p_r_Θ = -find_H(p, model, vi, spl)  # calculate p(Θ, r) = exp(-H(Θ, r))

   θ = vi[spl]
-  θ_prime, p_prime, τ = leapfrog2(θ, copy(p), 1, ϵ, model, vi, spl)
-  vi[spl] = θ_prime
-  setlogp!(vi, zero(Real))
+  θ_prime, p_prime, τ = leapfrog2(θ, p, 1, ϵ, model, vi, spl)
   log_p_r_Θ′ = τ == 0 ? -Inf : -find_H(p_prime, model, vi, spl)  # calculate new p(Θ, p)

   # Heuristically find optimal ϵ
   a = 2.0 * (log_p_r_Θ′ - log_p_r_Θ > log(0.5) ? 1 : 0) - 1
   while (exp(log_p_r_Θ′ - log_p_r_Θ))^a > 2.0^(-a)
     ϵ = 2.0^a * ϵ
-    θ_prime, p_prime, τ = leapfrog2(θ, copy(p), 1, ϵ, model, vi, spl)
-    vi[spl] = θ_prime
-    setlogp!(vi, zero(Real))
+    θ_prime, p_prime, τ = leapfrog2(θ, p, 1, ϵ, model, vi, spl)
    log_p_r_Θ′ = τ == 0 ? -Inf : -find_H(p_prime, model, vi, spl)
    dprintln(1, "a = $a, log_p_r_Θ′ = $log_p_r_Θ′")
   end

   println("\r[$T] found initial ϵ: ", ϵ)
-  vi[spl] = θ
-  setlogp!(vi, zero(Real))

   ϵ
 end

diff --git a/test/compiler.jl/noreturn.jl b/test/compiler.jl/noreturn.jl
index b5aa793e93..6f0dc3b299 100644
--- a/test/compiler.jl/noreturn.jl
+++ b/test/compiler.jl/noreturn.jl
@@ -1,3 +1,4 @@
+include("../utility.jl")
 using Distributions
 using Turing
 using Base.Test

From 63b4b02584e2717ebdf1f3e3df541d4b00f87986 Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Fri, 12 May 2017 16:41:10 +0100
Subject: [PATCH 29/55] Improve HMCDA code

---
 src/samplers/hmcda.jl | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/src/samplers/hmcda.jl b/src/samplers/hmcda.jl
index d5025a648f..37419643c6 100644
--- a/src/samplers/hmcda.jl
+++ b/src/samplers/hmcda.jl
@@ -76,41 +76,40 @@ function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool)
     μ, γ, t_0, κ = spl.info[:μ], 0.05, 10, 0.75
     ϵ_bar, H_bar = spl.info[:ϵ_bar], spl.info[:H_bar]

-    dprintln(2, "sampling momentum...")
-    p = sample_momentum(vi, spl)
-
+    dprintln(3, "X -> R...")
     if spl.alg.gid != 0
       link!(vi, spl)
       runmodel(model, vi, spl)
     end

-    oldH = find_H(p, getlogp(vi))
+    dprintln(2, "sampling momentum...")
+    p = sample_momentum(vi, spl)

-    dprintln(2, "recording old θ...")
+    dprintln(2, "recording old values...")
     old_θ = vi[spl]
     old_logp = getlogp(vi)
+    old_H = find_H(p, old_logp)

-    τ = max(1, round(Int, λ / ϵ))
     dprintln(2, "leapfrog for $τ steps with step size $ϵ")
+    τ = max(1, round(Int, λ / ϵ))
     θ, p, τ_valid = leapfrog2(old_θ, p, τ, ϵ, model, vi, spl)

     dprintln(2, "computing new H...")
     H = τ_valid == 0 ? Inf : find_H(p, model, vi, spl)

-    dprintln(2, "computing ΔH...")
-    ΔH = H - oldH
+    dprintln(2, "computing accept rate α...")
+    α = min(1, exp(-(H - old_H)))

-    α = min(1, exp(-ΔH))  # MH accept rate
     haskey(spl.info, :progress) && ProgressMeter.update!(
       spl.info[:progress],
       spl.info[:progress].counter; showvalues = [(:ϵ, ϵ), (:α, α)]
     )

-    # Use Dual Averaging to adapt ϵ
+    dprintln(2, "adapting step size ϵ...")
     m = spl.info[:m] += 1
     if m < spl.alg.n_adapt
-      dprintln(1, " ϵ = $ϵ, α = $α, exp(-ΔH)=$(exp(-ΔH))")
+      dprintln(1, " ϵ = $ϵ, α = $α")
       H_bar = (1 - 1 / (m + t_0)) * H_bar + 1 / (m + t_0) * (δ - α)
       ϵ = exp(μ - sqrt(m) / γ * H_bar)
       ϵ_bar = exp(m^(-κ) * log(ϵ) + (1 - m^(-κ)) * log(ϵ_bar))

From 5c09dd84db97c2826781b012be4a514b302c477b Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Fri, 12 May 2017 17:08:43 +0100
Subject: [PATCH 30/55] Add info log to HMCDA

---
 src/samplers/hmc.jl              |  9 ++++++++-
 src/samplers/hmcda.jl            | 11 +++++------
 src/samplers/support/hmc_core.jl |  2 ++
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/src/samplers/hmc.jl b/src/samplers/hmc.jl
index ecd965b236..a7cb69fb26 100644
--- a/src/samplers/hmc.jl
+++ b/src/samplers/hmc.jl
@@ -50,6 +50,8 @@ end
 Sampler(alg::Hamiltonian) = begin
   info=Dict{Symbol, Any}()
   info[:accept_his] = []
+  info[:lf_num] = 0
+  info[:eval_num] = 0
   Sampler(alg, info)
 end
@@ -93,7 +95,12 @@ function sample{T<:Hamiltonian}(model::Function, alg::T, chunk_size::Int)
   if ~isa(alg, NUTS) # cccept rate for NUTS is meaningless - so no printing
     accept_rate = sum(spl.info[:accept_his]) / n  # calculate the accept rate
-    println("[$alg_str] Done with accept rate = $accept_rate.")
+    log_str = """
+    [$alg_str] Done with accept rate = $accept_rate;
+    #lf / sample = $(spl.info[:lf_num] / n);
+    #evals / sample = $(spl.info[:eval_num] / n).
+    """
+    println(log_str)
   end

diff --git a/src/samplers/hmcda.jl b/src/samplers/hmcda.jl
index 37419643c6..e0c67b4f20 100644
--- a/src/samplers/hmcda.jl
+++ b/src/samplers/hmcda.jl
@@ -90,8 +90,8 @@ function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool)
     old_logp = getlogp(vi)
     old_H = find_H(p, old_logp)

-    dprintln(2, "leapfrog for $τ steps with step size $ϵ")
     τ = max(1, round(Int, λ / ϵ))
+    dprintln(2, "leapfrog for $τ steps with step size $ϵ")
     θ, p, τ_valid = leapfrog2(old_θ, p, τ, ϵ, model, vi, spl)

     dprintln(2, "computing new H...")
@@ -100,7 +100,6 @@ function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool)
     dprintln(2, "computing accept rate α...")
     α = min(1, exp(-(H - old_H)))

-
     haskey(spl.info, :progress) && ProgressMeter.update!(
       spl.info[:progress],
       spl.info[:progress].counter; showvalues = [(:ϵ, ϵ), (:α, α)]
     )
@@ -121,12 +120,12 @@ function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool)
     end

     dprintln(2, "decide whether to accept...")
-    if rand() < α      # accepted
+    if rand() < α             # accepted
       push!(spl.info[:accept_his], true)
-    else               # rejected
+    else                      # rejected
       push!(spl.info[:accept_his], false)
-      vi[spl] = old_θ
-      setlogp!(vi, old_logp)
+      vi[spl] = old_θ         # reset Θ
+      setlogp!(vi, old_logp)  # reset logp
     end

     dprintln(3, "R -> X...")

diff --git a/src/samplers/support/hmc_core.jl b/src/samplers/support/hmc_core.jl
index 9003c682b1..9139b66ee2 100644
--- a/src/samplers/support/hmc_core.jl
+++ b/src/samplers/support/hmc_core.jl
@@ -6,6 +6,7 @@ runmodel(model::Function, vi::VarInfo, spl::Union{Void,Sampler}) = begin
   dprintln(4, "run model...")
   vi.index = 0
   setlogp!(vi, zero(Real))
+  if spl != nothing spl.info[:eval_num] += 1 end
   model(vi=vi, sampler=spl) # run model
 end
@@ -28,6 +29,7 @@ leapfrog2(θ::Vector, p::Vector, τ::Int, ϵ::Float64,
     p -= ϵ * grad / 2
     θ += ϵ * p # full step for state
+    spl.info[:lf_num] += 1  # record leapfrog num

     vi[spl] = θ
     grad = gradient2(vi, model, spl)

From f003531c668339a77a5abc20838f6fb76d1bb772 Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Fri, 12 May 2017 17:10:36 +0100
Subject: [PATCH 31/55] Better alignment

---
 src/samplers/hmc.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/samplers/hmc.jl b/src/samplers/hmc.jl
index a7cb69fb26..f3d10ab516 100644
--- a/src/samplers/hmc.jl
+++ b/src/samplers/hmc.jl
@@ -96,9 +96,9 @@ function sample{T<:Hamiltonian}(model::Function, alg::T, chunk_size::Int)
   if ~isa(alg, NUTS) # cccept rate for NUTS is meaningless - so no printing
     accept_rate = sum(spl.info[:accept_his]) / n  # calculate the accept rate
     log_str = """
-    [$alg_str] Done with accept rate = $accept_rate;
-    #lf / sample = $(spl.info[:lf_num] / n);
-    #evals / sample = $(spl.info[:eval_num] / n).
+[$alg_str] Done with accept rate = $accept_rate;
+  #lf / sample = $(spl.info[:lf_num] / n);
+  #evals / sample = $(spl.info[:eval_num] / n).
""" println(log_str) end From 289b004a83acc0f7b15e23b70e040207434ebbe9 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 12 May 2017 17:13:20 +0100 Subject: [PATCH 32/55] One more LDA run --- .travis.yml | 2 +- benchmarks/lda.run.jl | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index 779a1a6524..73947e79b6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -29,7 +29,7 @@ matrix: env: GROUP=MOC - os: osx env: GROUP=SV - + allow_failures: - env: GROUP=Test os: osx diff --git a/benchmarks/lda.run.jl b/benchmarks/lda.run.jl index 13b2b34080..2810be12c9 100644 --- a/benchmarks/lda.run.jl +++ b/benchmarks/lda.run.jl @@ -6,12 +6,13 @@ include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl") include(Pkg.dir("Turing")*"/example-models/stan-models/lda-stan.data.jl") include(Pkg.dir("Turing")*"/example-models/stan-models/lda.model.jl") -bench_res = tbenchmark("HMCDA(1000, 0.65, 1.5)", "ldamodel", "data=ldastandata[1]") -bench_res[4].names = ["phi[1]", "phi[2]"] -logd = build_logd("LDA", bench_res...) - include(Pkg.dir("Turing")*"/benchmarks/"*"lda-stan.run.jl") -logd["stan"] = lda_stan_d -logd["time_stan"] = lda_time -print_log(logd) +for alg in ["HMC(1000, 0.25, 6)", "HMCDA(1000, 0.65, 1.5)"] + bench_res = tbenchmark(alg, "ldamodel", "data=ldastandata[1]") + bench_res[4].names = ["phi[1]", "phi[2]"] + logd = build_logd("LDA", bench_res...) + logd["stan"] = lda_stan_d + logd["time_stan"] = lda_time + print_log(logd) +end From f13ac66cb4304c7e029cd5303e7e60ea6ab0e617 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 12 May 2017 17:19:23 +0100 Subject: [PATCH 33/55] Make default chunksize setting only change in one run --- src/samplers/hmc.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/samplers/hmc.jl b/src/samplers/hmc.jl index f3d10ab516..e2643a84d9 100644 --- a/src/samplers/hmc.jl +++ b/src/samplers/hmc.jl @@ -60,7 +60,9 @@ sample(model::Function, alg::Hamiltonian) = sample(model, alg, CHUNKSIZE) # NOTE: in the previous code, `sample` would call `run`; this is # now simplified: `sample` and `run` are merged into one function. function sample{T<:Hamiltonian}(model::Function, alg::T, chunk_size::Int) - global CHUNKSIZE = chunk_size; + default_chunk_size = CHUNKSIZE + global CHUNKSIZE = chunk_size + spl = Sampler(alg); alg_str = isa(alg, HMC) ? "HMC" : isa(alg, HMCDA) ? "HMCDA" : @@ -103,6 +105,8 @@ function sample{T<:Hamiltonian}(model::Function, alg::T, chunk_size::Int) println(log_str) end + global CHUNKSIZE = default_chunk_size + Chain(0, samples) # wrap the result by Chain end From 3a0b618e9e5aa9cef72614f98175422e1b69f877 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 12 May 2017 17:39:25 +0100 Subject: [PATCH 34/55] Better time recording and remove new find_H --- src/samplers/hmc.jl | 18 ++++++++++-------- src/samplers/hmcda.jl | 5 ++--- src/samplers/support/hmc_core.jl | 6 +----- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/src/samplers/hmc.jl b/src/samplers/hmc.jl index e2643a84d9..8ec6b7c5a2 100644 --- a/src/samplers/hmc.jl +++ b/src/samplers/hmc.jl @@ -69,6 +69,7 @@ function sample{T<:Hamiltonian}(model::Function, alg::T, chunk_size::Int) isa(alg, NUTS) ? 
"NUTS" : "Hamiltonian" # Initialization + time_total = zero(Float64) n = spl.alg.n_iters samples = Array{Sample}(n) weight = 1 / n @@ -86,7 +87,9 @@ function sample{T<:Hamiltonian}(model::Function, alg::T, chunk_size::Int) spl.info[:progress] = ProgressMeter.Progress(n, 1, "[$alg_str] Sampling...", 0) for i = 1:n dprintln(2, "$alg_str stepping...") - vi = step(model, spl, vi, i==1) + + time_total += @elapsed vi = step(model, spl, vi, i==1) + if spl.info[:accept_his][end] # accepted => store the new predcits samples[i].value = Sample(vi).value else # rejected => store the previous predcits @@ -95,15 +98,14 @@ function sample{T<:Hamiltonian}(model::Function, alg::T, chunk_size::Int) ProgressMeter.next!(spl.info[:progress]) end - if ~isa(alg, NUTS) # cccept rate for NUTS is meaningless - so no printing + println("[$alg_str] Finished with") + println(" Running time = $time_total;") + if ~isa(alg, NUTS) # accept rate for NUTS is meaningless - so no printing accept_rate = sum(spl.info[:accept_his]) / n # calculate the accept rate - log_str = """ -[$alg_str] Done with accept rate = $accept_rate; - #lf / sample = $(spl.info[:lf_num] / n); - #evals / sample = $(spl.info[:eval_num] / n). - """ - println(log_str) + println(" Accept rate = $accept_rate;") end + println(" #lf / sample = $(spl.info[:lf_num] / n);") + println(" #evals / sample = $(spl.info[:eval_num] / n).") global CHUNKSIZE = default_chunk_size diff --git a/src/samplers/hmcda.jl b/src/samplers/hmcda.jl index e0c67b4f20..46cb966b60 100644 --- a/src/samplers/hmcda.jl +++ b/src/samplers/hmcda.jl @@ -86,9 +86,8 @@ function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool) p = sample_momentum(vi, spl) dprintln(2, "recording old values...") - old_θ = vi[spl] - old_logp = getlogp(vi) - old_H = find_H(p, old_logp) + old_θ = vi[spl]; old_logp = getlogp(vi) + old_H = find_H(p, model, vi, spl) τ = max(1, round(Int, λ / ϵ)) dprintln(2, "leapfrog for $τ steps with step size $ϵ") diff --git a/src/samplers/support/hmc_core.jl b/src/samplers/support/hmc_core.jl index 9139b66ee2..ded520fcc6 100644 --- a/src/samplers/support/hmc_core.jl +++ b/src/samplers/support/hmc_core.jl @@ -58,6 +58,7 @@ leapfrog(vi::VarInfo, p::Vector, τ::Int, ϵ::Float64, model::Function, spl::Sam p -= ϵ * grad / 2 vi[spl] += ϵ * p # full step for state + spl.info[:lf_num] += 1 # record leapfrog num grad = gradient2(vi, model, spl) @@ -73,11 +74,6 @@ leapfrog(vi::VarInfo, p::Vector, τ::Int, ϵ::Float64, model::Function, spl::Sam vi, p, τ_valid end -find_H(p::Vector, logp::Real) = begin - H = dot(p, p) / 2 + realpart(-logp) - if isnan(H) H = Inf else H end -end - # Compute Hamiltonian find_H(p::Vector, model::Function, vi::VarInfo, spl::Sampler) = begin # NOTE: getlogp(vi) = 0 means the current vals[end] hasn't been used at all. 
From a023e20662ba839de8ffa7a06845a37c8476c318 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 12 May 2017 21:40:58 +0100 Subject: [PATCH 35/55] Use new leapfrog in NUTS --- src/samplers/nuts.jl | 33 ++++++++++++++++++++------------ src/samplers/support/hmc_core.jl | 2 +- test/nuts.jl/nuts.jl | 1 + 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/src/samplers/nuts.jl b/src/samplers/nuts.jl index 11badf4652..04ae2a7463 100644 --- a/src/samplers/nuts.jl +++ b/src/samplers/nuts.jl @@ -66,19 +66,23 @@ function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool) μ, γ, t_0, κ = spl.info[:μ], 0.05, 10, 0.75 ϵ_bar, H_bar = spl.info[:ϵ_bar], spl.info[:H_bar] + dprintln(3, "X -> R...") + if spl.alg.gid != 0 + link!(vi, spl) + runmodel(model, vi, spl) + end + dprintln(2, "sampling momentum...") p = sample_momentum(vi, spl) - dprintln(3, "X -> R...") - if spl.alg.gid != 0 link!(vi, spl) end - dprintln(2, "recording old H...") H0 = find_H(p, model, vi, spl) dprintln(3, "sample slice variable u") logu = log(rand()) + (-H0) - θm, θp, rm, rp, j, n, s = deepcopy(vi), deepcopy(vi), deepcopy(p), deepcopy(p), 0, 1, 1 + θ = vi[spl] + θm, θp, rm, rp, j, n, s = θ, θ, p, p, 0, 1, 1 local α, n_α while s == 1 && j <= 2 @@ -95,11 +99,11 @@ function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool) showvalues = [(:ϵ, ϵ), (:tree_depth, j)]) if s′ == 1 && rand() < min(1, n′ / n) - vi = deepcopy(θ′) + θ = θ′ end n = n + n′ - s = s′ * (dot(θp[spl] - θm[spl], rm) >= 0 ? 1 : 0) * (dot(θp[spl] - θm[spl], rp) >= 0 ? 1 : 0) + s = s′ * (dot(θp - θm, rm) >= 0 ? 1 : 0) * (dot(θp - θm, rp) >= 0 ? 1 : 0) j = j + 1 end @@ -121,11 +125,14 @@ function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool) push!(spl.info[:accept_his], true) + vi[spl] = θ + runmodel(model, vi, spl) + vi end end -function build_tree(θ::VarInfo, r::Vector, logu::Float64, v::Int, j::Int, ϵ::Float64, H0::Float64, +function build_tree(θ::Vector, r::Vector, logu::Float64, v::Int, j::Int, ϵ::Float64, H0::Float64, model::Function, spl::Sampler, vi::VarInfo) doc""" - θ : model parameter @@ -138,14 +145,14 @@ function build_tree(θ::VarInfo, r::Vector, logu::Float64, v::Int, j::Int, ϵ::F """ if j == 0 # Base case - take one leapfrog step in the direction v. - θ′, r′, τ_valid = leapfrog(θ, r, 1, v * ϵ, model, spl) + θ′, r′, τ_valid = leapfrog2(θ, r, 1, v * ϵ, model, vi, spl) # Use old H to save computation - H′ = τ_valid == 0 ? Inf : find_H(r′, model, θ′, spl) + H′ = τ_valid == 0 ? Inf : find_H(r′, model, vi, spl) n′ = (logu <= -H′) ? 1 : 0 s′ = (logu < Δ_max + -H′) ? 1 : 0 α′ = exp(min(0, -H′ - (-H0))) - θ′, r′, deepcopy(θ′), deepcopy(r′), deepcopy(θ′), n′, s′, α′, 1 + θ′, r′, θ′, r′, θ′, n′, s′, α′, 1 else # Recursion - build the left and right subtrees. θm, rm, θp, rp, θ′, n′, s′, α′, n′_α = build_tree(θ, r, logu, v, j - 1, ϵ, H0, model, spl, vi) @@ -156,10 +163,12 @@ function build_tree(θ::VarInfo, r::Vector, logu::Float64, v::Int, j::Int, ϵ::F else _, _, θp, rp, θ′′, n′′, s′′, α′′, n′′_α = build_tree(θp, rp, logu, v, j - 1, ϵ, H0, model, spl, vi) end - if rand() < n′′ / (n′ + n′′) θ′ = deepcopy(θ′′) end + if rand() < n′′ / (n′ + n′′) + θ′ = θ′′ + end α′ = α′ + α′′ n′_α = n′_α + n′′_α - s′ = s′′ * (dot(θp[spl] - θm[spl], rm) >= 0 ? 1 : 0) * (dot(θp[spl] - θm[spl], rp) >= 0 ? 1 : 0) + s′ = s′′ * (dot(θp - θm, rm) >= 0 ? 1 : 0) * (dot(θp - θm, rp) >= 0 ? 
1 : 0) n′ = n′ + n′′ end diff --git a/src/samplers/support/hmc_core.jl b/src/samplers/support/hmc_core.jl index ded520fcc6..3309425224 100644 --- a/src/samplers/support/hmc_core.jl +++ b/src/samplers/support/hmc_core.jl @@ -47,7 +47,7 @@ leapfrog(vi::VarInfo, p::Vector, τ::Int, ϵ::Float64, model::Function, spl::Sam dprintln(3, "first gradient...") grad = gradient2(vi, model, spl) - # Verify gradients; reject if gradients is NaN or Inf. + verifygrad(grad) || (return vi, p, 0) dprintln(2, "leapfrog stepping...") diff --git a/test/nuts.jl/nuts.jl b/test/nuts.jl/nuts.jl index 37cc59692d..8ff70e758a 100644 --- a/test/nuts.jl/nuts.jl +++ b/test/nuts.jl/nuts.jl @@ -1,3 +1,4 @@ +include("../utility.jl") using Distributions, Turing @model gdemo(x) = begin From 0be52548a7814b1cf959cd13f41ef6a728737688 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 12 May 2017 21:46:04 +0100 Subject: [PATCH 36/55] Fix #245 --- src/samplers/nuts.jl | 9 ++++++--- test/nuts.jl/nuts.jl | 6 ++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/samplers/nuts.jl b/src/samplers/nuts.jl index 04ae2a7463..6db4a758ed 100644 --- a/src/samplers/nuts.jl +++ b/src/samplers/nuts.jl @@ -32,7 +32,7 @@ immutable NUTS <: InferenceAlgorithm gid :: Int # group ID NUTS(n_adapt::Int, delta::Float64, space...) = - new(1, isa(space, Symbol) ? Set([space]) : Set(space), delta, Set(), 0) + new(1, n_adapt, delta, isa(space, Symbol) ? Set([space]) : Set(space), 0) NUTS(n_iters::Int, n_adapt::Int, delta::Float64, space...) = new(n_iters, n_adapt, delta, isa(space, Symbol) ? Set([space]) : Set(space), 0) NUTS(n_iters::Int, delta::Float64) = begin @@ -63,6 +63,7 @@ function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool) δ = spl.alg.delta ϵ = spl.info[:ϵ] + dprintln(2, "current ϵ: $ϵ") μ, γ, t_0, κ = spl.info[:μ], 0.05, 10, 0.75 ϵ_bar, H_bar = spl.info[:ϵ_bar], spl.info[:H_bar] @@ -107,8 +108,7 @@ function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool) j = j + 1 end - dprintln(3, "R -> X...") - if spl.alg.gid != 0 invlink!(vi, spl); cleandual!(vi) end + # Use Dual Averaging to adapt ϵ m = spl.info[:m] += 1 @@ -128,6 +128,9 @@ function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool) vi[spl] = θ runmodel(model, vi, spl) + dprintln(3, "R -> X...") + if spl.alg.gid != 0 invlink!(vi, spl); cleandual!(vi) end + vi end end diff --git a/test/nuts.jl/nuts.jl b/test/nuts.jl/nuts.jl index 8ff70e758a..559d02e878 100644 --- a/test/nuts.jl/nuts.jl +++ b/test/nuts.jl/nuts.jl @@ -13,3 +13,9 @@ alg = NUTS(2500, 500, 0.65) res = sample(gdemo([1.5, 2.0]), alg) check_numerical(res, [:s, :m], [49/24, 7/6]) + + +alg = Gibbs(2500, PG(50, 1, :s), NUTS(500, 0.65, :m)) +res = sample(gdemo([1.5, 2.0]), alg) + +check_numerical(res, [:s, :m], [49/24, 7/6]) From 7bf7e651cac979b413d025baf3df704435e14bcb Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 12 May 2017 21:58:02 +0100 Subject: [PATCH 37/55] Cache logp in NUTS --- src/samplers/nuts.jl | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/samplers/nuts.jl b/src/samplers/nuts.jl index 6db4a758ed..eedcfb751a 100644 --- a/src/samplers/nuts.jl +++ b/src/samplers/nuts.jl @@ -83,6 +83,7 @@ function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool) logu = log(rand()) + (-H0) θ = vi[spl] + logp = getlogp(vi) θm, θp, rm, rp, j, n, s = θ, θ, p, p, 0, 1, 1 local α, n_α @@ -90,9 +91,9 @@ function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool) v_j = rand([-1, 
1]) # Note: this variable actually does not depend on j; # it is set as `v_j` just to be consistent to the paper if v_j == -1 - θm, rm, _, _, θ′, n′, s′, α, n_α = build_tree(θm, rm, logu, v_j, j, ϵ, H0, model, spl, vi) + θm, rm, _, _, θ′, logp′, n′, s′, α, n_α = build_tree(θm, rm, logu, v_j, j, ϵ, H0, model, spl, vi) else - _, _, θp, rp, θ′, n′, s′, α, n_α = build_tree(θp, rp, logu, v_j, j, ϵ, H0, model, spl, vi) + _, _, θp, rp, θ′, logp′, n′, s′, α, n_α = build_tree(θp, rp, logu, v_j, j, ϵ, H0, model, spl, vi) end haskey(spl.info, :progress) && ProgressMeter.update!(spl.info[:progress], @@ -101,6 +102,7 @@ function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool) if s′ == 1 && rand() < min(1, n′ / n) θ = θ′ + logp = logp′ end n = n + n′ @@ -108,8 +110,6 @@ function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool) j = j + 1 end - - # Use Dual Averaging to adapt ϵ m = spl.info[:m] += 1 if m < spl.alg.n_adapt @@ -126,7 +126,7 @@ function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool) push!(spl.info[:accept_his], true) vi[spl] = θ - runmodel(model, vi, spl) + setlogp!(vi, logp) dprintln(3, "R -> X...") if spl.alg.gid != 0 invlink!(vi, spl); cleandual!(vi) end @@ -155,19 +155,20 @@ function build_tree(θ::Vector, r::Vector, logu::Float64, v::Int, j::Int, ϵ::Fl s′ = (logu < Δ_max + -H′) ? 1 : 0 α′ = exp(min(0, -H′ - (-H0))) - θ′, r′, θ′, r′, θ′, n′, s′, α′, 1 + θ′, r′, θ′, r′, θ′, getlogp(vi), n′, s′, α′, 1 else # Recursion - build the left and right subtrees. - θm, rm, θp, rp, θ′, n′, s′, α′, n′_α = build_tree(θ, r, logu, v, j - 1, ϵ, H0, model, spl, vi) + θm, rm, θp, rp, θ′, logp′, n′, s′, α′, n′_α = build_tree(θ, r, logu, v, j - 1, ϵ, H0, model, spl, vi) if s′ == 1 if v == -1 - θm, rm, _, _, θ′′, n′′, s′′, α′′, n′′_α = build_tree(θm, rm, logu, v, j - 1, ϵ, H0, model, spl, vi) + θm, rm, _, _, θ′′, logp′′, n′′, s′′, α′′, n′′_α = build_tree(θm, rm, logu, v, j - 1, ϵ, H0, model, spl, vi) else - _, _, θp, rp, θ′′, n′′, s′′, α′′, n′′_α = build_tree(θp, rp, logu, v, j - 1, ϵ, H0, model, spl, vi) + _, _, θp, rp, θ′′, logp′′, n′′, s′′, α′′, n′′_α = build_tree(θp, rp, logu, v, j - 1, ϵ, H0, model, spl, vi) end if rand() < n′′ / (n′ + n′′) θ′ = θ′′ + logp′ = logp′′ end α′ = α′ + α′′ n′_α = n′_α + n′′_α @@ -175,6 +176,6 @@ function build_tree(θ::Vector, r::Vector, logu::Float64, v::Int, j::Int, ϵ::Fl n′ = n′ + n′′ end - θm, rm, θp, rp, θ′, n′, s′, α′, n′_α + θm, rm, θp, rp, θ′, logp′, n′, s′, α′, n′_α end end From ee500c059fdf5af378a52b639d6fff8799691b98 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 12 May 2017 22:35:06 +0100 Subject: [PATCH 38/55] Cache grad --- src/core/ad.jl | 14 +++++++++++++- src/samplers/gibbs.jl | 4 ++++ src/samplers/hmc.jl | 1 + test/nuts.jl/nuts.jl | 1 - 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/core/ad.jl b/src/core/ad.jl index f1aa301bec..e7f939c93b 100644 --- a/src/core/ad.jl +++ b/src/core/ad.jl @@ -27,6 +27,14 @@ gradient(_vi::VarInfo, model::Function, spl::Union{Void, Sampler}) = begin end gradient2(vi::VarInfo, model::Function, spl::Union{Void, Sampler}) = begin + + θ = vi[spl] + if spl != nothing && haskey(spl.info, :grad_cache) + if haskey(spl.info[:grad_cache], θ) + return spl.info[:grad_cache][θ] + end + end + # Initialisation grad = Vector{Float64}() @@ -73,13 +81,17 @@ gradient2(vi::VarInfo, model::Function, spl::Union{Void, Sampler}) = begin vi[range] = map(r -> Dual{chunk_dim, Float64}(r), reals) end end - + # 2. 
Run model and collect gradient vi = runmodel(model, vi, spl) dprintln(4, "collect gradients from logp...") append!(grad, collect(dualpart(-getlogp(vi)))) end + if spl != nothing && haskey(spl.info, :grad_cache) + spl.info[:grad_cache][θ] = grad + end + grad end diff --git a/src/samplers/gibbs.jl b/src/samplers/gibbs.jl index 4d6e83b247..e726a95449 100644 --- a/src/samplers/gibbs.jl +++ b/src/samplers/gibbs.jl @@ -75,6 +75,10 @@ function sample(model::Function, alg::Gibbs) dprintln(2, "$(typeof(local_spl)) stepping...") if isa(local_spl.alg, GibbsComponent) + if isa(local_spl.alg, Hamiltonian) # clean cache + local_spl.info[:grad_cache] = Dict{Vector,Vector}() + end + for _ = 1:local_spl.alg.n_iters dprintln(2, "recording old θ...") varInfo = step(model, local_spl, varInfo, i==1) diff --git a/src/samplers/hmc.jl b/src/samplers/hmc.jl index 8ec6b7c5a2..15399e30d6 100644 --- a/src/samplers/hmc.jl +++ b/src/samplers/hmc.jl @@ -52,6 +52,7 @@ Sampler(alg::Hamiltonian) = begin info[:accept_his] = [] info[:lf_num] = 0 info[:eval_num] = 0 + info[:grad_cache] = Dict{Vector,Vector}() Sampler(alg, info) end diff --git a/test/nuts.jl/nuts.jl b/test/nuts.jl/nuts.jl index 559d02e878..0f08fa7ad0 100644 --- a/test/nuts.jl/nuts.jl +++ b/test/nuts.jl/nuts.jl @@ -1,4 +1,3 @@ -include("../utility.jl") using Distributions, Turing @model gdemo(x) = begin From 410634d28ec73582eaf7c0dcd0fc363506cb8bb3 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 12 May 2017 22:52:36 +0100 Subject: [PATCH 39/55] Merge observe of HMC and Void --- src/samplers/hmc.jl | 6 ++--- src/samplers/sampler.jl | 9 ++++--- test/nuts_nb.jl | 56 +++++++++++++++++++++++++++++++---------- 3 files changed, 51 insertions(+), 20 deletions(-) diff --git a/src/samplers/hmc.jl b/src/samplers/hmc.jl index 15399e30d6..e0d4304c53 100644 --- a/src/samplers/hmc.jl +++ b/src/samplers/hmc.jl @@ -121,7 +121,5 @@ function assume{T<:Hamiltonian}(spl::Sampler{T}, dist::Distribution, vn::VarName r end -function observe{T<:Hamiltonian}(spl::Sampler{T}, d::Distribution, value::Any, vi::VarInfo) - dprintln(2, "observing...") - acclogp!(vi, logpdf(d, map(x -> Dual(x), value))) -end +observe{T<:Hamiltonian}(spl::Sampler{T}, d::Distribution, value::Any, vi::VarInfo) = + observe(nothing, d, value, vi) diff --git a/src/samplers/sampler.jl b/src/samplers/sampler.jl index 2feaffc52b..2f770ce7ea 100644 --- a/src/samplers/sampler.jl +++ b/src/samplers/sampler.jl @@ -38,7 +38,10 @@ assume(spl::Void, dist::Distribution, vn::VarName, vi::VarInfo) = begin end observe(spl::Void, dist::Distribution, value::Any, vi::VarInfo) = begin - lp = logpdf(dist, value) - vi.logw += lp - acclogp!(vi, lp) + if isa(dist, UnivariateDistribution) && isa(value, Vector) + println("catch you") + exit() + else + acclogp!(vi, logpdf(dist, map(x -> Dual(x), value))) + end end diff --git a/test/nuts_nb.jl b/test/nuts_nb.jl index 76e69f87ae..063dd465b2 100644 --- a/test/nuts_nb.jl +++ b/test/nuts_nb.jl @@ -1,18 +1,48 @@ -using Distributions -using Turing -using Stan +# using Distributions +# using Turing +# using Stan +# +# include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl") +# include(Pkg.dir("Turing")*"/example-models/stan-models/MoC-stan.data.jl") +# +# @model nbmodel(K, V, M, N, z, w, doc, alpha, beta) = begin +# theta ~ Dirichlet(alpha) +# phi = Array{Any}(K) +# for k = 1:K +# phi[k] ~ Dirichlet(beta) +# end +# for m = 1:M +# z[m] ~ Categorical(theta) +# end +# for n = 1:N +# w[n] ~ Categorical(phi[z[doc[n]]]) +# end +# phi +# end +# +# +# bench_res = tbenchmark("NUTS(1000, 0.65)", 
"nbmodel", "data=nbstandata[1]") +# bench_res[4].names = ["phi[1]", "phi[2]", "phi[3]", "phi[4]"] +# logd = build_logd("Naive Bayes", bench_res...) +# +# include(Pkg.dir("Turing")*"/benchmarks/"*"MoC-stan.run.jl") +# logd["stan"] = stan_d +# logd["time_stan"] = nb_time +# +# print_log(logd) -include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl") -include(Pkg.dir("Turing")*"/benchmarks/naive.bayes-stan.data.jl") -include(Pkg.dir("Turing")*"/benchmarks/naive.bayes.model.jl") +include("utility.jl") +using Distributions, Turing -bench_res = tbenchmark("NUTS(1000, 0.65)", "nbmodel", "data=nbstandata[1]") -bench_res[4].names = ["phi[1]", "phi[2]", "phi[3]", "phi[4]"] -logd = build_logd("Naive Bayes", bench_res...) +@model gdemo(x) = begin + s ~ InverseGamma(2,3) + m ~ Normal(0,sqrt(s)) + x ~ Normal(m, sqrt(s)) + return s, m +end -include(Pkg.dir("Turing")*"/benchmarks/naive.bayes-stan.run.jl") -logd["stan"] = stan_d -logd["time_stan"] = nb_time +alg = NUTS(2500, 500, 0.65) +res = sample(gdemo([1.5, 2.0]), alg) -print_log(logd) +check_numerical(res, [:s, :m], [49/24, 7/6]) From b68b60abde395bebf36cc7345054e9538eec51c3 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 12 May 2017 22:55:31 +0100 Subject: [PATCH 40/55] Improve observe code --- src/samplers/sampler.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/samplers/sampler.jl b/src/samplers/sampler.jl index 2f770ce7ea..de7c680888 100644 --- a/src/samplers/sampler.jl +++ b/src/samplers/sampler.jl @@ -42,6 +42,6 @@ observe(spl::Void, dist::Distribution, value::Any, vi::VarInfo) = begin println("catch you") exit() else - acclogp!(vi, logpdf(dist, map(x -> Dual(x), value))) + acclogp!(vi, logpdf(dist, value)) end end From 5e745da23f44be794063a908f92f348d3cec6b7a Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 12 May 2017 23:06:48 +0100 Subject: [PATCH 41/55] Fix setchunksize --- benchmarks/lda.run.jl | 2 ++ src/samplers/support/hmc_core.jl | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/benchmarks/lda.run.jl b/benchmarks/lda.run.jl index 2810be12c9..ac6273fd23 100644 --- a/benchmarks/lda.run.jl +++ b/benchmarks/lda.run.jl @@ -8,6 +8,8 @@ include(Pkg.dir("Turing")*"/example-models/stan-models/lda.model.jl") include(Pkg.dir("Turing")*"/benchmarks/"*"lda-stan.run.jl") +setchunksize(60) + for alg in ["HMC(1000, 0.25, 6)", "HMCDA(1000, 0.65, 1.5)"] bench_res = tbenchmark(alg, "ldamodel", "data=ldastandata[1]") bench_res[4].names = ["phi[1]", "phi[2]"] diff --git a/src/samplers/support/hmc_core.jl b/src/samplers/support/hmc_core.jl index 3309425224..324b168f9e 100644 --- a/src/samplers/support/hmc_core.jl +++ b/src/samplers/support/hmc_core.jl @@ -1,6 +1,9 @@ global Δ_max = 1000 -setchunksize(chun_size::Int) = global CHUNKSIZE = chunk_size +setchunksize(chunk_size::Int) = begin + println("[Turing]: AD chunk size is set as $chunk_size") + global CHUNKSIZE = chunk_size +end runmodel(model::Function, vi::VarInfo, spl::Union{Void,Sampler}) = begin dprintln(4, "run model...") From bb25da04233f2f5267a21c596e638a833a82cc15 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 12 May 2017 23:44:05 +0100 Subject: [PATCH 42/55] Vectorize observe #255 --- src/samplers/sampler.jl | 7 ++--- test/runtests.jl | 1 + .../vectorize_observe.jl} | 27 +++++++++++++++---- 3 files changed, 27 insertions(+), 8 deletions(-) rename test/{nuts_nb.jl => sampler.jl/vectorize_observe.jl} (64%) diff --git a/src/samplers/sampler.jl b/src/samplers/sampler.jl index de7c680888..0e0e580c20 100644 --- a/src/samplers/sampler.jl +++ 
b/src/samplers/sampler.jl @@ -38,9 +38,10 @@ assume(spl::Void, dist::Distribution, vn::VarName, vi::VarInfo) = begin end observe(spl::Void, dist::Distribution, value::Any, vi::VarInfo) = begin - if isa(dist, UnivariateDistribution) && isa(value, Vector) - println("catch you") - exit() + if isa(dist, UnivariateDistribution) && isa(value, Vector) || + isa(dist, MultivariateDistribution) && isa(value, Matrix) || # Note: each value is a column vector + isa(dist, MatrixDistribution) && isa(value, Array{typeof(value[1]),3}) + acclogp!(vi, sum(logpdf(dist, value))) else acclogp!(vi, logpdf(dist, value)) end diff --git a/test/runtests.jl b/test/runtests.jl index 2febe73ebf..20a28079c2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -25,6 +25,7 @@ testcases = Dict( # samplers/ # support/ "resample.jl" => ["resample", "particlecontainer",], + "sampler.jl" => ["vectorize_observe",], "gibbs.jl" => ["gibbs", "gibbs2", "gibbs_constructor",], "nuts.jl" => ["nuts_cons", "nuts", # "nuts_geweke", diff --git a/test/nuts_nb.jl b/test/sampler.jl/vectorize_observe.jl similarity index 64% rename from test/nuts_nb.jl rename to test/sampler.jl/vectorize_observe.jl index 063dd465b2..80920c3c15 100644 --- a/test/nuts_nb.jl +++ b/test/sampler.jl/vectorize_observe.jl @@ -32,17 +32,34 @@ # print_log(logd) -include("utility.jl") using Distributions, Turing -@model gdemo(x) = begin +# Test for vectorize UnivariateDistribution +@model vdemo(x) = begin s ~ InverseGamma(2,3) m ~ Normal(0,sqrt(s)) - x ~ Normal(m, sqrt(s)) + x ~ Normal(m, sqrt(s)) + # for i = 1:length(x) + # x[i] ~ Normal(m, sqrt(s)) + # end return s, m end alg = NUTS(2500, 500, 0.65) -res = sample(gdemo([1.5, 2.0]), alg) +x = randn(1000) +res = sample(vdemo(x), alg) -check_numerical(res, [:s, :m], [49/24, 7/6]) +check_numerical(res, [:s, :m], [1, sum(x) / (1 + length(x))]) + +# Test for vectorize MultivariateDistribution + +D = 2 +@model vdemo2(x) = begin + μ ~ MvNormal(zeros(D), ones(D)) + x ~ MvNormal(μ, ones(D)) +end + +alg = NUTS(2500, 500, 0.65) +res = sample(vdemo2(randn(D,1000)), alg) + +# TODO: Test for vectorize MatrixDistribution From 81f77a898f03caa87008cdf612e8c344221ac738 Mon Sep 17 00:00:00 2001 From: Hong Ge Date: Sat, 13 May 2017 11:41:23 +0100 Subject: [PATCH 43/55] Much faster LDA (#117, #255) --- example-models/stan-models/lda.model.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/example-models/stan-models/lda.model.jl b/example-models/stan-models/lda.model.jl index 66be471b9a..d32486f81c 100644 --- a/example-models/stan-models/lda.model.jl +++ b/example-models/stan-models/lda.model.jl @@ -43,9 +43,11 @@ # z[n] ~ Categorical(theta[doc[n]]) # end + phi_dot_theta = [log([dot(map(p -> p[i], phi), theta[m]) for i = 1:V]) for m=1:M] for n = 1:N - phi_dot_theta = [dot(map(p -> p[i], phi), theta[doc[n]]) for i = 1:V] - w[n] ~ Categorical(phi_dot_theta) + # phi_dot_theta = [dot(map(p -> p[i], phi), theta[doc[n]]) for i = 1:V] + # w[n] ~ Categorical(phi_dot_theta) + Turing.acclogp!(vi, phi_dot_theta[doc[n]][w[n]]) end end From b1f3c51801f326a7b8522bcee5fcd8332447f5be Mon Sep 17 00:00:00 2001 From: Hong Ge Date: Sat, 13 May 2017 11:47:24 +0100 Subject: [PATCH 44/55] Increase max_treedepth for NUTS. 
--- src/samplers/nuts.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/samplers/nuts.jl b/src/samplers/nuts.jl index eedcfb751a..769c4f441d 100644 --- a/src/samplers/nuts.jl +++ b/src/samplers/nuts.jl @@ -87,7 +87,7 @@ function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool) θm, θp, rm, rp, j, n, s = θ, θ, p, p, 0, 1, 1 local α, n_α - while s == 1 && j <= 2 + while s == 1 && j <= 6 v_j = rand([-1, 1]) # Note: this variable actually does not depend on j; # it is set as `v_j` just to be consistent to the paper if v_j == -1 From d8c2d418fc02dccf2514042bb571f48761cc15d2 Mon Sep 17 00:00:00 2001 From: Hong Ge Date: Sat, 13 May 2017 11:49:31 +0100 Subject: [PATCH 45/55] Add NUTS to lda.run. --- benchmarks/lda.run.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/lda.run.jl b/benchmarks/lda.run.jl index ac6273fd23..02157b829f 100644 --- a/benchmarks/lda.run.jl +++ b/benchmarks/lda.run.jl @@ -10,7 +10,7 @@ include(Pkg.dir("Turing")*"/benchmarks/"*"lda-stan.run.jl") setchunksize(60) -for alg in ["HMC(1000, 0.25, 6)", "HMCDA(1000, 0.65, 1.5)"] +for alg in ["HMC(1000, 0.25, 6)", "HMCDA(1000, 0.65, 1.5)", "NUTS(2000, 1000, 0.65)"] bench_res = tbenchmark(alg, "ldamodel", "data=ldastandata[1]") bench_res[4].names = ["phi[1]", "phi[2]"] logd = build_logd("LDA", bench_res...) From 2278d1bbf9e289c3bdb527a29a551f4c14249082 Mon Sep 17 00:00:00 2001 From: Hong Ge Date: Sat, 13 May 2017 11:59:14 +0100 Subject: [PATCH 46/55] Faster MoC. --- .../BayesHmm-checkpoint.ipynb | 110 +++++++ example-models/notebooks/BayesHmm2.ipynb | 210 +++++++++++++ example-models/notebooks/gdemo-geweke.ipynb | 283 ++++++++++++++++++ example-models/stan-models/MoC.model.jl | 15 +- 4 files changed, 614 insertions(+), 4 deletions(-) create mode 100644 example-models/notebooks/.ipynb_checkpoints/BayesHmm-checkpoint.ipynb create mode 100644 example-models/notebooks/BayesHmm2.ipynb create mode 100644 example-models/notebooks/gdemo-geweke.ipynb diff --git a/example-models/notebooks/.ipynb_checkpoints/BayesHmm-checkpoint.ipynb b/example-models/notebooks/.ipynb_checkpoints/BayesHmm-checkpoint.ipynb new file mode 100644 index 0000000000..34e27c8ca6 --- /dev/null +++ b/example-models/notebooks/.ipynb_checkpoints/BayesHmm-checkpoint.ipynb @@ -0,0 +1,110 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "using Turing, Distributions\n", + "using PyPlot, PyCall" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "y = [ 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0 ];\n", + "N = length(y); K = 3;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "@model BayesHmm(y) = begin\n", + " s = tzeros(Int64, N)\n", + " m = tzeros(Real, K)\n", + " T = Array{Array}(K)\n", + " for i = 1:K\n", + " T[i] ~ Dirichlet(ones(K)/K)\n", + " # m[i] ~ Normal(1, 0.1) # Defining m this way causes label-switching problem.\n", + " m[i] ~ Normal(i, 0.01)\n", + " end\n", + " s[1] ~ Categorical(ones(Float64, K)/K)\n", + " for i = 2:N\n", + " s[i] ~ Categorical(vec(T[s[i-1]]))\n", + " y[i] ~ Normal(m[s[i]], 0.01)\n", + " end\n", + " return(s, m)\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "g = Gibbs(300, 
HMC(1, 0.2, 5, :m, :T), PG(50, 1, :s))\n", + "c = sample(BayesHmm(y), g);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "describe(c)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "m = c[:m][111];\n", + "s = c[:s][111];\n", + "PyPlot.plot(y, linestyle=\"None\", marker=\"+\", color = \"r\")\n", + "PyPlot.plot(m[s], linestyle=\"-\", marker=\"*\", color = \"b\")" + ] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernel_info": { + "name": "julia-0.5" + }, + "kernelspec": { + "display_name": "Julia 0.5.0", + "language": "julia", + "name": "julia-0.5" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "0.5.0" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example-models/notebooks/BayesHmm2.ipynb b/example-models/notebooks/BayesHmm2.ipynb new file mode 100644 index 0000000000..afedbdaaee --- /dev/null +++ b/example-models/notebooks/BayesHmm2.ipynb @@ -0,0 +1,210 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "using Turing, Distributions\n", + "using PyPlot, PyCall" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "y = [ 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0 ];\n", + "N = length(y);\n", + "K = 3;" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Assume - `T` is a parameter\n", + " Assume - `m` is a parameter\n", + " Assume - `s` is a parameter\n", + " Observe - `y` is an observation\n" + ] + }, + { + "data": { + "text/plain": [ + "BayesHmm (generic function with 4 methods)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "@model BayesHmm(y) = begin\n", + " s = tzeros(Int64, N)\n", + " m = tzeros(Dual, K)\n", + " T = Array{Array}(K)\n", + " for i = 1:K\n", + " T[i] ~ Dirichlet(ones(K)/K)\n", + " # m[i] ~ Normal(1, 0.1) # Defining m this way causes label-switching problem.\n", + " m[i] ~ Normal(i, 0.01)\n", + " end\n", + " s[1] ~ Categorical(ones(Float64, K)/K)\n", + " for i = 2:N\n", + " s[i] ~ Categorical(vec(T[s[i-1]]))\n", + " y[i] ~ Normal(m[s[i]], 0.01)\n", + " end\n", + " return(s, m, T)\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Turing.PG(50,1000,Turing.resampleSystematic,0.5,Set{Any}())" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g1 = Gibbs(300, HMC(2, 0.2, 5, :m, :T), PG(50, 5, :s))\n", + "g2 = PG(50, 1000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "T[1] = ForwardDiff.Dual{0,Float64}[Dual(0.117584),Dual(0.744224),Dual(0.138192)], T[1] ~ Distributions.Dirichlet{Float64}(alpha=[0.333333,0.333333,0.333333])\n", + "m[1] = Dual(0.9967711359430271), m[1] ~ Distributions.Normal{Float64}(μ=1.0, σ=0.01)\n", + "T[2] = 
ForwardDiff.Dual{0,Float64}[Dual(0.0064186),Dual(0.0065692),Dual(0.987012)], T[2] ~ Distributions.Dirichlet{Float64}(alpha=[0.333333,0.333333,0.333333])\n", + "m[2] = Dual(2.007863731847725), m[2] ~ Distributions.Normal{Float64}(μ=2.0, σ=0.01)\n", + "T[3] = ForwardDiff.Dual{0,Float64}[Dual(0.713092),Dual(0.234245),Dual(0.0526634)], T[3] ~ Distributions.Dirichlet{Float64}(alpha=[0.333333,0.333333,0.333333])\n", + "m[3] = Dual(2.9919946593762465), m[3] ~ Distributions.Normal{Float64}(μ=3.0, σ=0.01)\n", + "s[1] = Dual(3), s[1] ~ Distributions.Categorical{Float64}(K=3, p=[0.333333,0.333333,0.333333])\n" + ] + } + ], + "source": [ + "s1 = @sample(BayesHmm(y), g1);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "describe(s1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "m = s1[:m][111];\n", + "s = s1[:s][111];\n", + "PyPlot.plot(y, linestyle=\"None\", marker=\"+\", color = \"r\")\n", + "PyPlot.plot(m[s], linestyle=\"-\", marker=\"*\", color = \"b\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "s2 = @sample(BayesHmm(y), g2);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "describe(s2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "m = s2[:m][150];\n", + "s = s2[:s][150];\n", + "PyPlot.plot(y, linestyle=\"None\", marker=\"+\", color = \"r\")\n", + "PyPlot.plot(m[s], linestyle=\"-\", marker=\"*\", color = \"b\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Julia 0.5.0", + "language": "julia", + "name": "julia-0.5" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "0.5.0" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example-models/notebooks/gdemo-geweke.ipynb b/example-models/notebooks/gdemo-geweke.ipynb new file mode 100644 index 0000000000..2566beb7a3 --- /dev/null +++ b/example-models/notebooks/gdemo-geweke.ipynb @@ -0,0 +1,283 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "using Distributions, Turing\n", + "using Gadfly" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Assume - `s` is a parameter\n", + " Assume - `m` is a parameter\n", + " Assume - `y` is a parameter\n", + " Observe - `x` is an observation\n" + ] + }, + { + "data": { + "text/plain": [ + "gdemo2 (generic function with 4 methods)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "@model gdemo2(x, bkstep) = begin\n", + " y = similar(x)\n", + " if bkstep == false \n", + " # Forward Sample\n", + " s ~ InverseGamma(2,3)\n", + " m ~ Normal(0,sqrt(s))\n", + " y[1] ~ Normal(m, sqrt(s))\n", + " y[2] ~ Normal(m, sqrt(s))\n", + " elseif bkstep == true \n", + " # Backward Step 1: theta ~ theta | x\n", + " s ~ InverseGamma(2,3)\n", + " 
m ~ Normal(0,sqrt(s))\n", + " x[1] ~ Normal(m, sqrt(s))\n", + " x[2] ~ Normal(m, sqrt(s))\n", + " # Backward Step 2: x ~ x | theta\n", + " y[1] ~ Normal(m, sqrt(s))\n", + " y[2] ~ Normal(m, sqrt(s))\n", + " end\n", + " return s, m, y\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "fw = PG(20, 300)\n", + "bk = Gibbs(10, PG(10,10, :s, :y), HMC(1, 0.25, 5, :m));" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[PG]: Finshed within 2.7866458892822266 seconds\n" + ] + } + ], + "source": [ + "s = @sample(gdemo2([1.5, 2], false), fw);" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iterations = 1:300\n", + "Thinning interval = 1\n", + "Chains = 1\n", + "Samples per chain = 300\n", + "\n", + "Empirical Posterior Estimates:\n", + " Mean SD Naive SE MCSE ESS\n", + " m 0.186876338 1.5828736 0.09138725 0.031917705 300\n", + "y[1] 0.021829037 2.4501294 0.14145829 0.080212894 300\n", + "y[2] 0.295076622 2.4325274 0.14044203 0.103072231 300\n", + " s 3.253114980 5.5768925 0.32198204 0.073906481 300\n", + "\n", + "Quantiles:\n", + " 2.5% 25.0% 50.0% 75.0% 97.5% \n", + " m -2.74503914 -0.76699002 0.118378945 1.0859695 3.4705196\n", + "y[1] -4.06774709 -1.35406974 0.044656838 1.2878395 4.0238171\n", + "y[2] -3.71981601 -0.95970933 0.145579527 1.4402614 5.3891412\n", + " s 0.48593145 1.08023792 1.781509454 3.3918546 13.4523979\n", + "\n" + ] + } + ], + "source": [ + "describe(s)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Gibbs]: Finshed within 4.269287109375 seconds\n", + "[Gibbs]: Finshed within 0.07929396629333496 seconds\n", + "[Gibbs]: Finshed within 0.0547490119934082 seconds\n", + "[Gibbs]: Finshed within 0.06913399696350098 seconds\n", + "[Gibbs]: Finshed within 0.06143689155578613 seconds\n", + "[Gibbs]: Finshed within 0.09006118774414062 seconds\n" + ] + } + ], + "source": [ + "N = 100\n", + "x = [s[:y][1]...]\n", + "s_bk = Array{Turing.Chain}(N)\n", + "\n", + "for i = 1:N\n", + " s_bk[i] = @sample(gdemo2(x, true), bk);\n", + " x = [s_bk[i][:y][1]...];\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "s2 = vcat(s_bk...);\n", + "describe(s2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "p = plot(s);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "hstack(p[1:2])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "hstack(p[3:4])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "heideldiag(s)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": 
[], + "source": [ + "import Turing.@sample\n", + "\n", + "macro sample(modelcall, alg, optionalps...)\n", + " modelf = modelcall.args[1] # get the function symbol for model\n", + " psyms = modelcall.args[2:end] # get the (passed-in) symbols\n", + " esc(quote\n", + " data_ = Dict()\n", + " arglist_ = Turing.TURING[:modelarglist]\n", + " localsyms_ = $psyms\n", + " for i_ = 1:length(arglist_)\n", + " localsym_ = localsyms_[i_] # passed-in symbols are local\n", + " arg_ = arglist_[i_] # arglist are symbols in model scope\n", + " data_[arg_] = eval(localsym_)\n", + " end\n", + " sample($modelf, data_, $alg, $optionalps...)\n", + " end)\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Julia 0.5.0", + "language": "julia", + "name": "julia-0.5" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "0.5.0" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example-models/stan-models/MoC.model.jl b/example-models/stan-models/MoC.model.jl index 25437102b6..63ec620769 100644 --- a/example-models/stan-models/MoC.model.jl +++ b/example-models/stan-models/MoC.model.jl @@ -6,11 +6,18 @@ for k = 1:K phi[k] ~ Dirichlet(beta) end - for m = 1:M - z[m] ~ Categorical(theta) - end + #for m = 1:M + # z[m] ~ Categorical(theta) + #end + + log_theta = log(theta) + Turing.acclogp!(vi, sum(log_theta[z[1:M]])) + + log_phi = map(x->log(x), phi) for n = 1:N - w[n] ~ Categorical(phi[z[doc[n]]]) + # w[n] ~ Categorical(phi[z[doc[n]]]) + Turing.acclogp!(vi, log_phi[z[doc[n]]][w[n]]) end + phi end From f958da2b8c07fbf13009132a4be18d555c944ddf Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Sat, 13 May 2017 13:55:09 +0100 Subject: [PATCH 47/55] Add a quick test --- test/quick.jl | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 test/quick.jl diff --git a/test/quick.jl b/test/quick.jl new file mode 100644 index 0000000000..22b2418b43 --- /dev/null +++ b/test/quick.jl @@ -0,0 +1,34 @@ +using Distributions +using Turing +using Stan + +include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl") +include(Pkg.dir("Turing")*"/example-models/stan-models/MoC-stan.data.jl") + +@model nbmodel(K, V, M, N, z, w, doc, alpha, beta) = begin + theta ~ Dirichlet(alpha) + phi = Array{Any}(K) + for k = 1:K + phi[k] ~ Dirichlet(beta) + end + for m = 1:M + z[m] ~ Categorical(theta) + end + for n = 1:N + w[n] ~ Categorical(phi[z[doc[n]]]) + end + phi +end + + +# bench_res = tbenchmark("NUTS(1000, 0.65)", "nbmodel", "data=nbstandata[1]") +# bench_res[4].names = ["phi[1]", "phi[2]", "phi[3]", "phi[4]"] +# logd = build_logd("Naive Bayes", bench_res...) 
+#
+# include(Pkg.dir("Turing")*"/benchmarks/"*"MoC-stan.run.jl")
+# logd["stan"] = stan_d
+# logd["time_stan"] = nb_time
+#
+# print_log(logd)
+
+samples = sample(nbmodel(data=nbstandata[1]), NUTS(1000, 0.65))

From 410283077c2987319a4134457b10fc90884f663f Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Sat, 13 May 2017 14:01:50 +0100
Subject: [PATCH 48/55] Fix cache for win32

---
 src/core/ad.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/core/ad.jl b/src/core/ad.jl
index e7f939c93b..4c76fe72c4 100644
--- a/src/core/ad.jl
+++ b/src/core/ad.jl
@@ -28,7 +28,7 @@ end
 gradient2(vi::VarInfo, model::Function, spl::Union{Void, Sampler}) = begin

-  θ = vi[spl]
+  θ = realpart(vi[spl])
   if spl != nothing && haskey(spl.info, :grad_cache)
     if haskey(spl.info[:grad_cache], θ)
       return spl.info[:grad_cache][θ]

From a9be13dd30d13ded27d1b9de528b5fb2e92fc19a Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Sat, 13 May 2017 14:09:59 +0100
Subject: [PATCH 49/55] Support new syntax for vec obs

---
 src/samplers/hmc.jl                  |  7 +++++--
 src/samplers/sampler.jl              | 14 ++++++--------
 test/sampler.jl/vectorize_observe.jl |  6 +++---
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/src/samplers/hmc.jl b/src/samplers/hmc.jl
index e0d4304c53..4a36738935 100644
--- a/src/samplers/hmc.jl
+++ b/src/samplers/hmc.jl
@@ -121,5 +121,8 @@ function assume{T<:Hamiltonian}(spl::Sampler{T}, dist::Distribution, vn::VarName
   r
 end

-observe{T<:Hamiltonian}(spl::Sampler{T}, d::Distribution, value::Any, vi::VarInfo) =
-  observe(nothing, d, value, vi)
+observe{A<:Hamiltonian}(spl::Sampler{A}, d::Distribution, value::Any, vi::VarInfo) =
+  observe(nothing, d, value, vi)
+
+observe{A<:Hamiltonian,D<:Distribution}(spl::Sampler{A}, ds::Vector{D}, value::Any, vi::VarInfo) =
+  observe(nothing, ds, value, vi)

diff --git a/src/samplers/sampler.jl b/src/samplers/sampler.jl
index 0e0e580c20..9b3c06bef1 100644
--- a/src/samplers/sampler.jl
+++ b/src/samplers/sampler.jl
@@ -37,12 +37,10 @@ assume(spl::Void, dist::Distribution, vn::VarName, vi::VarInfo) = begin
   r
 end

-observe(spl::Void, dist::Distribution, value::Any, vi::VarInfo) = begin
-  if isa(dist, UnivariateDistribution) && isa(value, Vector) ||
-     isa(dist, MultivariateDistribution) && isa(value, Matrix) ||  # Note: each value is a column vector
-     isa(dist, MatrixDistribution) && isa(value, Array{typeof(value[1]),3})
-    acclogp!(vi, sum(logpdf(dist, value)))
-  else
-    acclogp!(vi, logpdf(dist, value))
-  end
+observe(spl::Void, dist::Distribution, value::Any, vi::VarInfo) =
+  acclogp!(vi, logpdf(dist, value))
+
+observe{T<:Distribution}(spl::Void, dists::Vector{T}, value::Any, vi::VarInfo) = begin
+  @assert length(dists) == 1 "[observe] Turing only supports vectorizing i.i.d. distributions"
+  acclogp!(vi, sum(logpdf(dists[1], value)))
 end

diff --git a/test/sampler.jl/vectorize_observe.jl b/test/sampler.jl/vectorize_observe.jl
index 80920c3c15..3452dd8c42 100644
--- a/test/sampler.jl/vectorize_observe.jl
+++ b/test/sampler.jl/vectorize_observe.jl
@@ -31,14 +31,14 @@
 #
 # print_log(logd)

-
+include("../utility.jl")
 using Distributions, Turing

 # Test for vectorize UnivariateDistribution
 @model vdemo(x) = begin
   s ~ InverseGamma(2,3)
   m ~ Normal(0,sqrt(s))
-  x ~ Normal(m, sqrt(s))
+  x ~ [Normal(m, sqrt(s))]
   # for i = 1:length(x)
   #   x[i] ~ Normal(m, sqrt(s))
   # end
@@ -56,7 +56,7 @@ check_numerical(res, [:s, :m], [1, sum(x) / (1 + length(x))])
 D = 2
 @model vdemo2(x) = begin
   μ ~ MvNormal(zeros(D), ones(D))
-  x ~ MvNormal(μ, ones(D))
+  x ~ [MvNormal(μ, ones(D))]
 end

 alg = NUTS(2500, 500, 0.65)
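To illustrate the vectorised `~` syntax that patch 49 wires up: wrapping a distribution in a one-element vector, as in `x ~ [Normal(m, sqrt(s))]`, scores every entry of `x` as an i.i.d. draw from the wrapped distribution in a single `logpdf` call. A rough sketch of the equivalence using plain Distributions.jl (no Turing machinery; the variable names here are illustrative only, and this assumes Distributions.jl's element-wise array method for `logpdf`):

using Distributions

dist = Normal(0.0, 2.0)
x = randn(100)

# What the vectorised observe accumulates into the log-joint: one call
# over all observations, as in acclogp!(vi, sum(logpdf(dists[1], value))).
lp_vectorized = sum(logpdf(dist, x))

# The element-wise form it replaces.
lp_loop = sum(logpdf(dist, xi) for xi in x)

lp_vectorized ≈ lp_loop   # true

The single-call form is what lets the `Vector{Distribution}` method fuse the whole likelihood into one AD-friendly expression instead of one `observe` per data point.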
From 76b8917b0a5f6308269881a629b2e90335cfe000 Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Sat, 13 May 2017 14:14:00 +0100
Subject: [PATCH 50/55] sync model

---
 test/quick.jl | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/test/quick.jl b/test/quick.jl
index 22b2418b43..d1ad32fa45 100644
--- a/test/quick.jl
+++ b/test/quick.jl
@@ -11,12 +11,16 @@ include(Pkg.dir("Turing")*"/example-models/stan-models/MoC-stan.data.jl")
   for k = 1:K
     phi[k] ~ Dirichlet(beta)
   end
-  for m = 1:M
-    z[m] ~ Categorical(theta)
-  end
+
+  log_theta = log(theta)
+  Turing.acclogp!(vi, sum(log_theta[z[1:M]]))
+
+  log_phi = map(x->log(x), phi)
   for n = 1:N
-    w[n] ~ Categorical(phi[z[doc[n]]])
+    # w[n] ~ Categorical(phi[z[doc[n]]])
+    Turing.acclogp!(vi, log_phi[z[doc[n]]][w[n]])
   end
+
+  phi
 end
@@ -31,4 +35,4 @@ end
 #
 # print_log(logd)
 
-samples = sample(nbmodel(data=nbstandata[1]), NUTS(1000, 0.65))
+samples = sample(nbmodel(data=nbstandata[1]), HMC(1000, 0.1, 4))
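
Note: the patch above drops the `z[m] ~ Categorical(theta)` loop because `z` comes in as data in this model, so its contribution to the joint log-density can be accumulated directly; indexing a precomputed `log(theta)` avoids constructing a `Categorical` on every iteration. A small sketch of the identity being used (plain Distributions.jl, nothing Turing-specific):

    using Distributions

    theta = [0.2, 0.3, 0.5]  # a point on the probability simplex
    z     = [1, 3, 3, 2, 1]  # observed category labels

    log_theta = log(theta)   # elementwise log of the class probabilities

    # Summing per-observation categorical log-densities ...
    lp_loop = sum([logpdf(Categorical(theta), z[m]) for m = 1:length(z)])
    # ... equals a single indexed sum, which is what acclogp! receives:
    lp_vec  = sum(log_theta[z])

    @assert isapprox(lp_loop, lp_vec)
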
From a1afe04a43d7867dd3ca0460edae1fc54426aa4a Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Sat, 13 May 2017 17:05:14 +0100
Subject: [PATCH 51/55] Assume vec for HMC

---
 src/core/compiler.jl          | 22 ++++++++++++++++------
 src/core/varinfo.jl           | 15 +++++++++++----
 src/samplers/hmc.jl           | 16 +++++++++++++++-
 src/samplers/sampler.jl       | 21 +++++++++++++++++++++
 test/runtests.jl              |  2 +-
 test/sampler.jl/vec_assume.jl | 17 +++++++++++++++++
 6 files changed, 81 insertions(+), 12 deletions(-)
 create mode 100644 test/sampler.jl/vec_assume.jl

diff --git a/src/core/compiler.jl b/src/core/compiler.jl
index 063dbcf05c..95ea7be869 100644
--- a/src/core/compiler.jl
+++ b/src/core/compiler.jl
@@ -58,12 +58,22 @@ macro ~(left, right)
       csym_str = string(Turing._compiler_[:fname])*"_var"*string(@__LINE__)
       sym = Symbol($(string(left)))
       vn = Turing.VarName(vi, Symbol(csym_str), sym, "")
-      $(left) = Turing.assume(
-        sampler,
-        $(right),   # dist
-        vn,         # VarName
-        vi          # VarInfo
-      )
+      if isa($(right), Vector)
+        $(left) = Turing.assume(
+          sampler,
+          $(right),   # dist
+          vn,         # VarName
+          $(left),
+          vi          # VarInfo
+        )
+      else
+        $(left) = Turing.assume(
+          sampler,
+          $(right),   # dist
+          vn,         # VarName
+          vi          # VarInfo
+        )
+      end
     end
   else
     # Indexing
diff --git a/src/core/varinfo.jl b/src/core/varinfo.jl
index 9210e8c1ac..8cccc63a09 100644
--- a/src/core/varinfo.jl
+++ b/src/core/varinfo.jl
@@ -27,6 +27,8 @@
 sym(vn::VarName) = Symbol("$(vn.sym)$(vn.indexing)")  # simplified symbol
 cuid(vn::VarName) = (vn.csym, vn.sym, vn.indexing)    # the uid which is only available at compile time
 
+copybyindex(vn::VarName, indexing::String) = VarName(vn.csym, vn.sym, indexing, vn.counter)
+
 ###########
 # VarInfo #
 ###########
@@ -137,10 +139,15 @@ Base.getindex(vi::VarInfo, vn::VarName) = begin
   r = istrans(vi, vn) ? invlink(dist, r) : r
 end
 
-# Base.setindex!(vi::VarInfo, r, vn::VarName) = begin
-#   dist = getdist(vi, vn)
-#   setval!(vi, vectorize(dist, r), vn)
-# end
+Base.getindex(vi::VarInfo, vns::Vector{VarName}) = begin
+  @assert haskey(vi, vns[1]) "[Turing] attempted to replay nonexistent variables in VarInfo"
+  dist = getdist(vi, vns[1])
+  if istrans(vi, vns[1])
+    [invlink(dist, reconstruct(dist, getval(vi, vn))) for vn in vns]
+  else
+    [reconstruct(dist, getval(vi, vn)) for vn in vns]
+  end
+end
 
 # NOTE: vi[view] will just return what is inside vi (no transformations applied)
 Base.getindex(vi::VarInfo, view::VarView) = getval(vi, view)
diff --git a/src/samplers/hmc.jl b/src/samplers/hmc.jl
index 4a36738935..d096b7e490 100644
--- a/src/samplers/hmc.jl
+++ b/src/samplers/hmc.jl
@@ -113,7 +113,7 @@ function sample{T<:Hamiltonian}(model::Function, alg::T, chunk_size::Int)
   Chain(0, samples)    # wrap the result by Chain
 end
 
-function assume{T<:Hamiltonian}(spl::Sampler{T}, dist::Distribution, vn::VarName, vi::VarInfo)
+assume{T<:Hamiltonian}(spl::Sampler{T}, dist::Distribution, vn::VarName, vi::VarInfo) = begin
   dprintln(2, "assuming...")
   updategid!(vi, vn, spl)
   r = vi[vn]
@@ -121,6 +121,20 @@
   r
 end
 
+assume{A<:Hamiltonian,D<:Distribution}(spl::Sampler{A}, dists::Vector{D}, vn::VarName, variable::Any, vi::VarInfo) = begin
+  @assert length(dists) == 1 "[assume] Turing only supports vectorizing i.i.d. distributions"
+  dist = dists[1]
+  n = size(variable)[end]
+
+  vns = map(i -> copybyindex(vn, "[$i]"), 1:n)
+
+  rs = vi[vns]
+
+  acclogp!(vi, sum(logpdf(dist, rs, istrans(vi, vns[1]))))
+
+  rs
+end
+
 observe{A<:Hamiltonian}(spl::Sampler{A}, d::Distribution, value::Any, vi::VarInfo) =
   observe(nothing, d, value, vi)
diff --git a/src/samplers/sampler.jl b/src/samplers/sampler.jl
index 9b3c06bef1..053d7b7a05 100644
--- a/src/samplers/sampler.jl
+++ b/src/samplers/sampler.jl
@@ -37,6 +37,27 @@ assume(spl::Void, dist::Distribution, vn::VarName, vi::VarInfo) = begin
   r
 end
 
+assume{T<:Distribution}(spl::Void, dists::Vector{T}, vn::VarName, variable::Any, vi::VarInfo) = begin
+  @assert length(dists) == 1 "[assume] Turing only supports vectorizing i.i.d. distributions"
+  dist = dists[1]
+  n = size(variable)[end]
+
+  vns = map(i -> copybyindex(vn, "[$i]"), 1:n)
+
+  if haskey(vi, vns[1])
+    rs = vi[vns]
+  else
+    rs = rand(dist, n)
+    for i = 1:n
+      push!(vi, vns[i], rs[i], dist, 0)
+    end
+  end
+
+  acclogp!(vi, sum(logpdf(dist, rs, istrans(vi, vns[1]))))
+
+  rs
+end
+
 observe(spl::Void, dist::Distribution, value::Any, vi::VarInfo) =
   acclogp!(vi, logpdf(dist, value))
diff --git a/test/runtests.jl b/test/runtests.jl
index 20a28079c2..3a2c7cbeaa 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -25,7 +25,7 @@ testcases = Dict(
 #   samplers/
 #     support/
   "resample.jl"    => ["resample", "particlecontainer",],
-  "sampler.jl"     => ["vectorize_observe",],
+  "sampler.jl"     => ["vectorize_observe", "vec_assume",],
  "gibbs.jl"        => ["gibbs", "gibbs2", "gibbs_constructor",],
  "nuts.jl"         => ["nuts_cons", "nuts",
 #                      "nuts_geweke",
diff --git a/test/sampler.jl/vec_assume.jl b/test/sampler.jl/vec_assume.jl
new file mode 100644
index 0000000000..9eefa06638
--- /dev/null
+++ b/test/sampler.jl/vec_assume.jl
@@ -0,0 +1,17 @@
+include("../utility.jl")
+using Distributions, Turing
+
+
+setchunksize(100)
+# Test for vectorizing UnivariateDistribution
+@model vdemo() = begin
+  x = Vector{Real}(100)
+  # x ~ [Normal(0, sqrt(4))]
+  for i = 1:100
+    x[i] ~ Normal(0, sqrt(4))
+  end
+end
+
+alg = HMC(1000, 0.2, 4)
+res = sample(vdemo(), alg)
+println(mean(mean(res[:x])))
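
Note: the vectorised `assume` introduced above hinges on `copybyindex`: one `~` statement over a container is fanned out into one `VarName` per element, so every component keeps its own slot (and transform flag) in the `VarInfo`, while the log-density is accumulated once for the whole batch. A rough sketch of the fan-out, using a simplified stand-in for `VarName` (illustrative only, not the real type):

    # Simplified stand-in for Turing's VarName, for illustration only.
    immutable MiniVarName
      sym      :: Symbol
      indexing :: String
    end

    copybyindex(vn::MiniVarName, indexing::String) = MiniVarName(vn.sym, indexing)

    vn  = MiniVarName(:x, "")
    vns = map(i -> copybyindex(vn, "[$i]"), 1:3)

    # Each generated name (x[1], x[2], x[3]) is looked up, or sampled and
    # pushed, separately; one summed logpdf then covers the whole batch.
    println([string(v.sym, v.indexing) for v in vns])
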
From af2b6c648e1b29e215597f9e730a55c94e168e85 Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Sat, 13 May 2017 17:28:09 +0100
Subject: [PATCH 52/55] Vectorize construct for uni

---
 src/core/varinfo.jl            | 12 ++++++------
 src/samplers/support/helper.jl |  8 +++++++-
 test/sampler.jl/vec_assume.jl  |  8 ++++----
 3 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/src/core/varinfo.jl b/src/core/varinfo.jl
index 8cccc63a09..fea5212318 100644
--- a/src/core/varinfo.jl
+++ b/src/core/varinfo.jl
@@ -70,13 +70,16 @@ typealias VarView Union{Int,UnitRange,Vector{Int},Vector{UnitRange}}
 getidx(vi::VarInfo, vn::VarName) = vi.idcs[vn]
 
 getrange(vi::VarInfo, vn::VarName) = vi.ranges[getidx(vi, vn)]
+getranges(vi::VarInfo, vns::Vector{VarName}) = union(map(vn -> getrange(vi, vn), vns)...)
 
 getval(vi::VarInfo, vn::VarName) = vi.vals[end][getrange(vi, vn)]
 setval!(vi::VarInfo, val, vn::VarName) = vi.vals[end][getrange(vi, vn)] = val
 
+getval(vi::VarInfo, vns::Vector{VarName}) = vi.vals[end][getranges(vi, vns)]
+
 getval(vi::VarInfo, view::VarView) = vi.vals[end][view]
 setval!(vi::VarInfo, val::Any, view::VarView) = vi.vals[end][view] = val
-setval!(vi::VarInfo, val::Any, view::Vector{UnitRange}) = map(v->vi.vals[end][v] = val, view)
+setval!(vi::VarInfo, val::Any, view::Vector{UnitRange}) = map(v -> vi.vals[end][v] = val, view)
 
 getall(vi::VarInfo) = vi.vals[end]
 setall!(vi::VarInfo, val::Any) = vi.vals[end] = val
@@ -142,11 +145,8 @@ end
 Base.getindex(vi::VarInfo, vns::Vector{VarName}) = begin
   @assert haskey(vi, vns[1]) "[Turing] attempted to replay nonexistent variables in VarInfo"
   dist = getdist(vi, vns[1])
-  if istrans(vi, vns[1])
-    [invlink(dist, reconstruct(dist, getval(vi, vn))) for vn in vns]
-  else
-    [reconstruct(dist, getval(vi, vn)) for vn in vns]
-  end
+  rs = reconstruct(dist, getval(vi, vns))
+  rs = istrans(vi, vns[1]) ? invlink(dist, rs) : rs
 end
 
 # NOTE: vi[view] will just return what is inside vi (no transformations applied)
 Base.getindex(vi::VarInfo, view::VarView) = getval(vi, view)
diff --git a/src/samplers/support/helper.jl b/src/samplers/support/helper.jl
index 2ab376a9db..5d0cd51935 100644
--- a/src/samplers/support/helper.jl
+++ b/src/samplers/support/helper.jl
@@ -26,7 +26,13 @@ vectorize{T<:Real}(d::MatrixDistribution, r::Matrix{T}) = Vector{Real}(vec
 # Note this is not the case for MultivariateDistribution so I guess this might be lack of
 # support for some types related to matrices (like PDMat).
 reconstruct(d::Distribution, val::Vector) = reconstruct(d, val, typeof(val[1]))
-reconstruct(d::UnivariateDistribution,   val::Vector, T::Type) = T(val[1])
+reconstruct(d::UnivariateDistribution,   val::Vector, T::Type) = begin
+  if length(val) == 1
+    T(val[1])
+  else
+    Vector{T}(val)
+  end
+end
 reconstruct(d::MultivariateDistribution, val::Vector, T::Type) = Vector{T}(val)
 reconstruct(d::MatrixDistribution,       val::Vector, T::Type) = Array{T, 2}(reshape(val, size(d)...))
diff --git a/test/sampler.jl/vec_assume.jl b/test/sampler.jl/vec_assume.jl
index 9eefa06638..bb705b0950 100644
--- a/test/sampler.jl/vec_assume.jl
+++ b/test/sampler.jl/vec_assume.jl
@@ -6,10 +6,10 @@ setchunksize(100)
 # Test for vectorizing UnivariateDistribution
 @model vdemo() = begin
   x = Vector{Real}(100)
-  # x ~ [Normal(0, sqrt(4))]
+  x ~ [Normal(0, sqrt(4))]
-  for i = 1:100
-    x[i] ~ Normal(0, sqrt(4))
-  end
+  # for i = 1:100
+  #   x[i] ~ Normal(0, sqrt(4))
+  # end
 end
 
 alg = HMC(1000, 0.2, 4)
 res = sample(vdemo(), alg)
 println(mean(mean(res[:x])))
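
Note: the gain in the patch above is turning n per-variable reads into one slab read. The ranges owned by x[1] through x[n] are unioned into a single index set, and the extended `reconstruct` maps a multi-element value vector back to a `Vector{T}` in one call. A toy version of the range union, assuming (as the code does) that each variable owns its own range into one flat value vector:

    # Toy version of getrange/getranges/getval: each variable owns a
    # UnitRange into a single flat storage vector (like vi.vals[end]).
    vals   = collect(1.0:6.0)
    ranges = Dict(:a => 1:2, :b => 3:4, :c => 5:6)

    getrange(rs::Dict, vn::Symbol) = rs[vn]
    getranges(rs::Dict, vns::Vector{Symbol}) = union(map(vn -> getrange(rs, vn), vns)...)

    # One indexed read fetches the values of several variables at once:
    vals[getranges(ranges, [:a, :c])]  # returns [1.0, 2.0, 5.0, 6.0]
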
From 78abffb6e21b5292bb7c87e9a1eff180a3d77427 Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Sat, 13 May 2017 17:48:31 +0100
Subject: [PATCH 53/55] Improve vec test

---
 test/sampler.jl/vec_assume.jl | 42 ++++++++++++++++++++++++++---------
 1 file changed, 32 insertions(+), 10 deletions(-)

diff --git a/test/sampler.jl/vec_assume.jl b/test/sampler.jl/vec_assume.jl
index bb705b0950..0fd6370f06 100644
--- a/test/sampler.jl/vec_assume.jl
+++ b/test/sampler.jl/vec_assume.jl
@@ -1,17 +1,39 @@
 include("../utility.jl")
-using Distributions, Turing
+using Distributions, Turing, Base.Test
+
+N = 100
+setchunksize(N)
+alg = HMC(1000, 0.2, 4)
+
+@model vdemo() = begin
+  x = Vector{Real}(N)
+  for i = 1:N
+    x[i] ~ Normal(0, sqrt(4))
+  end
+end
+
+t_loop = @elapsed res = sample(vdemo(), alg)
+@test_approx_eq_eps mean(mean(res[:x])) 0 0.1
 
-setchunksize(100)
 # Test for vectorizing UnivariateDistribution
 @model vdemo() = begin
-  x = Vector{Real}(100)
-  x ~ [Normal(0, sqrt(4))]
+  x = Vector{Real}(N)
+  x ~ [Normal(0, 2)]
 end
 
-alg = HMC(1000, 0.2, 4)
-res = sample(vdemo(), alg)
-println(mean(mean(res[:x])))
+t_vec = @elapsed res = sample(vdemo(), alg)
+@test_approx_eq_eps mean(mean(res[:x])) 0 0.1
+
+
+@model vdemo() = begin
+  x ~ MvNormal(zeros(N), 2 * ones(N))
 end
+
+t_mv = @elapsed res = sample(vdemo(), alg)
+@test_approx_eq_eps mean(mean(res[:x])) 0 0.1
+
+println("Time for")
+println("  Loop : $t_loop")
+println("  Vec  : $t_vec")
+println("  Mv   : $t_mv")

From 3cabb330a5c6c6d75718a4f67ba39aea8bf0e7c2 Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Sat, 13 May 2017 18:52:09 +0100
Subject: [PATCH 54/55] Prepare test for vec Mv

---
 test/quick.jl                    |  6 ++----
 test/sampler.jl/vec_assume_mv.jl | 34 ++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 4 deletions(-)
 create mode 100644 test/sampler.jl/vec_assume_mv.jl

diff --git a/test/quick.jl b/test/quick.jl
index d1ad32fa45..668192a06e 100644
--- a/test/quick.jl
+++ b/test/quick.jl
@@ -7,10 +7,8 @@ include(Pkg.dir("Turing")*"/example-models/stan-models/MoC-stan.data.jl")
 @model nbmodel(K, V, M, N, z, w, doc, alpha, beta) = begin
   theta ~ Dirichlet(alpha)
-  phi = Array{Any}(K)
-  for k = 1:K
-    phi[k] ~ Dirichlet(beta)
-  end
+  phi = Vector{Vector{Real}}(K)
+  phi ~ [Dirichlet(beta)]
 
   log_theta = log(theta)
   Turing.acclogp!(vi, sum(log_theta[z[1:M]]))
diff --git a/test/sampler.jl/vec_assume_mv.jl b/test/sampler.jl/vec_assume_mv.jl
new file mode 100644
index 0000000000..4cd4893b7f
--- /dev/null
+++ b/test/sampler.jl/vec_assume_mv.jl
@@ -0,0 +1,34 @@
+include("../utility.jl")
+using Distributions, Turing, Base.Test
+
+N = 5
+beta = [0.5, 0.5]
+setchunksize(N*length(beta))
+alg = HMC(1000, 0.2, 4)
+
+# Test for vectorizing MultivariateDistribution
+@model vdemo() = begin
+  phi = Vector{Vector{Real}}(N)
+  phi ~ [Dirichlet(beta)]
+end
+
+t_vec = @elapsed res = sample(vdemo(), alg)
+
+
+
+
+@model vdemo() = begin
+  phi = Vector{Vector{Real}}(N)
+  for i = 1:N
+    phi[i] ~ Dirichlet(beta)
+  end
+end
+
+t_loop = @elapsed res = sample(vdemo(), alg)
+
+
+
+
+println("Time for")
+println("  Loop : $t_loop")
+println("  Vec  : $t_vec")
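
Note: for the multivariate case prepared above, `phi ~ [Dirichlet(beta)]` is meant to behave like N independent Dirichlet draws stored column-wise, and `rand(d, N)` on a multivariate distribution already produces that layout, which is what the vectorised `assume` relies on when it takes `n = size(variable)[end]`. A quick shape check in plain Distributions.jl:

    using Distributions

    beta = [0.5, 0.5]
    N    = 5

    d  = Dirichlet(beta)
    rs = rand(d, N)  # 2×5 matrix: one length-2 simplex point per column

    # The trailing dimension counts the i.i.d. components, which is what
    # size(variable)[end] extracts in the vectorised assume:
    @assert size(rs)[end] == N
    @assert all(abs(sum(rs, 1) - 1) .< 1e-10)  # every column sums to one
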
From 3f8a1fd9a50351a3f5a75dbd61b0695a29301b9b Mon Sep 17 00:00:00 2001
From: Kai Xu
Date: Sun, 14 May 2017 17:32:38 +0100
Subject: [PATCH 55/55] Remove another copy()

---
 src/samplers/support/hmc_core.jl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/samplers/support/hmc_core.jl b/src/samplers/support/hmc_core.jl
index 324b168f9e..4aa21650c3 100644
--- a/src/samplers/support/hmc_core.jl
+++ b/src/samplers/support/hmc_core.jl
@@ -28,7 +28,9 @@ leapfrog2(θ::Vector, p::Vector, τ::Int, ϵ::Float64,
   τ_valid = 0
   for t in 1:τ
-    p_old = copy(p); θ_old = θ; old_logp = getlogp(vi)
+    # NOTE: we don't need a copy here because `arr += another_arr`
+    # doesn't change arr in-place
+    p_old = p; θ_old = θ; old_logp = getlogp(vi)
 
     p -= ϵ * grad / 2
     θ += ϵ * p  # full step for state
@@ -56,7 +58,7 @@ leapfrog(vi::VarInfo, p::Vector, τ::Int, ϵ::Float64, model::Function, spl::Sam
   dprintln(2, "leapfrog stepping...")
   τ_valid = 0
   for t in 1:τ       # do 'leapfrog' for each var
-    p_old = copy(p); θ_old = vi[spl]
+    p_old = p; θ_old = vi[spl]
     p -= ϵ * grad / 2