diff --git a/.travis.yml b/.travis.yml index 779a1a6524..73947e79b6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -29,7 +29,7 @@ matrix: env: GROUP=MOC - os: osx env: GROUP=SV - + allow_failures: - env: GROUP=Test os: osx diff --git a/benchmarks/benchmark.jl b/benchmarks/benchmark.jl index 56d4a52759..e6e343a3cb 100644 --- a/benchmarks/benchmark.jl +++ b/benchmarks/benchmark.jl @@ -6,14 +6,14 @@ using Stan CONFIG = Dict( "model-list" => [ "gdemo-geweke", - "MoC", + # "MoC", #"normal-loc", "normal-mixture", - "simplegauss", + "gdemo", "gauss", "bernoulli", #"negative-binomial", - "lda" + # "lda" ], "test-level" => 2 # 1 = model lang, 2 = whole interface diff --git a/benchmarks/bernoulli-stan.run.jl b/benchmarks/bernoulli-stan.run.jl index 74bd76c902..14eb1ccc92 100644 --- a/benchmarks/bernoulli-stan.run.jl +++ b/benchmarks/bernoulli-stan.run.jl @@ -4,7 +4,7 @@ using Stan include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl") include(Pkg.dir("Turing")*"/example-models/stan-models/bernoulli-stan.data.jl") -include(Pkg.dir("Turing")*"/example-models/stan-models/bernoulli-stan..model.jl") +include(Pkg.dir("Turing")*"/example-models/stan-models/bernoulli-stan.model.jl") stan_model_name = "bernoulli" berstan = Stanmodel(name=stan_model_name, model=berstanmodel, nchains=1); diff --git a/benchmarks/bernoulli.run.jl b/benchmarks/bernoulli.run.jl index 3776955a81..f88097ef3d 100644 --- a/benchmarks/bernoulli.run.jl +++ b/benchmarks/bernoulli.run.jl @@ -4,7 +4,7 @@ using Stan include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl") include(Pkg.dir("Turing")*"/example-models/stan-models/bernoulli.data.jl") -include(Pkg.dir("Turing")*"/example-models/stan-models/bernoulli..model.jl") +include(Pkg.dir("Turing")*"/example-models/stan-models/bernoulli.model.jl") bench_res = tbenchmark("HMC(1000, 0.25, 5)", "bermodel", "berdata") logd = build_logd("Bernoulli Model", bench_res...) diff --git a/benchmarks/lda.run.jl b/benchmarks/lda.run.jl index 13b2b34080..02157b829f 100644 --- a/benchmarks/lda.run.jl +++ b/benchmarks/lda.run.jl @@ -6,12 +6,15 @@ include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl") include(Pkg.dir("Turing")*"/example-models/stan-models/lda-stan.data.jl") include(Pkg.dir("Turing")*"/example-models/stan-models/lda.model.jl") -bench_res = tbenchmark("HMCDA(1000, 0.65, 1.5)", "ldamodel", "data=ldastandata[1]") -bench_res[4].names = ["phi[1]", "phi[2]"] -logd = build_logd("LDA", bench_res...) - include(Pkg.dir("Turing")*"/benchmarks/"*"lda-stan.run.jl") -logd["stan"] = lda_stan_d -logd["time_stan"] = lda_time -print_log(logd) +setchunksize(60) + +for alg in ["HMC(1000, 0.25, 6)", "HMCDA(1000, 0.65, 1.5)", "NUTS(2000, 1000, 0.65)"] + bench_res = tbenchmark(alg, "ldamodel", "data=ldastandata[1]") + bench_res[4].names = ["phi[1]", "phi[2]"] + logd = build_logd("LDA", bench_res...) 
+ logd["stan"] = lda_stan_d + logd["time_stan"] = lda_time + print_log(logd) +end diff --git a/example-models/benchmarks/gauss.model.jl b/example-models/benchmarks/gauss.model.jl index 19e41180a1..de82e9483a 100644 --- a/example-models/benchmarks/gauss.model.jl +++ b/example-models/benchmarks/gauss.model.jl @@ -1,6 +1,6 @@ @model gaussmodel(x) = begin N = length(x) - lam = tzeros(Float64,N) + lam = tzeros(Real,N) mu ~ Normal(0, sqrt(1000)); for i = 1:N lam[i] ~ Gamma(1, 1) diff --git a/example-models/notebooks/.ipynb_checkpoints/BayesHmm-checkpoint.ipynb b/example-models/notebooks/.ipynb_checkpoints/BayesHmm-checkpoint.ipynb new file mode 100644 index 0000000000..34e27c8ca6 --- /dev/null +++ b/example-models/notebooks/.ipynb_checkpoints/BayesHmm-checkpoint.ipynb @@ -0,0 +1,110 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "using Turing, Distributions\n", + "using PyPlot, PyCall" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "y = [ 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0 ];\n", + "N = length(y); K = 3;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "@model BayesHmm(y) = begin\n", + " s = tzeros(Int64, N)\n", + " m = tzeros(Real, K)\n", + " T = Array{Array}(K)\n", + " for i = 1:K\n", + " T[i] ~ Dirichlet(ones(K)/K)\n", + " # m[i] ~ Normal(1, 0.1) # Defining m this way causes label-switching problem.\n", + " m[i] ~ Normal(i, 0.01)\n", + " end\n", + " s[1] ~ Categorical(ones(Float64, K)/K)\n", + " for i = 2:N\n", + " s[i] ~ Categorical(vec(T[s[i-1]]))\n", + " y[i] ~ Normal(m[s[i]], 0.01)\n", + " end\n", + " return(s, m)\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "g = Gibbs(300, HMC(1, 0.2, 5, :m, :T), PG(50, 1, :s))\n", + "c = sample(BayesHmm(y), g);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "describe(c)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "m = c[:m][111];\n", + "s = c[:s][111];\n", + "PyPlot.plot(y, linestyle=\"None\", marker=\"+\", color = \"r\")\n", + "PyPlot.plot(m[s], linestyle=\"-\", marker=\"*\", color = \"b\")" + ] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernel_info": { + "name": "julia-0.5" + }, + "kernelspec": { + "display_name": "Julia 0.5.0", + "language": "julia", + "name": "julia-0.5" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "0.5.0" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example-models/notebooks/BayesHmm2.ipynb b/example-models/notebooks/BayesHmm2.ipynb new file mode 100644 index 0000000000..afedbdaaee --- /dev/null +++ b/example-models/notebooks/BayesHmm2.ipynb @@ -0,0 +1,210 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "using Turing, Distributions\n", + "using PyPlot, PyCall" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "y = [ 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 
3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0 ];\n", + "N = length(y);\n", + "K = 3;" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Assume - `T` is a parameter\n", + " Assume - `m` is a parameter\n", + " Assume - `s` is a parameter\n", + " Observe - `y` is an observation\n" + ] + }, + { + "data": { + "text/plain": [ + "BayesHmm (generic function with 4 methods)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "@model BayesHmm(y) = begin\n", + " s = tzeros(Int64, N)\n", + " m = tzeros(Dual, K)\n", + " T = Array{Array}(K)\n", + " for i = 1:K\n", + " T[i] ~ Dirichlet(ones(K)/K)\n", + " # m[i] ~ Normal(1, 0.1) # Defining m this way causes label-switching problem.\n", + " m[i] ~ Normal(i, 0.01)\n", + " end\n", + " s[1] ~ Categorical(ones(Float64, K)/K)\n", + " for i = 2:N\n", + " s[i] ~ Categorical(vec(T[s[i-1]]))\n", + " y[i] ~ Normal(m[s[i]], 0.01)\n", + " end\n", + " return(s, m, T)\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Turing.PG(50,1000,Turing.resampleSystematic,0.5,Set{Any}())" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g1 = Gibbs(300, HMC(2, 0.2, 5, :m, :T), PG(50, 5, :s))\n", + "g2 = PG(50, 1000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "T[1] = ForwardDiff.Dual{0,Float64}[Dual(0.117584),Dual(0.744224),Dual(0.138192)], T[1] ~ Distributions.Dirichlet{Float64}(alpha=[0.333333,0.333333,0.333333])\n", + "m[1] = Dual(0.9967711359430271), m[1] ~ Distributions.Normal{Float64}(μ=1.0, σ=0.01)\n", + "T[2] = ForwardDiff.Dual{0,Float64}[Dual(0.0064186),Dual(0.0065692),Dual(0.987012)], T[2] ~ Distributions.Dirichlet{Float64}(alpha=[0.333333,0.333333,0.333333])\n", + "m[2] = Dual(2.007863731847725), m[2] ~ Distributions.Normal{Float64}(μ=2.0, σ=0.01)\n", + "T[3] = ForwardDiff.Dual{0,Float64}[Dual(0.713092),Dual(0.234245),Dual(0.0526634)], T[3] ~ Distributions.Dirichlet{Float64}(alpha=[0.333333,0.333333,0.333333])\n", + "m[3] = Dual(2.9919946593762465), m[3] ~ Distributions.Normal{Float64}(μ=3.0, σ=0.01)\n", + "s[1] = Dual(3), s[1] ~ Distributions.Categorical{Float64}(K=3, p=[0.333333,0.333333,0.333333])\n" + ] + } + ], + "source": [ + "s1 = @sample(BayesHmm(y), g1);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "describe(s1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "m = s1[:m][111];\n", + "s = s1[:s][111];\n", + "PyPlot.plot(y, linestyle=\"None\", marker=\"+\", color = \"r\")\n", + "PyPlot.plot(m[s], linestyle=\"-\", marker=\"*\", color = \"b\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "s2 = @sample(BayesHmm(y), g2);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "describe(s2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + 
"source": [ + "m = s2[:m][150];\n", + "s = s2[:s][150];\n", + "PyPlot.plot(y, linestyle=\"None\", marker=\"+\", color = \"r\")\n", + "PyPlot.plot(m[s], linestyle=\"-\", marker=\"*\", color = \"b\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Julia 0.5.0", + "language": "julia", + "name": "julia-0.5" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "0.5.0" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example-models/notebooks/gdemo-geweke.ipynb b/example-models/notebooks/gdemo-geweke.ipynb new file mode 100644 index 0000000000..2566beb7a3 --- /dev/null +++ b/example-models/notebooks/gdemo-geweke.ipynb @@ -0,0 +1,283 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "using Distributions, Turing\n", + "using Gadfly" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Assume - `s` is a parameter\n", + " Assume - `m` is a parameter\n", + " Assume - `y` is a parameter\n", + " Observe - `x` is an observation\n" + ] + }, + { + "data": { + "text/plain": [ + "gdemo2 (generic function with 4 methods)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "@model gdemo2(x, bkstep) = begin\n", + " y = similar(x)\n", + " if bkstep == false \n", + " # Forward Sample\n", + " s ~ InverseGamma(2,3)\n", + " m ~ Normal(0,sqrt(s))\n", + " y[1] ~ Normal(m, sqrt(s))\n", + " y[2] ~ Normal(m, sqrt(s))\n", + " elseif bkstep == true \n", + " # Backward Step 1: theta ~ theta | x\n", + " s ~ InverseGamma(2,3)\n", + " m ~ Normal(0,sqrt(s))\n", + " x[1] ~ Normal(m, sqrt(s))\n", + " x[2] ~ Normal(m, sqrt(s))\n", + " # Backward Step 2: x ~ x | theta\n", + " y[1] ~ Normal(m, sqrt(s))\n", + " y[2] ~ Normal(m, sqrt(s))\n", + " end\n", + " return s, m, y\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "fw = PG(20, 300)\n", + "bk = Gibbs(10, PG(10,10, :s, :y), HMC(1, 0.25, 5, :m));" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[PG]: Finshed within 2.7866458892822266 seconds\n" + ] + } + ], + "source": [ + "s = @sample(gdemo2([1.5, 2], false), fw);" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iterations = 1:300\n", + "Thinning interval = 1\n", + "Chains = 1\n", + "Samples per chain = 300\n", + "\n", + "Empirical Posterior Estimates:\n", + " Mean SD Naive SE MCSE ESS\n", + " m 0.186876338 1.5828736 0.09138725 0.031917705 300\n", + "y[1] 0.021829037 2.4501294 0.14145829 0.080212894 300\n", + "y[2] 0.295076622 2.4325274 0.14044203 0.103072231 300\n", + " s 3.253114980 5.5768925 0.32198204 0.073906481 300\n", + "\n", + "Quantiles:\n", + " 2.5% 25.0% 50.0% 75.0% 97.5% \n", + " m -2.74503914 -0.76699002 0.118378945 1.0859695 3.4705196\n", + "y[1] -4.06774709 -1.35406974 0.044656838 1.2878395 4.0238171\n", + "y[2] -3.71981601 
-0.95970933 0.145579527 1.4402614 5.3891412\n", + " s 0.48593145 1.08023792 1.781509454 3.3918546 13.4523979\n", + "\n" + ] + } + ], + "source": [ + "describe(s)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Gibbs]: Finshed within 4.269287109375 seconds\n", + "[Gibbs]: Finshed within 0.07929396629333496 seconds\n", + "[Gibbs]: Finshed within 0.0547490119934082 seconds\n", + "[Gibbs]: Finshed within 0.06913399696350098 seconds\n", + "[Gibbs]: Finshed within 0.06143689155578613 seconds\n", + "[Gibbs]: Finshed within 0.09006118774414062 seconds\n" + ] + } + ], + "source": [ + "N = 100\n", + "x = [s[:y][1]...]\n", + "s_bk = Array{Turing.Chain}(N)\n", + "\n", + "for i = 1:N\n", + " s_bk[i] = @sample(gdemo2(x, true), bk);\n", + " x = [s_bk[i][:y][1]...];\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "s2 = vcat(s_bk...);\n", + "describe(s2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "p = plot(s);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "hstack(p[1:2])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "hstack(p[3:4])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "heideldiag(s)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import Turing.@sample\n", + "\n", + "macro sample(modelcall, alg, optionalps...)\n", + " modelf = modelcall.args[1] # get the function symbol for model\n", + " psyms = modelcall.args[2:end] # get the (passed-in) symbols\n", + " esc(quote\n", + " data_ = Dict()\n", + " arglist_ = Turing.TURING[:modelarglist]\n", + " localsyms_ = $psyms\n", + " for i_ = 1:length(arglist_)\n", + " localsym_ = localsyms_[i_] # passed-in symbols are local\n", + " arg_ = arglist_[i_] # arglist are symbols in model scope\n", + " data_[arg_] = eval(localsym_)\n", + " end\n", + " sample($modelf, data_, $alg, $optionalps...)\n", + " end)\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Julia 0.5.0", + "language": "julia", + "name": "julia-0.5" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "0.5.0" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example-models/stan-models/MoC.model.jl b/example-models/stan-models/MoC.model.jl index 25437102b6..63ec620769 100644 --- a/example-models/stan-models/MoC.model.jl +++ b/example-models/stan-models/MoC.model.jl @@ -6,11 +6,18 @@ for k = 1:K phi[k] ~ Dirichlet(beta) end - for m = 1:M - z[m] ~ Categorical(theta) - end + #for m = 1:M + # z[m] ~ Categorical(theta) + #end + + log_theta = log(theta) + Turing.acclogp!(vi, sum(log_theta[z[1:M]])) + + log_phi = 
map(x->log(x), phi) for n = 1:N - w[n] ~ Categorical(phi[z[doc[n]]]) + # w[n] ~ Categorical(phi[z[doc[n]]]) + Turing.acclogp!(vi, log_phi[z[doc[n]]][w[n]]) end + phi end diff --git a/example-models/stan-models/lda.model.jl b/example-models/stan-models/lda.model.jl index 66be471b9a..d32486f81c 100644 --- a/example-models/stan-models/lda.model.jl +++ b/example-models/stan-models/lda.model.jl @@ -43,9 +43,11 @@ # z[n] ~ Categorical(theta[doc[n]]) # end + phi_dot_theta = [log([dot(map(p -> p[i], phi), theta[m]) for i = 1:V]) for m=1:M] for n = 1:N - phi_dot_theta = [dot(map(p -> p[i], phi), theta[doc[n]]) for i = 1:V] - w[n] ~ Categorical(phi_dot_theta) + # phi_dot_theta = [dot(map(p -> p[i], phi), theta[doc[n]]) for i = 1:V] + # w[n] ~ Categorical(phi_dot_theta) + Turing.acclogp!(vi, phi_dot_theta[doc[n]][w[n]]) end end diff --git a/src/core/ad.jl b/src/core/ad.jl index 88b8aa55e4..4c76fe72c4 100644 --- a/src/core/ad.jl +++ b/src/core/ad.jl @@ -10,14 +10,14 @@ grad = gradient(vi, model, spl) end ``` """ -gradient(_vi::VarInfo, model::Function) = gradient(_vi, model, nothing) +gradient(vi::VarInfo, model::Function) = gradient2(vi, model, nothing) gradient(_vi::VarInfo, model::Function, spl::Union{Void, Sampler}) = begin vi = deepcopy(_vi) f(x::Vector) = begin vi[spl] = x - -runmodel(model, vi, spl).logp + -getlogp(runmodel(model, vi, spl)) end g = x -> ForwardDiff.gradient(f, x::Vector, @@ -26,58 +26,70 @@ gradient(_vi::VarInfo, model::Function, spl::Union{Void, Sampler}) = begin g(vi[spl]) end -gradient2(_vi::VarInfo, model::Function, spl::Union{Void, Sampler}) = begin +gradient2(vi::VarInfo, model::Function, spl::Union{Void, Sampler}) = begin + + θ = realpart(vi[spl]) + if spl != nothing && haskey(spl.info, :grad_cache) + if haskey(spl.info[:grad_cache], θ) + return spl.info[:grad_cache][θ] + end + end + # Initialisation - vi = deepcopy(_vi); grad = Vector{Float64}() + grad = Vector{Float64}() # Split keys(vi) into chunks, dprintln(4, "making chunks...") - prior_key_chunks = []; key_chunk = []; prior_dim = 0 + vn_chunks = []; vn_chunk = []; chunk_dim = 0 - gkeys = getvns(vi, spl) - for k in gkeys + vns_all = getvns(vi, spl) + for k in vns_all l = length(getrange(vi, k)) # dimension for the current variable - if prior_dim + l > CHUNKSIZE - push!(prior_key_chunks, # store the previous chunk - (key_chunk, prior_dim)) - key_chunk = [] # initialise a new chunk - prior_dim = 0 # reset dimension counter + if chunk_dim + l > CHUNKSIZE + push!(vn_chunks, # store the previous chunk + (vn_chunk, chunk_dim)) + vn_chunk = [] # initialise a new chunk + chunk_dim = 0 # reset dimension counter end - push!(key_chunk, k) # put the current variable into the current chunk - prior_dim += l # update dimension counter + push!(vn_chunk, k) # put the current variable into the current chunk + chunk_dim += l # update dimension counter end - push!(prior_key_chunks, # push the last chunk - (key_chunk, prior_dim)) + push!(vn_chunks, # push the last chunk + (vn_chunk, chunk_dim)) # Chunk-wise forward AD - for (key_chunk, prior_dim) in prior_key_chunks - expand!(vi) # NOTE: we don't have to call last in the end - # because we don't return the amended VarInfo - # Set dual part correspondingly + for (vn_chunk, chunk_dim) in vn_chunks + # 1. 
Set dual part correspondingly dprintln(4, "set dual...") - dps = zeros(prior_dim) - prior_count = 1 - for k in gkeys + dps = zeros(chunk_dim) + + dim_count = 1 + for k in vns_all l = length(getrange(vi, k)) reals = realpart(getval(vi, k)) range = getrange(vi, k) - if k in key_chunk # for each variable to compute gradient in this round + if k in vn_chunk # for each variable to compute gradient in this round dprintln(5, "making dual...") for i = 1:l - dps[prior_count] = 1 # set dual part + dps[dim_count] = 1 # set dual part vi[range[i]] = ForwardDiff.Dual(reals[i], dps...) - dps[prior_count] = 0 # reset dual part - prior_count += 1 # count + dps[dim_count] = 0 # reset dual part + dim_count += 1 # count end dprintln(5, "make dual done") else # for other varilables (no gradient in this round) - vi[range] = map(r -> Dual{prior_dim, Float64}(r), reals) + vi[range] = map(r -> Dual{chunk_dim, Float64}(r), reals) end end + + # 2. Run model and collect gradient vi = runmodel(model, vi, spl) - # Collect gradient dprintln(4, "collect gradients from logp...") - append!(grad, collect(dualpart(-vi.logp))) + append!(grad, collect(dualpart(-getlogp(vi)))) + end + + if spl != nothing && haskey(spl.info, :grad_cache) + spl.info[:grad_cache][θ] = grad end grad diff --git a/src/core/compiler.jl b/src/core/compiler.jl index 063dbcf05c..95ea7be869 100644 --- a/src/core/compiler.jl +++ b/src/core/compiler.jl @@ -58,12 +58,22 @@ macro ~(left, right) csym_str = string(Turing._compiler_[:fname])*"_var"* string(@__LINE__) sym = Symbol($(string(left))) vn = Turing.VarName(vi, Symbol(csym_str), sym, "") - $(left) = Turing.assume( - sampler, - $(right), # dist - vn, # VarName - vi # VarInfo - ) + if isa($(right), Vector) + $(left) = Turing.assume( + sampler, + $(right), # dist + vn, # VarName + $(left), + vi # VarInfo + ) + else + $(left) = Turing.assume( + sampler, + $(right), # dist + vn, # VarName + vi # VarInfo + ) + end end else # Indexing diff --git a/src/core/varinfo.jl b/src/core/varinfo.jl index d84d887a17..fea5212318 100644 --- a/src/core/varinfo.jl +++ b/src/core/varinfo.jl @@ -27,6 +27,8 @@ sym(vn::VarName) = Symbol("$(vn.sym)$(vn.indexing)") # simplified symbol cuid(vn::VarName) = (vn.csym, vn.sym, vn.indexing) # the uid which is only available at compile time +copybyindex(vn::VarName, indexing::String) = VarName(vn.csym, vn.sym, indexing, vn.counter) + ########### # VarInfo # ########### @@ -37,24 +39,30 @@ type VarInfo ranges :: Vector{UnitRange{Int}} vals :: Vector{Vector{Real}} dists :: Vector{Distribution} - gids :: Vector{Int} # group ids - trans :: Vector{Bool} - logp :: Real # NOTE: logp should be a vector. + gids :: Vector{Int} + trans :: Vector{Vector{Bool}} + logp :: Vector{Real} logw :: Real # NOTE: importance weight when sampling from the prior. index :: Int # index of current randomness num_produce :: Int # num of produce calls from trace, each produce corresponds to an observe. 
- VarInfo() = new( - Dict{VarName, Int}(), - Vector{VarName}(), - Vector{UnitRange{Int}}(), - Vector{Vector{Real}}(), - Vector{Distribution}(), - Vector{Int}(), - Vector{Bool}(), - 0.0,0.0, - 0, - 0 - ) + VarInfo() = begin + vals = Vector{Vector{Real}}(); push!(vals, Vector{Real}()) + trans = Vector{Vector{Real}}(); push!(trans, Vector{Real}()) + logp = Vector{Real}(); push!(logp, zero(Real)) + + new( + Dict{VarName, Int}(), + Vector{VarName}(), + Vector{UnitRange{Int}}(), + vals, + Vector{Distribution}(), + Vector{Int}(), + trans, logp, + zero(Real), + 0, + 0 + ) + end end typealias VarView Union{Int,UnitRange,Vector{Int},Vector{UnitRange}} @@ -62,18 +70,20 @@ typealias VarView Union{Int,UnitRange,Vector{Int},Vector{UnitRange}} getidx(vi::VarInfo, vn::VarName) = vi.idcs[vn] getrange(vi::VarInfo, vn::VarName) = vi.ranges[getidx(vi, vn)] +getranges(vi::VarInfo, vns::Vector{VarName}) = union(map(vn -> getrange(vi, vn), vns)...) getval(vi::VarInfo, vn::VarName) = vi.vals[end][getrange(vi, vn)] setval!(vi::VarInfo, val, vn::VarName) = vi.vals[end][getrange(vi, vn)] = val +getval(vi::VarInfo, vns::Vector{VarName}) = vi.vals[end][getranges(vi, vns)] + getval(vi::VarInfo, view::VarView) = vi.vals[end][view] setval!(vi::VarInfo, val::Any, view::VarView) = vi.vals[end][view] = val -setval!(vi::VarInfo, val::Any, view::Vector{UnitRange}) = map(v->vi.vals[end][v] = val, view) +setval!(vi::VarInfo, val::Any, view::Vector{UnitRange}) = map(v -> vi.vals[end][v] = val, view) getall(vi::VarInfo) = vi.vals[end] setall!(vi::VarInfo, val::Any) = vi.vals[end] = val - getsym(vi::VarInfo, vn::VarName) = vi.vns[getidx(vi, vn)].sym getdist(vi::VarInfo, vn::VarName) = vi.dists[getidx(vi, vn)] @@ -82,32 +92,33 @@ getgid(vi::VarInfo, vn::VarName) = vi.gids[getidx(vi, vn)] setgid!(vi::VarInfo, gid::Int, vn::VarName) = vi.gids[getidx(vi, vn)] = gid -istransformed(vi::VarInfo, vn::VarName) = vi.trans[getidx(vi, vn)] +istrans(vi::VarInfo, vn::VarName) = vi.trans[end][getidx(vi, vn)] +settrans!(vi::VarInfo, trans::Bool, vn::VarName) = vi.trans[end][getidx(vi, vn)] = trans + +getlogp(vi::VarInfo) = vi.logp[end] +setlogp!(vi::VarInfo, logp::Real) = vi.logp[end] = logp +acclogp!(vi::VarInfo, logp::Real) = vi.logp[end] += logp isempty(vi::VarInfo) = isempty(vi.idcs) # X -> R for all variables associated with given sampler -function link(_vi::VarInfo, spl::Sampler) - vi = deepcopy(_vi) - gvns = getvns(vi, spl) - for vn in gvns +link!(vi::VarInfo, spl::Sampler) = begin + for vn in getvns(vi, spl) dist = getdist(vi, vn) setval!(vi, vectorize(dist, link(dist, reconstruct(dist, getval(vi, vn)))), vn) - vi.trans[getidx(vi, vn)] = true + settrans!(vi, true, vn) end - vi + setlogp!(vi, zero(Real)) end # R -> X for all variables associated with given sampler -function invlink(_vi::VarInfo, spl::Sampler) - vi = deepcopy(_vi) - gvns = getvns(vi, spl) - for vn in gvns +invlink!(vi::VarInfo, spl::Sampler) = begin + for vn in getvns(vi, spl) dist = getdist(vi, vn) setval!(vi, vectorize(dist, invlink(dist, reconstruct(dist, getval(vi, vn)))), vn) - vi.trans[getidx(vi, vn)] = false + settrans!(vi, false, vn) end - vi + setlogp!(vi, zero(Real)) end function cleandual!(vi::VarInfo) @@ -115,7 +126,7 @@ function cleandual!(vi::VarInfo) range = getrange(vi, vn) vi[range] = realpart(vi[range]) end - vi.logp = realpart(vi.logp) + setlogp!(vi, realpart(getlogp(vi))) vi.logw = realpart(vi.logw) end @@ -128,13 +139,15 @@ Base.getindex(vi::VarInfo, vn::VarName) = begin @assert haskey(vi, vn) "[Turing] attempted to replay unexisting variables in 
VarInfo" dist = getdist(vi, vn) r = reconstruct(dist, getval(vi, vn)) - r = istransformed(vi, vn) ? invlink(dist, r) : r + r = istrans(vi, vn) ? invlink(dist, r) : r end -# Base.setindex!(vi::VarInfo, r, vn::VarName) = begin -# dist = getdist(vi, vn) -# setval!(vi, vectorize(dist, r), vn) -# end +Base.getindex(vi::VarInfo, vns::Vector{VarName}) = begin + @assert haskey(vi, vns[1]) "[Turing] attempted to replay unexisting variables in VarInfo" + dist = getdist(vi, vns[1]) + rs = reconstruct(dist, getval(vi, vns)) + rs = istrans(vi, vns[1]) ? invlink(dist, rs) : rs +end # NOTE: vi[view] will just return what insdie vi (no transformations applied) Base.getindex(vi::VarInfo, view::VarView) = getval(vi, view) @@ -174,8 +187,6 @@ push!(vi::VarInfo, vn::VarName, r::Any, dist::Distribution, gid::Int) = begin @assert ~(vn in vns(vi)) "[push!] attempt to add an exisitng variable $(sym(vn)) ($(vn)) to VarInfo (keys=$(keys(vi))) with dist=$dist, gid=$gid" - if isempty(vi.vals) push!(vi.vals, Vector{Real}()) end - val = vectorize(dist, r) vi.idcs[vn] = length(vi.idcs) + 1 @@ -185,7 +196,7 @@ push!(vi::VarInfo, vn::VarName, r::Any, dist::Distribution, gid::Int) = begin append!(vi.vals[end], val) push!(vi.dists, dist) push!(vi.gids, gid) - push!(vi.trans, false) + push!(vi.trans[end], false) vi end @@ -200,11 +211,22 @@ end # Utility functions for VarInfo # ################################# -expand!(vi::VarInfo) = push!(vi.vals, deepcopy(vi.vals[end])) -last(_vi::VarInfo) = begin - vi = deepcopy(_vi) - splice!(vi.vals, 1:length(vi.vals)-1) - vi +expand!(vi::VarInfo) = begin + push!(vi.vals, realpart(vi.vals[end])); vi.vals[end], vi.vals[end-1] = vi.vals[end-1], vi.vals[end] + push!(vi.trans, deepcopy(vi.trans[end])) + push!(vi.logp, zero(Real)) +end + +shrink!(vi::VarInfo) = begin + pop!(vi.vals) + pop!(vi.trans) + pop!(vi.logp) +end + +last!(vi::VarInfo) = begin + vi.vals = vi.vals[end:end] + vi.trans = vi.trans[end:end] + vi.logp = vi.logp[end:end] end # Get all indices of variables belonging to gid or 0 @@ -214,7 +236,6 @@ getidcs(vi::VarInfo, spl::Sampler) = begin # NOTE: 0b00 is the sanity flag for # |\____ getidcs (mask = 0b10) # \_____ getranges (mask = 0b01) - # TODO: set these as constants if ~haskey(spl.info, :cache_updated) spl.info[:cache_updated] = CACHERESET end if haskey(spl.info, :idcs) && (spl.info[:cache_updated] & CACHEIDCS) > 0 spl.info[:idcs] diff --git a/src/samplers/gibbs.jl b/src/samplers/gibbs.jl index 4d6e83b247..e726a95449 100644 --- a/src/samplers/gibbs.jl +++ b/src/samplers/gibbs.jl @@ -75,6 +75,10 @@ function sample(model::Function, alg::Gibbs) dprintln(2, "$(typeof(local_spl)) stepping...") if isa(local_spl.alg, GibbsComponent) + if isa(local_spl.alg, Hamiltonian) # clean cache + local_spl.info[:grad_cache] = Dict{Vector,Vector}() + end + for _ = 1:local_spl.alg.n_iters dprintln(2, "recording old θ...") varInfo = step(model, local_spl, varInfo, i==1) diff --git a/src/samplers/hmc.jl b/src/samplers/hmc.jl index 3ba5684248..d096b7e490 100644 --- a/src/samplers/hmc.jl +++ b/src/samplers/hmc.jl @@ -50,6 +50,9 @@ end Sampler(alg::Hamiltonian) = begin info=Dict{Symbol, Any}() info[:accept_his] = [] + info[:lf_num] = 0 + info[:eval_num] = 0 + info[:grad_cache] = Dict{Vector,Vector}() Sampler(alg, info) end @@ -58,29 +61,36 @@ sample(model::Function, alg::Hamiltonian) = sample(model, alg, CHUNKSIZE) # NOTE: in the previous code, `sample` would call `run`; this is # now simplified: `sample` and `run` are merged into one function. 
function sample{T<:Hamiltonian}(model::Function, alg::T, chunk_size::Int) - global CHUNKSIZE = chunk_size; + default_chunk_size = CHUNKSIZE + global CHUNKSIZE = chunk_size + spl = Sampler(alg); alg_str = isa(alg, HMC) ? "HMC" : isa(alg, HMCDA) ? "HMCDA" : isa(alg, NUTS) ? "NUTS" : "Hamiltonian" # Initialization + time_total = zero(Float64) n = spl.alg.n_iters samples = Array{Sample}(n) weight = 1 / n for i = 1:n samples[i] = Sample(weight, Dict{Symbol, Any}()) end - accept_num = 0 # record the accept number vi = model() - if spl.alg.gid == 0 vi = link(vi, spl) end + if spl.alg.gid == 0 + link!(vi, spl) + runmodel(model, vi, spl) + end # HMC steps spl.info[:progress] = ProgressMeter.Progress(n, 1, "[$alg_str] Sampling...", 0) for i = 1:n dprintln(2, "$alg_str stepping...") - vi = step(model, spl, vi, i==1) + + time_total += @elapsed vi = step(model, spl, vi, i==1) + if spl.info[:accept_his][end] # accepted => store the new predcits samples[i].value = Sample(vi).value else # rejected => store the previous predcits @@ -89,23 +99,44 @@ function sample{T<:Hamiltonian}(model::Function, alg::T, chunk_size::Int) ProgressMeter.next!(spl.info[:progress]) end - if ~isa(alg, NUTS) # cccept rate for NUTS is meaningless - so no printing - accept_rate = accept_num / n # calculate the accept rate - println("[$alg_str] Done with accept rate = $accept_rate.") + println("[$alg_str] Finished with") + println(" Running time = $time_total;") + if ~isa(alg, NUTS) # accept rate for NUTS is meaningless - so no printing + accept_rate = sum(spl.info[:accept_his]) / n # calculate the accept rate + println(" Accept rate = $accept_rate;") end + println(" #lf / sample = $(spl.info[:lf_num] / n);") + println(" #evals / sample = $(spl.info[:eval_num] / n).") + + global CHUNKSIZE = default_chunk_size Chain(0, samples) # wrap the result by Chain end -function assume{T<:Hamiltonian}(spl::Sampler{T}, dist::Distribution, vn::VarName, vi::VarInfo) +assume{T<:Hamiltonian}(spl::Sampler{T}, dist::Distribution, vn::VarName, vi::VarInfo) = begin dprintln(2, "assuming...") updategid!(vi, vn, spl) r = vi[vn] - vi.logp += logpdf(dist, r, istransformed(vi, vn)) + acclogp!(vi, logpdf(dist, r, istrans(vi, vn))) r end -function observe{T<:Hamiltonian}(spl::Sampler{T}, d::Distribution, value::Any, vi::VarInfo) - dprintln(2, "observing...") - vi.logp += logpdf(d, map(x -> Dual(x), value)) +assume{A<:Hamiltonian,D<:Distribution}(spl::Sampler{A}, dists::Vector{D}, vn::VarName, variable::Any, vi::VarInfo) = begin + @assert length(dists) == 1 "[assume] Turing only supports vectorizing i.i.d. distributions" + dist = dists[1] + n = size(variable)[end] + + vns = map(i -> copybyindex(vn, "[$i]"), 1:n) + + rs = vi[vns] + + acclogp!(vi, sum(logpdf(dist, rs, istrans(vi, vns[1])))) + + rs end + +observe{A<:Hamiltonian}(spl::Sampler{A}, d::Distribution, value::Any, vi::VarInfo) = + observe(nothing, d, value, vi) + +observe{A<:Hamiltonian,D<:Distribution}(spl::Sampler{A}, ds::Vector{D}, value::Any, vi::VarInfo) = + observe(nothing, ds, value, vi) diff --git a/src/samplers/hmcda.jl b/src/samplers/hmcda.jl index 1fe08440af..46cb966b60 100644 --- a/src/samplers/hmcda.jl +++ b/src/samplers/hmcda.jl @@ -49,16 +49,10 @@ end function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool) if is_first - vi_0 = deepcopy(vi) - if spl.alg.delta > 0 - - dprintln(3, "X -> R...") - if spl.alg.gid != 0 vi = link(vi, spl) end - - # Heuristically find optimal ϵ - ϵ = find_good_eps(model, spl, vi) - + if spl.alg.gid != 0 link!(vi, spl) end # X -> R + ϵ =
find_good_eps(model, vi, spl) # heuristically find optimal ϵ + if spl.alg.gid != 0 invlink!(vi, spl) end # R -> X else ϵ = spl.info[:ϵ][end] end @@ -71,11 +65,8 @@ function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool) push!(spl.info[:accept_his], true) - vi_0 + vi else - dprintln(2, "recording old θ...") - old_vi = deepcopy(vi) - # Set parameters δ = spl.alg.delta λ = spl.alg.lambda @@ -85,36 +76,38 @@ function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool) μ, γ, t_0, κ = spl.info[:μ], 0.05, 10, 0.75 ϵ_bar, H_bar = spl.info[:ϵ_bar], spl.info[:H_bar] + dprintln(3, "X-> R...") + if spl.alg.gid != 0 + link!(vi, spl) + runmodel(model, vi, spl) + end + dprintln(2, "sampling momentum...") p = sample_momentum(vi, spl) - dprintln(3, "X -> R...") - if spl.alg.gid != 0 vi = link(vi, spl) end - - dprintln(2, "recording old H...") - oldH = find_H(p, model, vi, spl) + dprintln(2, "recording old values...") + old_θ = vi[spl]; old_logp = getlogp(vi) + old_H = find_H(p, model, vi, spl) τ = max(1, round(Int, λ / ϵ)) dprintln(2, "leapfrog for $τ steps with step size $ϵ") - vi, p, τ_valid = leapfrog(vi, p, τ, ϵ, model, spl) + θ, p, τ_valid = leapfrog2(old_θ, p, τ, ϵ, model, vi, spl) dprintln(2, "computing new H...") H = τ_valid == 0 ? Inf : find_H(p, model, vi, spl) - dprintln(2, "computing ΔH...") - ΔH = H - oldH - - α = min(1, exp(-ΔH)) # MH accept rate + dprintln(2, "computing accept rate α...") + α = min(1, exp(-(H - old_H))) haskey(spl.info, :progress) && ProgressMeter.update!( spl.info[:progress], spl.info[:progress].counter; showvalues = [(:ϵ, ϵ), (:α, α)] ) - # Use Dual Averaging to adapt ϵ + dprintln(2, "adapting step size ϵ...") m = spl.info[:m] += 1 if m < spl.alg.n_adapt - dprintln(1, " ϵ = $ϵ, α = $α, exp(-ΔH)=$(exp(-ΔH))") + dprintln(1, " ϵ = $ϵ, α = $α") H_bar = (1 - 1 / (m + t_0)) * H_bar + 1 / (m + t_0) * (δ - α) ϵ = exp(μ - sqrt(m) / γ * H_bar) ϵ_bar = exp(m^(-κ) * log(ϵ) + (1 - m^(-κ)) * log(ϵ_bar)) @@ -125,16 +118,18 @@ function step(model, spl::Sampler{HMCDA}, vi::VarInfo, is_first::Bool) push!(spl.info[:ϵ], spl.info[:ϵ_bar]) end - dprintln(3, "R -> X...") - if spl.alg.gid != 0 vi = invlink(vi, spl); cleandual!(vi) end - dprintln(2, "decide wether to accept...") - if rand() < α # accepted + if rand() < α # accepted push!(spl.info[:accept_his], true) - vi - else # rejected + else # rejected push!(spl.info[:accept_his], false) - old_vi + vi[spl] = old_θ # reset Θ + setlogp!(vi, old_logp) # reset logp end + + dprintln(3, "R -> X...") + if spl.alg.gid != 0 invlink!(vi, spl); cleandual!(vi) end + + vi end end diff --git a/src/samplers/is.jl b/src/samplers/is.jl index 4a1dadd3da..fe2337582a 100644 --- a/src/samplers/is.jl +++ b/src/samplers/is.jl @@ -57,4 +57,4 @@ assume(spl::Sampler{IS}, dist::Distribution, vn::VarName, vi::VarInfo) = begin end observe(spl::Sampler{IS}, dist::Distribution, value::Any, vi::VarInfo) = - vi.logp += logpdf(dist, value) + acclogp!(vi, logpdf(dist, value)) diff --git a/src/samplers/nuts.jl b/src/samplers/nuts.jl index d6ff9c0ae4..769c4f441d 100644 --- a/src/samplers/nuts.jl +++ b/src/samplers/nuts.jl @@ -32,7 +32,7 @@ immutable NUTS <: InferenceAlgorithm gid :: Int # group ID NUTS(n_adapt::Int, delta::Float64, space...) = - new(1, isa(space, Symbol) ? Set([space]) : Set(space), delta, Set(), 0) + new(1, n_adapt, delta, isa(space, Symbol) ? Set([space]) : Set(space), 0) NUTS(n_iters::Int, n_adapt::Int, delta::Float64, space...) = new(n_iters, n_adapt, delta, isa(space, Symbol) ? 
Set([space]) : Set(space), 0) NUTS(n_iters::Int, delta::Float64) = begin @@ -44,17 +44,9 @@ end function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool) if is_first - vi_0 = deepcopy(vi) - - dprintln(3, "X -> R...") - if spl.alg.gid != 0 vi = link(vi, spl) end - - # Heuristically find optimal ϵ - # ϵ_bar, ϵ = find_good_eps(model, spl, vi) - ϵ = find_good_eps(model, spl, vi) - - dprintln(3, "R -> X...") - if spl.alg.gid != 0 vi = invlink(vi, spl) end + if spl.alg.gid != 0 link!(vi, spl) end # X -> R + ϵ = find_good_eps(model, vi, spl) # heuristically find optimal ϵ + if spl.alg.gid != 0 invlink!(vi, spl) end # R -> X spl.info[:ϵ] = ϵ spl.info[:μ] = log(10 * ϵ) @@ -65,37 +57,43 @@ function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool) push!(spl.info[:accept_his], true) - vi_0 + vi else # Set parameters δ = spl.alg.delta ϵ = spl.info[:ϵ] + dprintln(2, "current ϵ: $ϵ") μ, γ, t_0, κ = spl.info[:μ], 0.05, 10, 0.75 ϵ_bar, H_bar = spl.info[:ϵ_bar], spl.info[:H_bar] + dprintln(3, "X -> R...") + if spl.alg.gid != 0 + link!(vi, spl) + runmodel(model, vi, spl) + end + dprintln(2, "sampling momentum...") p = sample_momentum(vi, spl) - dprintln(3, "X -> R...") - if spl.alg.gid != 0 vi = link(vi, spl) end - dprintln(2, "recording old H...") H0 = find_H(p, model, vi, spl) dprintln(3, "sample slice variable u") logu = log(rand()) + (-H0) - θm, θp, rm, rp, j, vi_new, n, s = deepcopy(vi), deepcopy(vi), deepcopy(p), deepcopy(p), 0, deepcopy(vi), 1, 1 + θ = vi[spl] + logp = getlogp(vi) + θm, θp, rm, rp, j, n, s = θ, θ, p, p, 0, 1, 1 local α, n_α - while s == 1 && j <= 2 + while s == 1 && j <= 6 v_j = rand([-1, 1]) # Note: this variable actually does not depend on j; # it is set as `v_j` just to be consistent to the paper if v_j == -1 - θm, rm, _, _, θ′, n′, s′, α, n_α = build_tree(θm, rm, logu, v_j, j, ϵ, H0, model, spl) + θm, rm, _, _, θ′, logp′, n′, s′, α, n_α = build_tree(θm, rm, logu, v_j, j, ϵ, H0, model, spl, vi) else - _, _, θp, rp, θ′, n′, s′, α, n_α = build_tree(θp, rp, logu, v_j, j, ϵ, H0, model, spl) + _, _, θp, rp, θ′, logp′, n′, s′, α, n_α = build_tree(θp, rp, logu, v_j, j, ϵ, H0, model, spl, vi) end haskey(spl.info, :progress) && ProgressMeter.update!(spl.info[:progress], @@ -103,17 +101,15 @@ function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool) showvalues = [(:ϵ, ϵ), (:tree_depth, j)]) if s′ == 1 && rand() < min(1, n′ / n) - vi_new = deepcopy(θ′) + θ = θ′ + logp = logp′ end n = n + n′ - s = s′ * (dot(θp[spl] - θm[spl], rm) >= 0 ? 1 : 0) * (dot(θp[spl] - θm[spl], rp) >= 0 ? 1 : 0) + s = s′ * (dot(θp - θm, rm) >= 0 ? 1 : 0) * (dot(θp - θm, rp) >= 0 ? 
1 : 0) j = j + 1 end - dprintln(3, "R -> X...") - if spl.alg.gid != 0 vi = invlink(vi, spl); cleandual!(vi) end - # Use Dual Averaging to adapt ϵ m = spl.info[:m] += 1 if m < spl.alg.n_adapt @@ -129,11 +125,18 @@ function step(model::Function, spl::Sampler{NUTS}, vi::VarInfo, is_first::Bool) push!(spl.info[:accept_his], true) - vi_new + vi[spl] = θ + setlogp!(vi, logp) + + dprintln(3, "R -> X...") + if spl.alg.gid != 0 invlink!(vi, spl); cleandual!(vi) end + + vi end end -function build_tree(θ::VarInfo, r::Vector, logu::Float64, v::Int, j::Int, ϵ::Float64, H0::Float64, model::Function, spl::Sampler) +function build_tree(θ::Vector, r::Vector, logu::Float64, v::Int, j::Int, ϵ::Float64, H0::Float64, + model::Function, spl::Sampler, vi::VarInfo) doc""" - θ : model parameter - r : momentum variable @@ -145,31 +148,34 @@ function build_tree(θ::VarInfo, r::Vector, logu::Float64, v::Int, j::Int, ϵ::F """ if j == 0 # Base case - take one leapfrog step in the direction v. - θ′, r′, τ_valid = leapfrog(θ, r, 1, v * ϵ, model, spl) + θ′, r′, τ_valid = leapfrog2(θ, r, 1, v * ϵ, model, vi, spl) # Use old H to save computation - H′ = τ_valid == 0 ? Inf : find_H(r′, model, θ′, spl) + H′ = τ_valid == 0 ? Inf : find_H(r′, model, vi, spl) n′ = (logu <= -H′) ? 1 : 0 s′ = (logu < Δ_max + -H′) ? 1 : 0 α′ = exp(min(0, -H′ - (-H0))) - θ′, r′, deepcopy(θ′), deepcopy(r′), deepcopy(θ′), n′, s′, α′, 1 + θ′, r′, θ′, r′, θ′, getlogp(vi), n′, s′, α′, 1 else # Recursion - build the left and right subtrees. - θm, rm, θp, rp, θ′, n′, s′, α′, n′_α = build_tree(θ, r, logu, v, j - 1, ϵ, H0, model, spl) + θm, rm, θp, rp, θ′, logp′, n′, s′, α′, n′_α = build_tree(θ, r, logu, v, j - 1, ϵ, H0, model, spl, vi) if s′ == 1 if v == -1 - θm, rm, _, _, θ′′, n′′, s′′, α′′, n′′_α = build_tree(θm, rm, logu, v, j - 1, ϵ, H0, model, spl) + θm, rm, _, _, θ′′, logp′′, n′′, s′′, α′′, n′′_α = build_tree(θm, rm, logu, v, j - 1, ϵ, H0, model, spl, vi) else - _, _, θp, rp, θ′′, n′′, s′′, α′′, n′′_α = build_tree(θp, rp, logu, v, j - 1, ϵ, H0, model, spl) + _, _, θp, rp, θ′′, logp′′, n′′, s′′, α′′, n′′_α = build_tree(θp, rp, logu, v, j - 1, ϵ, H0, model, spl, vi) + end + if rand() < n′′ / (n′ + n′′) + θ′ = θ′′ + logp′ = logp′′ end - if rand() < n′′ / (n′ + n′′) θ′ = deepcopy(θ′′) end α′ = α′ + α′′ n′_α = n′_α + n′′_α - s′ = s′′ * (dot(θp[spl] - θm[spl], rm) >= 0 ? 1 : 0) * (dot(θp[spl] - θm[spl], rp) >= 0 ? 1 : 0) + s′ = s′′ * (dot(θp - θm, rm) >= 0 ? 1 : 0) * (dot(θp - θm, rp) >= 0 ? 1 : 0) n′ = n′ + n′′ end - θm, rm, θp, rp, θ′, n′, s′, α′, n′_α + θm, rm, θp, rp, θ′, logp′, n′, s′, α′, n′_α end end diff --git a/src/samplers/pgibbs.jl b/src/samplers/pgibbs.jl index 11669cfd98..1f866a5035 100644 --- a/src/samplers/pgibbs.jl +++ b/src/samplers/pgibbs.jl @@ -50,7 +50,7 @@ step(model::Function, spl::Sampler{PG}, vi::VarInfo, _::Bool) = step(model, spl, step(model::Function, spl::Sampler{PG}, vi::VarInfo) = begin particles = ParticleContainer{TraceR}(model) - vi.index = 0; vi.num_produce = 0; # We need this line cause fork2 deepcopy `vi`. + vi.index = 0; vi.num_produce = 0; # We need this line cause fork2 deepcopy `vi`. ref_particle = isempty(vi) ? 
nothing : fork2(TraceR(model, spl, vi)) @@ -119,8 +119,5 @@ assume{T<:Union{PG,SMC}}(spl::Sampler{T}, dist::Distribution, vn::VarName, _::Va end end -observe{T<:Union{PG,SMC}}(spl::Sampler{T}, dist::Distribution, value, vi) = begin - lp = logpdf(dist, value) - vi.logp += lp - produce(lp) -end +observe{T<:Union{PG,SMC}}(spl::Sampler{T}, dist::Distribution, value, vi) = + produce(logpdf(dist, value)) diff --git a/src/samplers/sampler.jl b/src/samplers/sampler.jl index 8d5f419a98..053d7b7a05 100644 --- a/src/samplers/sampler.jl +++ b/src/samplers/sampler.jl @@ -33,12 +33,35 @@ assume(spl::Void, dist::Distribution, vn::VarName, vi::VarInfo) = begin r = rand(dist) push!(vi, vn, r, dist, 0) end - vi.logp += logpdf(dist, r, istransformed(vi, vn)) + acclogp!(vi, logpdf(dist, r, istrans(vi, vn))) r end -observe(spl::Void, d::Distribution, value::Any, vi::VarInfo) = begin - lp = logpdf(d, value) - vi.logw += lp - vi.logp += lp +assume{T<:Distribution}(spl::Void, dists::Vector{T}, vn::VarName, variable::Any, vi::VarInfo) = begin + @assert length(dists) == 1 "[assume] Turing only supports vectorizing i.i.d. distributions" + dist = dists[1] + n = size(variable)[end] + + vns = map(i -> copybyindex(vn, "[$i]"), 1:n) + + if haskey(vi, vns[1]) + rs = vi[vns] + else + rs = rand(dist, n) + for i = 1:n + push!(vi, vns[i], rs[i], dist, 0) + end + end + + acclogp!(vi, sum(logpdf(dist, rs, istrans(vi, vns[1])))) + + rs +end + +observe(spl::Void, dist::Distribution, value::Any, vi::VarInfo) = + acclogp!(vi, logpdf(dist, value)) + +observe{T<:Distribution}(spl::Void, dists::Vector{T}, value::Any, vi::VarInfo) = begin + @assert length(dists) == 1 "[observe] Turing only supports vectorizing i.i.d. distributions" + acclogp!(vi, sum(logpdf(dists[1], value))) end diff --git a/src/samplers/support/helper.jl b/src/samplers/support/helper.jl index a4b7b24d55..5d0cd51935 100644 --- a/src/samplers/support/helper.jl +++ b/src/samplers/support/helper.jl @@ -26,7 +26,13 @@ vectorize{T<:Real}(d::MatrixDistribution, r::Matrix{T}) = Vector{Real}(vec # Note this is not the case for MultivariateDistribution so I guess this might be lack of # support for some types related to matrices (like PDMat). reconstruct(d::Distribution, val::Vector) = reconstruct(d, val, typeof(val[1])) -reconstruct(d::UnivariateDistribution, val::Vector, T::Type) = T(val[1]) +reconstruct(d::UnivariateDistribution, val::Vector, T::Type) = begin + if length(val) == 1 + T(val[1]) + else + Vector{T}(val) + end +end reconstruct(d::MultivariateDistribution, val::Vector, T::Type) = Vector{T}(val) reconstruct(d::MatrixDistribution, val::Vector, T::Type) = Array{T, 2}(reshape(val, size(d)...)) @@ -41,6 +47,6 @@ Sample(vi::VarInfo) = begin value[sym(vn)] = realpart(vi[vn]) end # NOTE: do we need to check if lp is 0?
- value[:lp] = realpart(vi.logp) + value[:lp] = realpart(getlogp(vi)) Sample(0.0, value) end diff --git a/src/samplers/support/hmc_core.jl b/src/samplers/support/hmc_core.jl index 17e706bcf8..4aa21650c3 100644 --- a/src/samplers/support/hmc_core.jl +++ b/src/samplers/support/hmc_core.jl @@ -1,47 +1,74 @@ global Δ_max = 1000 -setchunksize(chun_size::Int) = global CHUNKSIZE = chunk_size +setchunksize(chunk_size::Int) = begin + println("[Turing]: AD chunk size is set as $chunk_size") + global CHUNKSIZE = chunk_size +end -function runmodel(model::Function, _vi::VarInfo, spl::Union{Void,Sampler}) +runmodel(model::Function, vi::VarInfo, spl::Union{Void,Sampler}) = begin dprintln(4, "run model...") - vi = deepcopy(_vi) - vi.logp = 0.0 vi.index = 0 - model(vi=vi, sampler=spl) # run model\ + setlogp!(vi, zero(Real)) + if spl != nothing spl.info[:eval_num] += 1 end + model(vi=vi, sampler=spl) # run model end -function sample_momentum(vi::VarInfo, spl::Sampler) +sample_momentum(vi::VarInfo, spl::Sampler) = begin dprintln(2, "sampling momentum...") randn(length(getranges(vi, spl))) end # Leapfrog step -function leapfrog(_vi::VarInfo, _p::Vector, τ::Int, ϵ::Float64, model::Function, spl::Sampler) +leapfrog2(θ::Vector, p::Vector, τ::Int, ϵ::Float64, + model::Function, vi::VarInfo, spl::Sampler) = begin + + vi[spl] = θ + grad = gradient2(vi, model, spl) + verifygrad(grad) || (return θ, p, 0) + + τ_valid = 0 + for t in 1:τ + # NOTE: we dont need copy here becase arr += another_arr + # doesn't change arr in-place + p_old = p; θ_old = (θ); old_logp = getlogp(vi) + + p -= ϵ * grad / 2 + θ += ϵ * p # full step for state + spl.info[:lf_num] += 1 # record leapfrog num + + vi[spl] = θ + grad = gradient2(vi, model, spl) + verifygrad(grad) || (vi[spl] = θ_old; setlogp!(vi, old_logp); θ = θ_old; p = p_old; break) + + p -= ϵ * grad / 2 - vi = deepcopy(_vi) - p = deepcopy(_p) + τ_valid += 1 + end + + θ, p, τ_valid +end + +leapfrog(vi::VarInfo, p::Vector, τ::Int, ϵ::Float64, model::Function, spl::Sampler) = begin dprintln(3, "first gradient...") grad = gradient2(vi, model, spl) - # Verify gradients; reject if gradients is NaN or Inf. verifygrad(grad) || (return vi, p, 0) + dprintln(2, "leapfrog stepping...") - τ_valid = 0; p_old = deepcopy(p) + τ_valid = 0 for t in 1:τ # do 'leapfrog' for each var - p_old[1:end] = p[1:end] + p_old = p; θ_old = vi[spl] p -= ϵ * grad / 2 - expand!(vi) - vi[spl] += ϵ * p # full step for state + spl.info[:lf_num] += 1 # record leapfrog num grad = gradient2(vi, model, spl) # Verify gradients; reject if gradients is NaN or Inf - # verifygrad(grad) || (pop!(vi.vals); pop!(vi.logp); p = p_old; break) - verifygrad(grad) || (pop!(vi.vals); p = p_old; break) + verifygrad(grad) || (vi[spl] = θ_old; p = p_old; break) p -= ϵ * grad / 2 @@ -49,37 +76,33 @@ function leapfrog(_vi::VarInfo, _p::Vector, τ::Int, ϵ::Float64, model::Functio end # Return updated θ and momentum - last(vi), p, τ_valid + vi, p, τ_valid end # Compute Hamiltonian -function find_H(p::Vector, model::Function, _vi::VarInfo, spl::Sampler) - vi = deepcopy(_vi) - vi = runmodel(model, vi, spl) - H = dot(p, p) / 2 + realpart(-vi.logp) +find_H(p::Vector, model::Function, vi::VarInfo, spl::Sampler) = begin + # NOTE: getlogp(vi) = 0 means the current vals[end] hasn't been used at all. + # This can be a result of link/invlink (where expand! 
is used) + if getlogp(vi) == 0 vi = runmodel(model, vi, spl) end + + H = dot(p, p) / 2 + realpart(-getlogp(vi)) if isnan(H) H = Inf else H end end -function find_good_eps{T}(model::Function, spl::Sampler{T}, vi::VarInfo) +find_good_eps{T}(model::Function, vi::VarInfo, spl::Sampler{T}) = begin ϵ, p = 1.0, sample_momentum(vi, spl) # set initial epsilon and momentums log_p_r_Θ = -find_H(p, model, vi, spl) # calculate p(Θ, r) = exp(-H(Θ, r)) - # # Make a leapfrog step until accept - # vi_prime, p_prime, τ_valid = leapfrog(vi, p, 1, ϵ, model, spl) - # while τ_valid == 0 - # ϵ *= 0.5 - # vi_prime, p_prime, τ_valid = leapfrog(vi, p, 1, ϵ, model, spl) - # end - # ϵ_bar = ϵ - vi_prime, p_prime, τ = leapfrog(vi, p, 1, ϵ, model, spl) - log_p_r_Θ′ = τ == 0 ? -Inf : -find_H(p_prime, model, vi_prime, spl) # calculate new p(Θ, p) + θ = vi[spl] + θ_prime, p_prime, τ = leapfrog2(θ, p, 1, ϵ, model, vi, spl) + log_p_r_Θ′ = τ == 0 ? -Inf : -find_H(p_prime, model, vi, spl) # calculate new p(Θ, p) # Heuristically find optimal ϵ a = 2.0 * (log_p_r_Θ′ - log_p_r_Θ > log(0.5) ? 1 : 0) - 1 while (exp(log_p_r_Θ′ - log_p_r_Θ))^a > 2.0^(-a) ϵ = 2.0^a * ϵ - vi_prime, p_prime, τ = leapfrog(vi, p, 1, ϵ, model, spl) - log_p_r_Θ′ = τ == 0 ? -Inf : -find_H(p_prime, model, vi_prime, spl) + θ_prime, p_prime, τ = leapfrog2(θ, p, 1, ϵ, model, vi, spl) + log_p_r_Θ′ = τ == 0 ? -Inf : -find_H(p_prime, model, vi, spl) dprintln(1, "a = $a, log_p_r_Θ′ = $log_p_r_Θ′") end diff --git a/test/compiler.jl/noreturn.jl b/test/compiler.jl/noreturn.jl index b5aa793e93..6f0dc3b299 100644 --- a/test/compiler.jl/noreturn.jl +++ b/test/compiler.jl/noreturn.jl @@ -1,3 +1,4 @@ +include("../utility.jl") using Distributions using Turing using Base.Test diff --git a/test/nuts.jl/nuts.jl b/test/nuts.jl/nuts.jl index 06d983bcf7..0f08fa7ad0 100644 --- a/test/nuts.jl/nuts.jl +++ b/test/nuts.jl/nuts.jl @@ -8,7 +8,13 @@ using Distributions, Turing return s, m end -alg = NUTS(1000, 200, 0.65) +alg = NUTS(2500, 500, 0.65) +res = sample(gdemo([1.5, 2.0]), alg) + +check_numerical(res, [:s, :m], [49/24, 7/6]) + + +alg = Gibbs(2500, PG(50, 1, :s), NUTS(500, 0.65, :m)) res = sample(gdemo([1.5, 2.0]), alg) check_numerical(res, [:s, :m], [49/24, 7/6]) diff --git a/test/nuts_nb.jl b/test/nuts_nb.jl deleted file mode 100644 index 76e69f87ae..0000000000 --- a/test/nuts_nb.jl +++ /dev/null @@ -1,18 +0,0 @@ -using Distributions -using Turing -using Stan - -include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl") - -include(Pkg.dir("Turing")*"/benchmarks/naive.bayes-stan.data.jl") -include(Pkg.dir("Turing")*"/benchmarks/naive.bayes.model.jl") - -bench_res = tbenchmark("NUTS(1000, 0.65)", "nbmodel", "data=nbstandata[1]") -bench_res[4].names = ["phi[1]", "phi[2]", "phi[3]", "phi[4]"] -logd = build_logd("Naive Bayes", bench_res...) 
- -include(Pkg.dir("Turing")*"/benchmarks/naive.bayes-stan.run.jl") -logd["stan"] = stan_d -logd["time_stan"] = nb_time - -print_log(logd) diff --git a/test/quick.jl b/test/quick.jl new file mode 100644 index 0000000000..668192a06e --- /dev/null +++ b/test/quick.jl @@ -0,0 +1,36 @@ +using Distributions +using Turing +using Stan + +include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl") +include(Pkg.dir("Turing")*"/example-models/stan-models/MoC-stan.data.jl") + +@model nbmodel(K, V, M, N, z, w, doc, alpha, beta) = begin + theta ~ Dirichlet(alpha) + phi = Vector{Vector{Real}}(K) + phi ~ [Dirichlet(beta)] + + log_theta = log(theta) + Turing.acclogp!(vi, sum(log_theta[z[1:M]])) + + log_phi = map(x->log(x), phi) + for n = 1:N + # w[n] ~ Categorical(phi[z[doc[n]]]) + Turing.acclogp!(vi, log_phi[z[doc[n]]][w[n]]) + end + + phi +end + + +# bench_res = tbenchmark("NUTS(1000, 0.65)", "nbmodel", "data=nbstandata[1]") +# bench_res[4].names = ["phi[1]", "phi[2]", "phi[3]", "phi[4]"] +# logd = build_logd("Naive Bayes", bench_res...) +# +# include(Pkg.dir("Turing")*"/benchmarks/"*"MoC-stan.run.jl") +# logd["stan"] = stan_d +# logd["time_stan"] = nb_time +# +# print_log(logd) + +samples = sample(nbmodel(data=nbstandata[1]), HMC(1000, 0.1, 4)) diff --git a/test/runtests.jl b/test/runtests.jl index 2febe73ebf..3a2c7cbeaa 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -25,6 +25,7 @@ testcases = Dict( # samplers/ # support/ "resample.jl" => ["resample", "particlecontainer",], + "sampler.jl" => ["vectorize_observe", "vec_assume",], "gibbs.jl" => ["gibbs", "gibbs2", "gibbs_constructor",], "nuts.jl" => ["nuts_cons", "nuts", # "nuts_geweke", diff --git a/test/sampler.jl/vec_assume.jl b/test/sampler.jl/vec_assume.jl new file mode 100644 index 0000000000..0fd6370f06 --- /dev/null +++ b/test/sampler.jl/vec_assume.jl @@ -0,0 +1,39 @@ +include("../utility.jl") +using Distributions, Turing, Base.Test + +N = 100 +setchunksize(N) +alg = HMC(1000, 0.2, 4) + +@model vdemo() = begin + x = Vector{Real}(N) + for i = 1:N + x[i] ~ Normal(0, sqrt(4)) + end +end + +t_loop = @elapsed res = sample(vdemo(), alg) +@test_approx_eq_eps mean(mean(res[:x])) 0 0.1 + + +# Test for vectorize UnivariateDistribution +@model vdemo() = begin + x = Vector{Real}(N) + x ~ [Normal(0, 2)] +end + +t_vec = @elapsed res = sample(vdemo(), alg) +@test_approx_eq_eps mean(mean(res[:x])) 0 0.1 + + +@model vdemo() = begin + x ~ MvNormal(zeros(N), 2 * ones(N)) +end + +t_mv = @elapsed res = sample(vdemo(), alg) +@test_approx_eq_eps mean(mean(res[:x])) 0 0.1 + +println("Time for") +println(" Loop : $t_loop") +println(" Vec : $t_vec") +println(" Mv : $t_mv") diff --git a/test/sampler.jl/vec_assume_mv.jl b/test/sampler.jl/vec_assume_mv.jl new file mode 100644 index 0000000000..4cd4893b7f --- /dev/null +++ b/test/sampler.jl/vec_assume_mv.jl @@ -0,0 +1,34 @@ +include("../utility.jl") +using Distributions, Turing, Base.Test + +N = 5 +beta = [0.5, 0.5] +setchunksize(N*length(beta)) +alg = HMC(1000, 0.2, 4) + +# Test for vectorize UnivariateDistribution +@model vdemo() = begin + phi = Vector{Vector{Real}}(N) + phi ~ [Dirichlet(beta)] +end + +t_vec = @elapsed res = sample(vdemo(), alg) + + + + +@model vdemo() = begin + phi = Vector{Vector{Real}}(N) + for i = 1:N + phi[i] ~ Dirichlet(beta) + end +end + +t_loop = @elapsed res = sample(vdemo(), alg) + + + + +println("Time for") +println(" Loop : $t_loop") +println(" Vec : $t_vec") diff --git a/test/sampler.jl/vectorize_observe.jl b/test/sampler.jl/vectorize_observe.jl new file mode 100644 index 
0000000000..3452dd8c42 --- /dev/null +++ b/test/sampler.jl/vectorize_observe.jl @@ -0,0 +1,65 @@ +# using Distributions +# using Turing +# using Stan +# +# include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl") +# include(Pkg.dir("Turing")*"/example-models/stan-models/MoC-stan.data.jl") +# +# @model nbmodel(K, V, M, N, z, w, doc, alpha, beta) = begin +# theta ~ Dirichlet(alpha) +# phi = Array{Any}(K) +# for k = 1:K +# phi[k] ~ Dirichlet(beta) +# end +# for m = 1:M +# z[m] ~ Categorical(theta) +# end +# for n = 1:N +# w[n] ~ Categorical(phi[z[doc[n]]]) +# end +# phi +# end +# +# +# bench_res = tbenchmark("NUTS(1000, 0.65)", "nbmodel", "data=nbstandata[1]") +# bench_res[4].names = ["phi[1]", "phi[2]", "phi[3]", "phi[4]"] +# logd = build_logd("Naive Bayes", bench_res...) +# +# include(Pkg.dir("Turing")*"/benchmarks/"*"MoC-stan.run.jl") +# logd["stan"] = stan_d +# logd["time_stan"] = nb_time +# +# print_log(logd) + +include("../utility.jl") +using Distributions, Turing + +# Test for vectorize UnivariateDistribution +@model vdemo(x) = begin + s ~ InverseGamma(2,3) + m ~ Normal(0,sqrt(s)) + x ~ [Normal(m, sqrt(s))] + # for i = 1:length(x) + # x[i] ~ Normal(m, sqrt(s)) + # end + return s, m +end + +alg = NUTS(2500, 500, 0.65) +x = randn(1000) +res = sample(vdemo(x), alg) + +check_numerical(res, [:s, :m], [1, sum(x) / (1 + length(x))]) + +# Test for vectorize MultivariateDistribution + +D = 2 +@model vdemo2(x) = begin + μ ~ MvNormal(zeros(D), ones(D)) + x ~ [MvNormal(μ, ones(D))] +end + +alg = NUTS(2500, 500, 0.65) +res = sample(vdemo2(randn(D,1000)), alg) + +# TODO: Test for vectorize MatrixDistribution diff --git a/test/utility.jl b/test/utility.jl index 0c95ae359d..4a683a32f2 100644 --- a/test/utility.jl +++ b/test/utility.jl @@ -2,7 +2,7 @@ # TODO: can we somehow update the tests so that we can remove these two functions below? using Turing -using Turing: checkindex, setval!, updategid!, vectorize, CACHERESET, VarInfo, VarName, Sampler +using Turing: checkindex, setval!, updategid!, acclogp!, vectorize, CACHERESET, VarInfo, VarName, Sampler randr(vi::VarInfo, vn::VarName, dist::Distribution, count::Bool) = begin vi.index = count ? vi.index + 1 : vi.index @@ -13,7 +13,7 @@ randr(vi::VarInfo, vn::VarName, dist::Distribution, count::Bool) = begin else if count checkindex(vn, vi) end r = vi[vn] - vi.logp += logpdf(dist, r, istransformed(vi, vn)) + acclogp!(vi, logpdf(dist, r, istrans(vi, vn))) r end end