From a142edf883248a2cfa9543fb6f49f3eb0a64af63 Mon Sep 17 00:00:00 2001
From: Roman Bange
Date: Sat, 26 Sep 2020 01:21:30 +0200
Subject: [PATCH 1/2] add GPU differentiable logpdf for normal distributions

---
 src/extensions/Distributions.jl |  9 +++++++++
 src/extensions/extensions.jl    |  1 +
 test/extensions.jl              | 12 ++++++++++++
 test/runtests.jl                |  2 +-
 4 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 src/extensions/Distributions.jl

diff --git a/src/extensions/Distributions.jl b/src/extensions/Distributions.jl
new file mode 100644
index 0000000..23b1b1a
--- /dev/null
+++ b/src/extensions/Distributions.jl
@@ -0,0 +1,9 @@
+"""
+GPU-compatible, automatically differentiable version of the logpdf function of
+normal distributions. An epsilon value is added to σ to guarantee numeric
+stability if σ is exactly zero (e.g. if relu is used in the output layer).
+"""
+function normlogpdf(μ, σ, x; ϵ = 1.0f-8)
+    z = (x .- μ) ./ (σ .+ ϵ)
+    -(z .^ 2 .+ log(2.0f0π)) / 2.0f0 .- log.(σ .+ ϵ)
+end
diff --git a/src/extensions/extensions.jl b/src/extensions/extensions.jl
index 9375b69..f4a17d9 100644
--- a/src/extensions/extensions.jl
+++ b/src/extensions/extensions.jl
@@ -3,3 +3,4 @@ include("CUDA.jl")
 include("Zygote.jl")
 include("ReinforcementLearningBase.jl")
 include("ElasticArrays.jl")
+include("Distributions.jl")
diff --git a/test/extensions.jl b/test/extensions.jl
index 07659dc..347ddb9 100644
--- a/test/extensions.jl
+++ b/test/extensions.jl
@@ -14,3 +14,15 @@
     clip_by_global_norm!(gs, ps, 4.0f0)
     @test isapprox(gs[:x], [0.0 0.0 0.0; 0.0 0.0 0.0])
 end
+
+
+@testset "Distributions" begin
+    @test isapprox(logpdf(Normal(), 2), normlogpdf(0, 1, 2))
+    @test isapprox(logpdf.([Normal(), Normal()], [2, 10]), normlogpdf([0, 0], [1, 1], [2, 10]))
+
+    # Test numeric stability for σ = 0
+    @test isnan(normlogpdf(0, 0, 2, ϵ=0))
+    @test !isnan(normlogpdf(0, 0, 2))
+
+    # GPU differentiability not tested here
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index b71ad52..a563f68 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -3,7 +3,7 @@ using ReinforcementLearningCore
 using Random
 using Test
 using StatsBase
-using Distributions: probs
+using Distributions: probs, Normal, logpdf
 using ReinforcementLearningEnvironments
 using Flux
 using Zygote
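
For context: normlogpdf evaluates the usual normal log-density,
-((x - μ) / σ)^2 / 2 - log(σ) - log(2π) / 2, with σ shifted by ϵ. A minimal
CPU sketch of the behaviour the tests above exercise (the sample values here
are illustrative, not part of the patch):

    using Distributions: Normal, logpdf

    # Agrees with Distributions.jl up to the tiny ϵ shift in σ:
    normlogpdf(0.0f0, 1.0f0, 2.0f0) ≈ logpdf(Normal(0, 1), 2)  # true

    # With ϵ = 0 the z-score divides by zero and the result is NaN;
    # the default ϵ keeps it a large negative but finite number:
    normlogpdf(0, 0, 2, ϵ = 0)  # NaN
    normlogpdf(0, 0, 2)         # ≈ -2.0f16, finite
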
From 75e4e525657ac0302357d9d4a58334bfa11f7e4e Mon Sep 17 00:00:00 2001
From: Jun Tian
Date: Sat, 26 Sep 2020 13:22:48 +0800
Subject: [PATCH 2/2] add tests on GPU

---
 src/extensions/Distributions.jl |  4 ++++
 test/extensions.jl              | 10 +++++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/extensions/Distributions.jl b/src/extensions/Distributions.jl
index 23b1b1a..6a3cfbf 100644
--- a/src/extensions/Distributions.jl
+++ b/src/extensions/Distributions.jl
@@ -1,3 +1,7 @@
+export normlogpdf
+
+# watch https://github.com/JuliaStats/Distributions.jl/issues/1183
+
 """
 GPU-compatible, automatically differentiable version of the logpdf function of
 normal distributions. An epsilon value is added to σ to guarantee numeric
diff --git a/test/extensions.jl b/test/extensions.jl
index 347ddb9..6a3265a 100644
--- a/test/extensions.jl
+++ b/test/extensions.jl
@@ -24,5 +24,13 @@ end
     @test isnan(normlogpdf(0, 0, 2, ϵ=0))
     @test !isnan(normlogpdf(0, 0, 2))
 
-    # GPU differentiability not tested here
+    if CUDA.functional()
+        cpu_grad = Zygote.gradient([0.2, 0.5]) do x
+            sum(logpdf.([Normal(1, 0.1), Normal(2, 0.2)], x))
+        end
+        gpu_grad = Zygote.gradient(cu([0.2, 0.5])) do x
+            sum(normlogpdf(cu([1, 2]), cu([0.1, 0.2]), x))
+        end
+        @test isapprox(cpu_grad[1], gpu_grad[1] |> Array)
+    end
 end
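
For anyone trying the GPU path outside the test suite, a minimal sketch
mirroring the new test (requires a machine where CUDA.functional() is true;
the array values are illustrative):

    using CUDA, Zygote

    μ = cu([1.0f0, 2.0f0])
    σ = cu([0.1f0, 0.2f0])
    x = cu([0.2f0, 0.5f0])

    # The broadcasted formula keeps everything on the GPU, so Zygote can
    # differentiate through it without scalar indexing:
    g, = Zygote.gradient(x -> sum(normlogpdf(μ, σ, x)), x)

The result can be checked against the analytic gradient of the summed
log-density, (μ .- x) ./ (σ .+ ϵ) .^ 2 elementwise, which is effectively what
the CPU/GPU comparison in the new test does.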