From be4a77c9b51a47442b56f112e720f73f6e069943 Mon Sep 17 00:00:00 2001 From: "Tamas K. Papp" Date: Fri, 10 May 2019 08:39:38 +0200 Subject: [PATCH 1/3] fix typos, clarify what methods should be defined. This was missed in #31787. --- stdlib/Random/docs/src/index.md | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/stdlib/Random/docs/src/index.md b/stdlib/Random/docs/src/index.md index c81946fc2a2de..32f5f5be8d4c1 100644 --- a/stdlib/Random/docs/src/index.md +++ b/stdlib/Random/docs/src/index.md @@ -84,13 +84,20 @@ Generating random values for some distributions may involve various trade-offs. The `Random` module defines a customizable framework for obtaining random values that can address these issues. Each invocation of `rand` generates a *sampler* which can be customized with the above trade-offs in mind, by adding methods to `Sampler`, which in turn can dispatch on the random number generator, the object that characterizes the distribution, and a suggestion for the number of repetitions. Currently, for the latter, `Val{1}` (for a single sample) and `Val{Inf}` (for an arbitrary number) are used, with `Random.Repetition` an alias for both. -The object returned by `Sampler` is then used to generate the random values, by a method of `rand` defined for this purpose. Samplers can be arbitrary values, but for most applications the following predefined samplers may be sufficient: +The object returned by `Sampler` is then used to generate the random values. When implementing the random generation interface for a value `X::T` that can be sampled from, the implementor should define the method -1. `SamplerType{T}()` can be used for implementing samplers that draw from type `T` (e.g. `rand(Int)`). 
+```julia +rand(rng, sampler) +``` +for the particular sampler returned by `Sampler(rng, X, repetition)` + +Samplers can be arbitrary values that implement `rand(rng, sampler)`, but for most applications the following predefined samplers may be sufficient: + +1. `SamplerType{T}()` can be used for implementing samplers that draw from type `T` (e.g. `rand(Int)`). This is the default returned by `Sampler` for *types*. -2. `SamplerTrivial(self)` is a simple wrapper for `self`, which can be accessed with `[]`. This is the recommended sampler when no pre-computed information is needed (e.g. `rand(1:3)`). +2. `SamplerTrivial(self)` is a simple wrapper for `self`, which can be accessed with `[]`. This is the recommended sampler when no pre-computed information is needed (e.g. `rand(1:3)`), and is the default returned by `Sampler` for *values*. -3. `SamplerSimple(self, data)` also contains the additional `data` field, which can be used to store arbitrary pre-computed values. +3. `SamplerSimple(self, data)` also contains the additional `data` field, which can be used to store arbitrary pre-computed values, which should be computed in a *custom method* of `Sampler`. We provide examples for each of these. We assume here that the choice of algorithm is independent of the RNG, so we use `AbstractRNG` in our signatures. @@ -185,7 +192,7 @@ is defined to return `SamplerTrivial` with a `Random.CloseOpen01{T}}` type defin #### An optimized sampler with pre-computed data -Consider a discrete distribution, where numbers `1:n` are drawn with given probabilities that some to one. When many values are needed from this distribution, the fastest method if using an [alias table](https://en.wikipedia.org/wiki/Alias_method). We don't provide the algorithm for building such a table here, but suppose it is available in `make_alias_table(probabilities)` instead, and `draw_number(rng, alias_table)` can be used to draw a random number from it. 
+Consider a discrete distribution, where numbers `1:n` are drawn with given probabilities that sum to one. When many values are needed from this distribution, the fastest method is using an [alias table](https://en.wikipedia.org/wiki/Alias_method). We don't provide the algorithm for building such a table here, but suppose it is available in `make_alias_table(probabilities)` instead, and `draw_number(rng, alias_table)` can be used to draw a random number from it. Suppose that the distribution is described by ```julia From 229bf3eafd204881836d411961d07d497262b959 Mon Sep 17 00:00:00 2001 From: "Tamas K. Papp" Date: Fri, 10 May 2019 09:30:31 +0200 Subject: [PATCH 2/3] remove example for `Random.CloseOpen01{T}}` it is just confusing at this stage. --- stdlib/Random/docs/src/index.md | 9 --------- 1 file changed, 9 deletions(-) diff --git a/stdlib/Random/docs/src/index.md b/stdlib/Random/docs/src/index.md index 32f5f5be8d4c1..68470c5ce1387 100644 --- a/stdlib/Random/docs/src/index.md +++ b/stdlib/Random/docs/src/index.md @@ -181,15 +181,6 @@ julia> rand(Die(4), 3) Given a collection type `S`, it's currently assumed that if `rand(::S)` is defined, an object of type `eltype(S)` will be produced. In the last example, a `Vector{Any}` is produced; the reason is that `eltype(Die) == Any`. The remedy is to define `Base.eltype(::Type{Die}) = Int`. -A `SamplerTrivial` does not have to wrap the original object. For example, in `Random`, `AbstractFloat` types are special-cased, because by default random values are not produced in the whole type domain, but rather in `[0,1)`. - -Consequently, a method -```julia -Sampler(::Type{RNG}, ::Type{T}, n::Repetition) where {RNG<:AbstractRNG,T<:AbstractFloat} = - Sampler(RNG, CloseOpen01(T), n) -``` -is defined to return `SamplerTrivial` with a `Random.CloseOpen01{T}}` type defined for this purpose, which has an appropriate `rand` method defined for it. 
- #### An optimized sampler with pre-computed data Consider a discrete distribution, where numbers `1:n` are drawn with given probabilities that sum to one. When many values are needed from this distribution, the fastest method is using an [alias table](https://en.wikipedia.org/wiki/Alias_method). We don't provide the algorithm for building such a table here, but suppose it is available in `make_alias_table(probabilities)` instead, and `draw_number(rng, alias_table)` can be used to draw a random number from it. From 7e32b9861edcac34753ada4fba01e6ac2b2ae193 Mon Sep 17 00:00:00 2001 From: "Tamas K. Papp" Date: Fri, 10 May 2019 11:19:30 +0200 Subject: [PATCH 3/3] Replace gentype with eltype. In the Die example, define eltype from the beginning, instead of fixing it later, but mention what would happen if we didn't. Incidental typo fixes and clarifications. --- stdlib/Random/docs/src/index.md | 14 +++++++++++--- stdlib/Random/src/Random.jl | 28 ++++++++++++++++------------ stdlib/Random/test/runtests.jl | 8 ++++---- 3 files changed, 31 insertions(+), 19 deletions(-) diff --git a/stdlib/Random/docs/src/index.md b/stdlib/Random/docs/src/index.md index 68470c5ce1387..1a2e360e7b2b2 100644 --- a/stdlib/Random/docs/src/index.md +++ b/stdlib/Random/docs/src/index.md @@ -89,7 +89,7 @@ The object returned by `Sampler` is then used to generate the random values. Whe ```julia rand(rng, sampler) ``` -for the particular sampler returned by `Sampler(rng, X, repetition)` +for the particular sampler returned by `Sampler(rng, X, repetition)`. Samplers can be arbitrary values that implement `rand(rng, sampler)`, but for most applications the following predefined samplers may be sufficient: @@ -99,6 +99,12 @@ Samplers can be arbitrary values that implement `rand(rng, sampler)`, but for mo 3. `SamplerSimple(self, data)` also contains the additional `data` field, which can be used to store arbitrary pre-computed values, which should be computed in a *custom method* of `Sampler`. 
+For `SamplerTrivial` and `SamplerSimple`, +```julia +eltype(::Type{T}) +``` +should be defined to determine the returned type for custom random distributions of type `T`. This is used for pre-allocated containers, e.g. when sampling an array of values. + We provide examples for each of these. We assume here that the choice of algorithm is independent of the RNG, so we use `AbstractRNG` in our signatures. ```@docs @@ -169,17 +175,19 @@ In order to define random generation out of objects of type `S`, the following m ```jldoctest Die; setup = :(Random.seed!(1)) julia> Random.rand(rng::AbstractRNG, d::Random.SamplerTrivial{Die}) = rand(rng, 1:d[].nsides); +julia> Base.eltype(::Type{Die}) = Int + julia> rand(Die(4)) 3 julia> rand(Die(4), 3) -3-element Array{Any,1}: +3-element Array{Int64,1}: 3 4 2 ``` -Given a collection type `S`, it's currently assumed that if `rand(::S)` is defined, an object of type `eltype(S)` will be produced. In the last example, a `Vector{Any}` is produced; the reason is that `eltype(Die) == Any`. The remedy is to define `Base.eltype(::Type{Die}) = Int`. +Given a collection type `S`, if `rand(::S)` is defined, an object of type `eltype(S)` will be produced. In this example, if we did not define a method for `eltype`, a `Vector{Any}` would have been produced. 
#### An optimized sampler with pre-computed data diff --git a/stdlib/Random/src/Random.jl b/stdlib/Random/src/Random.jl index baa82df0bf114..68319111c58d9 100644 --- a/stdlib/Random/src/Random.jl +++ b/stdlib/Random/src/Random.jl @@ -16,7 +16,7 @@ using Base.GMP: Limb using Base: BitInteger, BitInteger_types, BitUnsigned, require_one_based_indexing -import Base: copymutable, copy, copy!, ==, hash, convert +import Base: copymutable, copy, copy!, ==, hash, convert, eltype using Serialization import Serialization: serialize, deserialize import Base: rand, randn @@ -40,10 +40,6 @@ Supertype for random number generators such as [`MersenneTwister`](@ref) and [`R """ abstract type AbstractRNG end -gentype(::Type{X}) where {X} = eltype(X) -gentype(x) = gentype(typeof(x)) - - ### integers # we define types which encode the generation of a specific number of bits @@ -81,7 +77,7 @@ for UI = (:UInt10, :UInt10Raw, :UInt23, :UInt23Raw, :UInt52, :UInt52Raw, end end -gentype(::Type{<:UniformBits{T}}) where {T} = T +eltype(::Type{<:UniformBits{T}}) where {T} = T ### floats @@ -97,7 +93,7 @@ const CloseOpen12_64 = CloseOpen12{Float64} CloseOpen01(::Type{T}=Float64) where {T<:AbstractFloat} = CloseOpen01{T}() CloseOpen12(::Type{T}=Float64) where {T<:AbstractFloat} = CloseOpen12{T}() -gentype(::Type{<:FloatInterval{T}}) where {T<:AbstractFloat} = T +eltype(::Type{<:FloatInterval{T}}) where {T<:AbstractFloat} = T const BitFloatType = Union{Type{Float16},Type{Float32},Type{Float64}} @@ -105,7 +101,7 @@ const BitFloatType = Union{Type{Float16},Type{Float32},Type{Float64}} abstract type Sampler{E} end -gentype(::Type{<:Sampler{E}}) where {E} = E +eltype(::Type{<:Sampler{E}}) where {E} = E # temporarily for BaseBenchmarks RangeGenerator(x) = Sampler(GLOBAL_RNG, x) @@ -135,6 +131,10 @@ the amount of precomputation, if applicable. [`Random.SamplerType`](@ref) and [`Random.SamplerTrivial`](@ref) are default fallbacks for *types* and *values*, respectively. 
[`Random.SamplerSimple`](@ref) can be used to store pre-computed values without defining extra types for only this purpose. + +Generally, for most custom types that yield random values, defining a new method for +`Sampler` is *not* required, as the above solutions should be sufficient. See the manual for +details and examples. """ Sampler(rng::AbstractRNG, x, r::Repetition=Val(Inf)) = Sampler(typeof(rng), x, r) Sampler(rng::AbstractRNG, ::Type{X}, r::Repetition=Val(Inf)) where {X} = Sampler(typeof(rng), X, r) @@ -170,9 +170,11 @@ end Create a sampler that just wraps the given value `x`. This is the default fall-back for values. +`eltype(x)` is used to determine the types returned by this sampler, and should be defined. + The recommended use case is sampling from values without precomputed data. """ -SamplerTrivial(x::T) where {T} = SamplerTrivial{T,gentype(T)}(x) +SamplerTrivial(x::T) where {T} = SamplerTrivial{T,eltype(T)}(x) Sampler(::Type{<:AbstractRNG}, x, ::Repetition) = SamplerTrivial(x) @@ -189,16 +191,18 @@ end Create a sampler that wraps the given value `x` and the `data`. +`eltype(x)` is used to determine the types returned by this sampler, and should be defined. + The recommended use case is sampling from values with precomputed data. """ -SamplerSimple(x::T, data::S) where {T,S} = SamplerSimple{T,S,gentype(T)}(x, data) +SamplerSimple(x::T, data::S) where {T,S} = SamplerSimple{T,S,eltype(T)}(x, data) Base.getindex(sp::SamplerSimple) = sp.self # simple sampler carrying a (type) tag T and data struct SamplerTag{T,S,E} <: Sampler{E} data::S - SamplerTag{T}(s::S) where {T,S} = new{T,S,gentype(T)}(s) + SamplerTag{T}(s::S) where {T,S} = new{T,S,eltype(T)}(s) end @@ -271,7 +275,7 @@ end rand(r::AbstractRNG, dims::Integer...) = rand(r, Float64, Dims(dims)) rand( dims::Integer...) 
= rand(Float64, Dims(dims)) -rand(r::AbstractRNG, X, dims::Dims) = rand!(r, Array{gentype(X)}(undef, dims), X) +rand(r::AbstractRNG, X, dims::Dims) = rand!(r, Array{eltype(X)}(undef, dims), X) rand( X, dims::Dims) = rand(GLOBAL_RNG, X, dims) rand(r::AbstractRNG, X, d::Integer, dims::Integer...) = rand(r, X, Dims((d, dims...))) diff --git a/stdlib/Random/test/runtests.jl b/stdlib/Random/test/runtests.jl index c1d68910289f2..30b54c6c00b89 100644 --- a/stdlib/Random/test/runtests.jl +++ b/stdlib/Random/test/runtests.jl @@ -689,10 +689,10 @@ end end end -@testset "gentype for UniformBits" begin - @test Random.gentype(Random.UInt52()) == UInt64 - @test Random.gentype(Random.UInt52(UInt128)) == UInt128 - @test Random.gentype(Random.UInt104()) == UInt128 +@testset "eltype for UniformBits" begin + @test eltype(Random.UInt52()) == UInt64 + @test eltype(Random.UInt52(UInt128)) == UInt128 + @test eltype(Random.UInt104()) == UInt128 end @testset "shuffle[!]" begin