From ffc04c0e7e0ce5fc44a5f14d8094a7bcc1e40206 Mon Sep 17 00:00:00 2001 From: Rafael Fourquet Date: Thu, 27 Nov 2014 09:15:23 +0530 Subject: [PATCH] rand!(A): use pointer directly instead of pointer_to_array This is to avoid the small allocations of pointer_to_array, which can add up and slow down repeated calls to rand!(A) on a same array A, in particular when A is small. --- base/random.jl | 59 ++++++++++++++++++++++++++++---------------------- test/random.jl | 2 +- 2 files changed, 34 insertions(+), 27 deletions(-) diff --git a/base/random.jl b/base/random.jl index 2353f2bd50e98..620838c80d5b6 100644 --- a/base/random.jl +++ b/base/random.jl @@ -1,6 +1,6 @@ module Random -using Base.dSFMT +using Base: dSFMT, IntTypes export srand, rand, rand!, @@ -219,7 +219,10 @@ end # MersenneTwister -function rand_AbstractArray_Float64!{I<:FloatInterval}(r::MersenneTwister, A::AbstractArray{Float64}, n=length(A), ::Type{I}=CloseOpen) +@inline _store!(A::AbstractArray, x, i) = @inbounds A[i] = x +@inline _store!(A::Ptr, x, i) = unsafe_store!(A, x, i) + +function rand_AbstractArray_Float64!{I<:FloatInterval}(r::MersenneTwister, A::Union(AbstractArray{Float64}, Ptr{Float64}), n::Int, ::Type{I}=CloseOpen) # what follows is equivalent to this simple loop but more efficient: # for i=1:n # @inbounds A[i] = rand(r, I) @@ -233,19 +236,21 @@ function rand_AbstractArray_Float64!{I<:FloatInterval}(r::MersenneTwister, A::Ab end m2 = min(n, m+s) for i=m+1:m2 - @inbounds A[i] = rand_inbounds(r, I) + _store!(A, rand_inbounds(r, I), i) end m = m2 end A end -rand!(r::MersenneTwister, A::AbstractArray{Float64}) = rand_AbstractArray_Float64!(r, A) +rand!(r::MersenneTwister, A::AbstractArray{Float64}) = rand_AbstractArray_Float64!(r, A, length(A)) fill_array!(s::DSFMT_state, A::Ptr{Float64}, n::Int, ::Type{CloseOpen}) = dsfmt_fill_array_close_open!(s, A, n) fill_array!(s::DSFMT_state, A::Ptr{Float64}, n::Int, ::Type{Close1Open2}) = dsfmt_fill_array_close1_open2!(s, A, n) -function rand!{I<:FloatInterval}(r::MersenneTwister, A::Array{Float64}, n::Int=length(A), ::Type{I}=CloseOpen) +@inline rand!{I<:FloatInterval}(r::MersenneTwister, A::Array{Float64}, n::Int=length(A), ::Type{I}=CloseOpen) = (rand!(r, pointer(A), n, I); A) + +function rand!{I<:FloatInterval}(r::MersenneTwister, pA::Ptr{Float64}, n::Int, ::Type{I}) # depending on the alignment of A, the data written by fill_array! may have # to be left-shifted by up to 15 bytes (cf. unsafe_copy! below) for # reproducibility purposes; @@ -254,9 +259,8 @@ function rand!{I<:FloatInterval}(r::MersenneTwister, A::Array{Float64}, n::Int=l # generated by the scalar version of rand n2 = (n-2) ÷ 2 * 2 if n2 < dsfmt_get_min_array_size() - rand_AbstractArray_Float64!(r, A, n, I) + rand_AbstractArray_Float64!(r, pA, n, I) else - pA = pointer(A) align = Csize_t(pA) % 16 if align > 0 pA2 = pA + 16 - align @@ -266,10 +270,10 @@ function rand!{I<:FloatInterval}(r::MersenneTwister, A::Array{Float64}, n::Int=l fill_array!(r.state, pA, n2, I) end for i=n2+1:n - A[i] = rand(r, I) + unsafe_store!(pA, rand(r, I), i) end end - A + pA end @inline mask128(u::UInt128, ::Type{Float16}) = (u & 0x03ff03ff03ff03ff03ff03ff03ff03ff) | 0x3c003c003c003c003c003c003c003c00 @@ -278,10 +282,10 @@ end function rand!{T<:Union(Float16, Float32)}(r::MersenneTwister, A::Array{T}, ::Type{Close1Open2}) n = length(A) n128 = n * sizeof(T) ÷ 16 - rand!(r, pointer_to_array(convert(Ptr{Float64}, pointer(A)), 2*n128), 2*n128, Close1Open2) - A128 = pointer_to_array(convert(Ptr{UInt128}, pointer(A)), n128) - @inbounds for i in 1:n128 - u = A128[i] + rand!(r, Ptr{Float64}(pointer(A)), 2*n128, Close1Open2) + A128 = Ptr{UInt128}(pointer(A)) + for i in 1:n128 + u = unsafe_load(A128, i) # "A128[i]" in comment below u $= u << 26 # at this point, the 64 low bits of u, "k" being the k-th bit of A128[i] and "+" the bit xor, are: # [..., 58+32,..., 53+27, 52+26, ..., 33+7, 32+6, ..., 27+1, 26, ..., 1] @@ -291,7 +295,7 @@ function rand!{T<:Union(Float16, Float32)}(r::MersenneTwister, A::Array{T}, ::Ty # this is obviously satisfied on the 32 low bits side, and on the high side, the entropy comes # from bits 33:52 of A128[i] and then from bits 27:32 (which are discarded on the low side) # this is similar for the 64 high bits of u - A128[i] = mask128(u, T) + unsafe_store!(A128, mask128(u, T), i) end for i in 16*n128÷sizeof(T)+1:n @inbounds A[i] = rand(r, T) + one(T) @@ -311,35 +315,38 @@ end rand!{T<:Union(Float16, Float32)}(r::MersenneTwister, A::Array{T}) = rand!(r, A, CloseOpen) -function rand!(r::MersenneTwister, A::Array{UInt128}, n::Int=length(A)) - Af = pointer_to_array(convert(Ptr{Float64}, pointer(A)), 2n) +function rand!(r::MersenneTwister, A::Ptr{UInt128}, n::Int) + Af = Ptr{Float64}(A) i = n while true rand!(r, Af, 2i, Close1Open2) n < 5 && break i = 0 - @inbounds while n-i >= 5 - u = A[i+=1] - A[n] $= u << 48 - A[n-=1] $= u << 36 - A[n-=1] $= u << 24 - A[n-=1] $= u << 12 + while n-i >= 5 + u = unsafe_load(A, i+=1) + unsafe_store!(A, unsafe_load(A, n) $ u<<48, n) + n-=1 + unsafe_store!(A, unsafe_load(A, n) $ u<<36, n) + n-=1 + unsafe_store!(A, unsafe_load(A, n) $ u<<24, n) + n-=1 + unsafe_store!(A, unsafe_load(A, n) $ u<<12, n) n-=1 end end if n > 0 u = rand_ui2x52_raw(r) for i = 1:n - @inbounds A[i] $= u << 12*i + unsafe_store!(A, unsafe_load(A, i) $ u << 12*i, i) end end A end -function rand!{T<:Union(Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128)}(r::MersenneTwister, A::Array{T}) - n=length(A) +function rand!{T<:Union(IntTypes...)}(r::MersenneTwister, A::Array{T}) + n = length(A) n128 = n * sizeof(T) ÷ 16 - rand!(r, pointer_to_array(convert(Ptr{UInt128}, pointer(A)), n128)) + rand!(r, Ptr{UInt128}(pointer(A)), n128) for i = 16*n128÷sizeof(T)+1:n @inbounds A[i] = rand(r, T) end diff --git a/test/random.jl b/test/random.jl index 3bf0d87fec087..a54d70454c7ed 100644 --- a/test/random.jl +++ b/test/random.jl @@ -235,7 +235,7 @@ let mt = MersenneTwister(0) @test rand!(mt, AF64)[end] == 0.957735065345398 @test rand!(mt, AF64)[end] == 0.6492481059865669 resize!(AF64, 2*length(mt.vals)) - @test Base.Random.rand_AbstractArray_Float64!(mt, AF64)[end] == 0.432757268470779 + @test Base.Random.rand_AbstractArray_Float64!(mt, AF64, length(AF64))[end] == 0.432757268470779 end # Issue #9037