-
-
Notifications
You must be signed in to change notification settings - Fork 5.7k
Closed
Labels
Description
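I observed a close-to-twofold slowdown with copy! and .= when the arrays are Memory rather than Vector (about 24 ms versus about 14.5 ms in the timings below); [:] = is not affected. I suspect this might have something to do with the discussion here: https://discourse.julialang.org/t/increase-in-allocations-with-julia-v1-11-beta/112838/2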
I put the following code in a script, debug.jl, and did include("debug.jl") in a fresh REPL session. I've included the lowered code print out.
using BenchmarkTools, Random, LinearAlgebra
# Element type passed to every benchmark and lowered-code printout in this script.
const T = Float64
# Number of elements per buffer: 5000^2 = 25_000_000.
const N = 5000^2
# Request 14 BLAS threads, then echo the count BLAS reports back.
# NOTE(review): none of the operations benchmarked in this script (`copy!`, `[:] =`,
# `.=`) is an explicit BLAS call — confirm whether this setting affects the timings.
BLAS.set_num_threads(14)
@show BLAS.get_num_threads()
# Left disabled; Hwloc is not among the packages named on this script's `using` line.
# @show Hwloc.num_physical_cores()
"""
    test_Vector(::Type{T}, N::Integer) where {T}

Benchmark three ways of copying a length-`N` `Vector{T}` source into a
preallocated `Vector{T}` destination.

The source is filled with `randn` samples and each benchmark writes into its own
zero-initialised destination. A header line is printed, followed by one `@btime`
report per operation, in this order: `copy!`, `[:] =`, `.=`.

Returns `nothing`.
"""
function test_Vector(::Type{T}, N::Integer) where {T}
    src = randn(T, N)
    # One independent destination per benchmarked operation.
    via_copy, via_colon, via_bcast = ntuple(_ -> zeros(T, N), 3)

    println("Test Vector:")
    @btime copy!($via_copy, $src)
    @btime $via_colon[:] = $src
    @btime $via_bcast .= $src
    return nothing
end
"""
    test_Memory(::Type{T}, N::Integer) where {T}

Benchmark three ways of copying a length-`N` `Memory{T}` source into a
preallocated `Memory{T}` destination.

The source is filled in place with `randn!`; the destinations are left
uninitialised because every benchmark overwrites them completely. A header line
is printed, followed by one `@btime` report per operation, in this order:
`copy!`, `[:] =`, `.=`.

Returns `nothing`.
"""
function test_Memory(::Type{T}, N::Integer) where {T}
    # `randn!` returns the buffer it filled, so allocate and fill in one step.
    src = randn!(Memory{T}(undef, N))
    # One independent destination per benchmarked operation.
    via_copy, via_colon, via_bcast = ntuple(_ -> Memory{T}(undef, N), 3)

    println("Test Memory")
    @btime copy!($via_copy, $src)
    @btime $via_colon[:] = $src
    @btime $via_bcast .= $src
    return nothing
end
"""
    print_lowered_Memory(::Type{T}, N::Integer) where {T}

Print the lowered code (`@code_lowered`) of the three copy operations when both
source and destination are `Memory{T}` buffers of length `N`: `copy!(b, a)`,
`c[:] = a`, and `d .= a` (the last wrapped in a local function `myfunc!`).

Each printout is preceded by a label line, and a blank line is printed at the end.

Returns `nothing`.
"""
function print_lowered_Memory(::Type{T}, N::Integer) where {T}
    # `@show` echoes the text of its expression, so the names `a`, `b`, `c`, `d`
    # and `myfunc!` are spelled exactly as they should appear in the printout.
    a = randn!(Memory{T}(undef, N))
    b, c, d = ntuple(_ -> Memory{T}(undef, N), 3)

    println("Lowered: copy! ")
    @show @code_lowered copy!(b, a)

    println("Lowered: [:]")
    @show @code_lowered c[:] = a

    println("Lowered: .= ")
    # Broadcast assignment is wrapped in a function so it can be inspected as a call.
    function myfunc!(d, a)
        return d .= a
    end
    @show @code_lowered myfunc!(d, a)

    println()
    return nothing
end
"""
    print_lowered_Vector(::Type{T}, N::Integer) where {T}

Print the lowered code (`@code_lowered`) of the three copy operations when both
source and destination are `Vector{T}` buffers of length `N`: `copy!(b, a)`,
`c[:] = a`, and `d .= a` (the last wrapped in a local function `myfunc!`).

Each printout is preceded by a label line, and a blank line is printed at the end.

Returns `nothing`.
"""
function print_lowered_Vector(::Type{T}, N::Integer) where {T}
    # `@show` echoes the text of its expression, so the names `a`, `b`, `c`, `d`
    # and `myfunc!` are spelled exactly as they should appear in the printout.
    a = randn!(Vector{T}(undef, N))
    b, c, d = ntuple(_ -> Vector{T}(undef, N), 3)

    println("Lowered: copy! ")
    @show @code_lowered copy!(b, a)

    println("Lowered: [:]")
    @show @code_lowered c[:] = a

    println("Lowered: .= ")
    # Broadcast assignment is wrapped in a function so it can be inspected as a call.
    function myfunc!(d, a)
        return d .= a
    end
    @show @code_lowered myfunc!(d, a)

    println()
    return nothing
end
# Timing: run the Vector benchmarks first, then the Memory benchmarks.
for (heading, bench) in (("Vector:", test_Vector), ("Memory:", test_Memory))
    println(heading)
    bench(T, N)
end
println()

# Print lowered code: again Vector first, Memory second.
for (heading, show_lowered) in (
    ("Vector:", print_lowered_Vector),
    ("Memory:", print_lowered_Memory),
)
    println(heading)
    show_lowered(T, N)
end

# Record the Julia build and platform alongside the results.
@show versioninfo()
nothing
On my machine with a fresh Julia REPL session, the output is:
julia> include("debug.jl")
BLAS.get_num_threads() = 14
Vector:
Test Vector:
14.829 ms (0 allocations: 0 bytes)
14.484 ms (0 allocations: 0 bytes)
14.406 ms (0 allocations: 0 bytes)
Memory:
Test Memory
23.982 ms (0 allocations: 0 bytes)
14.938 ms (0 allocations: 0 bytes)
24.168 ms (0 allocations: 0 bytes)
Vector:
Lowered: copy!
#= /home/roy/Documents/repos/explore/examples/debug.jl:75 =# @code_lowered(copy!(b, a)) = CodeInfo(
1 ─ %1 = Base.:(==)
│ %2 = Base.firstindex(dst)
│ %3 = Base.firstindex(src)
│ %4 = (%1)(%2, %3)
└── goto #3 if not %4
2 ─ goto #4
3 ─ %7 = Base.throw
│ %8 = Base.ArgumentError("vectors must have the same offset for copy! (consider using `copyto!`)")
└── (%7)(%8)
4 ┄ %10 = Base.:!=
│ %11 = Base.length(dst)
│ %12 = Base.length(src)
│ %13 = (%10)(%11, %12)
└── goto #6 if not %13
5 ─ %15 = Base.resize!
│ %16 = Base.length(src)
└── (%15)(dst, %16)
6 ┄ %18 = Base.copyto!(dst, src)
└── return %18
)
Lowered: [:]
#= /home/roy/Documents/repos/explore/examples/debug.jl:78 =# @code_lowered(c[:] = a) = CodeInfo(
1 ─ nothing
│ lI = Base.length(A)
│ %3 = $(Expr(:boundscheck))
└── goto #3 if not %3
2 ─ %5 = lI
└── Base.setindex_shape_check(X, %5)
3 ┄ %7 = lI
│ %8 = %7 > 0
└── goto #5 if not %8
4 ─ %10 = lI
└── Base.unsafe_copyto!(A, 1, X, 1, %10)
5 ┄ %12 = A
└── return %12
)
Lowered: .=
#= /home/roy/Documents/repos/explore/examples/debug.jl:84 =# @code_lowered(myfunc!(d, a)) = CodeInfo(
1 ─ %1 = Base.broadcasted(Base.identity, a)
│ %2 = Base.materialize!(d, %1)
└── return %2
)
Memory:
Lowered: copy!
#= /home/roy/Documents/repos/explore/examples/debug.jl:49 =# @code_lowered(copy!(b, a)) = CodeInfo(
1 ─ %1 = Base.:(==)
│ %2 = Base.firstindex(dst)
│ %3 = Base.firstindex(src)
│ %4 = (%1)(%2, %3)
└── goto #3 if not %4
2 ─ goto #4
3 ─ %7 = Base.throw
│ %8 = Base.ArgumentError("vectors must have the same offset for copy! (consider using `copyto!`)")
└── (%7)(%8)
4 ┄ %10 = Base.:!=
│ %11 = Base.length(dst)
│ %12 = Base.length(src)
│ %13 = (%10)(%11, %12)
└── goto #6 if not %13
5 ─ %15 = Base.resize!
│ %16 = Base.length(src)
└── (%15)(dst, %16)
6 ┄ %18 = Base.copyto!(dst, src)
└── return %18
)
Lowered: [:]
#= /home/roy/Documents/repos/explore/examples/debug.jl:52 =# @code_lowered(c[:] = a) = CodeInfo(
1 ─ nothing
│ lI = Base.length(A)
│ %3 = $(Expr(:boundscheck))
└── goto #3 if not %3
2 ─ %5 = lI
└── Base.setindex_shape_check(X, %5)
3 ┄ %7 = lI
│ %8 = %7 > 0
└── goto #5 if not %8
4 ─ %10 = lI
└── Base.unsafe_copyto!(A, 1, X, 1, %10)
5 ┄ %12 = A
└── return %12
)
Lowered: .=
#= /home/roy/Documents/repos/explore/examples/debug.jl:58 =# @code_lowered(myfunc!(d, a)) = CodeInfo(
1 ─ %1 = Base.broadcasted(Base.identity, a)
│ %2 = Base.materialize!(d, %1)
└── return %2
)
Julia Version 1.11.0-rc1
Commit 3a35aec36d1 (2024-06-25 10:23 UTC)
Build Info:
Official https://julialang.org/ release
Platform Info:
OS: Linux (x86_64-linux-gnu)
CPU: 16 × AMD Ryzen 7 1700 Eight-Core Processor
WORD_SIZE: 64
LLVM: libLLVM-16.0.6 (ORCJIT, znver1)
Threads: 1 default, 0 interactive, 1 GC (on 16 virtual cores)
versioninfo() = nothing
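On my machine, the lowered-code printout above is identical between the Vector and Memory versions of copy!, .=, and [:] =. (This is perhaps unsurprising: lowered code is produced before type inference and specialization, so a type-dependent difference would more likely show up in @code_typed or @code_llvm output.) It seems the Base.unsafe_copyto! call in the lowered [:] = does not suffer from the performance hit when used with Memory.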