Skip to content

v1.11-rc1: Using copy! or .= with Memory is slower than [:] = and Vector #55079

@RoyCCWang

Description

@RoyCCWang

I observed a slowdown of roughly 1.6× (about 24 ms versus about 14.5 ms in the timings below) when using copy! or .= with Memory, but not when using [:] =. I suspect this might have something to do with the discussion here: https://discourse.julialang.org/t/increase-in-allocations-with-julia-v1-11-beta/112838/2

I put the following code in a script, debug.jl, and ran include("debug.jl") in a fresh REPL session. The output shown further below includes a printout of the lowered code.

using BenchmarkTools, Random, LinearAlgebra

const T = Float64  # element type passed to every test/print function below
const N = 5000^2   # array length passed to every test/print function (25_000_000)

BLAS.set_num_threads(14)  # request 14 BLAS threads; the next line echoes the active count
@show BLAS.get_num_threads()
# @show Hwloc.num_physical_cores()

# Benchmark three equivalent ways of copying a length-`N` `Vector{T}` into a
# preallocated destination: `copy!`, colon `setindex!`, and broadcast `.=`.
function test_Vector(::Type{T}, N::Integer) where {T}
    src = randn(T, N)
    # One independent, zero-filled destination per strategy.
    dst_copy, dst_colon, dst_bcast = zeros(T, N), zeros(T, N), zeros(T, N)
    println("Test Vector:")
    # `$` interpolates the arrays into each benchmarked expression.
    @btime copy!($dst_copy, $src)
    @btime $dst_colon[:] = $src
    @btime $dst_bcast .= $src
    return nothing
end

# Benchmark three equivalent ways of copying a length-`N` `Memory{T}` into a
# preallocated destination: `copy!`, colon `setindex!`, and broadcast `.=`.
function test_Memory(::Type{T}, N::Integer) where {T}
    src = randn!(Memory{T}(undef, N))  # allocate, then fill in place
    # Destinations are left uninitialized; each statement below overwrites them.
    dst_copy = Memory{T}(undef, N)
    dst_colon = Memory{T}(undef, N)
    dst_bcast = Memory{T}(undef, N)
    println("Test Memory")
    @btime copy!($dst_copy, $src)
    @btime $dst_colon[:] = $src
    @btime $dst_bcast .= $src
    return nothing
end

# Show the lowered code reached by `copy!`, colon `setindex!`, and broadcast
# `.=` when source and destination are both `Memory{T}` of length `N`.
function print_lowered_Memory(::Type{T}, N::Integer) where {T}
    a = randn!(Memory{T}(undef, N))  # source: allocate, then fill in place
    # `@show` echoes the expression text, so the short local names are kept.
    b = Memory{T}(undef, N)
    c = Memory{T}(undef, N)
    d = Memory{T}(undef, N)

    println("Lowered: copy! ")
    @show @code_lowered copy!(b, a)

    println("Lowered: [:]")
    @show @code_lowered c[:] = a

    println("Lowered: .= ")
    # Wrap the broadcast assignment in a local function so that
    # `@code_lowered` can be given an ordinary call.
    myfunc!(d, a) = (d .= a)
    @show @code_lowered myfunc!(d, a)
    println()

    return nothing
end


# Show the lowered code reached by `copy!`, colon `setindex!`, and broadcast
# `.=` when source and destination are both `Vector{T}` of length `N`.
function print_lowered_Vector(::Type{T}, N::Integer) where {T}
    a = randn!(Vector{T}(undef, N))  # source: allocate, then fill in place
    # `@show` echoes the expression text, so the short local names are kept.
    b = Vector{T}(undef, N)
    c = Vector{T}(undef, N)
    d = Vector{T}(undef, N)

    println("Lowered: copy! ")
    @show @code_lowered copy!(b, a)

    println("Lowered: [:]")
    @show @code_lowered c[:] = a

    println("Lowered: .= ")
    # Wrap the broadcast assignment in a local function so that
    # `@code_lowered` can be given an ordinary call.
    myfunc!(d, a) = (d .= a)
    @show @code_lowered myfunc!(d, a)
    println()

    return nothing
end

# Timing:
println("Vector:")
test_Vector(T, N)  # prints one @btime line per copy strategy

println("Memory:")
test_Memory(T, N)  # same three strategies, with Memory instead of Vector
println()

# Print lowered code:
println("Vector:")
print_lowered_Vector(T, N)

println("Memory:")
print_lowered_Memory(T, N)

# NOTE(review): `versioninfo()` prints its report itself and evaluates to `nothing`, so
# `@show` adds a trailing "versioninfo() = nothing" line; a bare call would avoid that.
@show versioninfo()
nothing  # presumably so that `include` yields no value for the REPL to echo

On my machine with a fresh Julia REPL session, the output is:

julia> include("debug.jl")
BLAS.get_num_threads() = 14
Vector:
Test Vector:
  14.829 ms (0 allocations: 0 bytes)
  14.484 ms (0 allocations: 0 bytes)
  14.406 ms (0 allocations: 0 bytes)
Memory:
Test Memory
  23.982 ms (0 allocations: 0 bytes)
  14.938 ms (0 allocations: 0 bytes)
  24.168 ms (0 allocations: 0 bytes)

Vector:
Lowered: copy! 
#= /home/roy/Documents/repos/explore/examples/debug.jl:75 =# @code_lowered(copy!(b, a)) = CodeInfo(
1 ─ %1  = Base.:(==)
│   %2  = Base.firstindex(dst)
│   %3  = Base.firstindex(src)
│   %4  = (%1)(%2, %3)
└──       goto #3 if not %4
2 ─       goto #4
3 ─ %7  = Base.throw
│   %8  = Base.ArgumentError("vectors must have the same offset for copy! (consider using `copyto!`)")
└──       (%7)(%8)
4 ┄ %10 = Base.:!=
│   %11 = Base.length(dst)
│   %12 = Base.length(src)
│   %13 = (%10)(%11, %12)
└──       goto #6 if not %13
5 ─ %15 = Base.resize!
│   %16 = Base.length(src)
└──       (%15)(dst, %16)
6 ┄ %18 = Base.copyto!(dst, src)
└──       return %18
)
Lowered: [:]
#= /home/roy/Documents/repos/explore/examples/debug.jl:78 =# @code_lowered(c[:] = a) = CodeInfo(
1 ─       nothing
│         lI = Base.length(A)
│   %3  = $(Expr(:boundscheck))
└──       goto #3 if not %3
2 ─ %5  = lI
└──       Base.setindex_shape_check(X, %5)
3 ┄ %7  = lI
│   %8  = %7 > 0
└──       goto #5 if not %8
4 ─ %10 = lI
└──       Base.unsafe_copyto!(A, 1, X, 1, %10)
5 ┄ %12 = A
└──       return %12
)
Lowered: .= 
#= /home/roy/Documents/repos/explore/examples/debug.jl:84 =# @code_lowered(myfunc!(d, a)) = CodeInfo(
1 ─ %1 = Base.broadcasted(Base.identity, a)
│   %2 = Base.materialize!(d, %1)
└──      return %2
)

Memory:
Lowered: copy! 
#= /home/roy/Documents/repos/explore/examples/debug.jl:49 =# @code_lowered(copy!(b, a)) = CodeInfo(
1 ─ %1  = Base.:(==)
│   %2  = Base.firstindex(dst)
│   %3  = Base.firstindex(src)
│   %4  = (%1)(%2, %3)
└──       goto #3 if not %4
2 ─       goto #4
3 ─ %7  = Base.throw
│   %8  = Base.ArgumentError("vectors must have the same offset for copy! (consider using `copyto!`)")
└──       (%7)(%8)
4 ┄ %10 = Base.:!=
│   %11 = Base.length(dst)
│   %12 = Base.length(src)
│   %13 = (%10)(%11, %12)
└──       goto #6 if not %13
5 ─ %15 = Base.resize!
│   %16 = Base.length(src)
└──       (%15)(dst, %16)
6 ┄ %18 = Base.copyto!(dst, src)
└──       return %18
)
Lowered: [:]
#= /home/roy/Documents/repos/explore/examples/debug.jl:52 =# @code_lowered(c[:] = a) = CodeInfo(
1 ─       nothing
│         lI = Base.length(A)
│   %3  = $(Expr(:boundscheck))
└──       goto #3 if not %3
2 ─ %5  = lI
└──       Base.setindex_shape_check(X, %5)
3 ┄ %7  = lI
│   %8  = %7 > 0
└──       goto #5 if not %8
4 ─ %10 = lI
└──       Base.unsafe_copyto!(A, 1, X, 1, %10)
5 ┄ %12 = A
└──       return %12
)
Lowered: .= 
#= /home/roy/Documents/repos/explore/examples/debug.jl:58 =# @code_lowered(myfunc!(d, a)) = CodeInfo(
1 ─ %1 = Base.broadcasted(Base.identity, a)
│   %2 = Base.materialize!(d, %1)
└──      return %2
)

Julia Version 1.11.0-rc1
Commit 3a35aec36d1 (2024-06-25 10:23 UTC)
Build Info:
  Official https://julialang.org/ release
Platform Info:
  OS: Linux (x86_64-linux-gnu)
  CPU: 16 × AMD Ryzen 7 1700 Eight-Core Processor
  WORD_SIZE: 64
  LLVM: libLLVM-16.0.6 (ORCJIT, znver1)
Threads: 1 default, 0 interactive, 1 GC (on 16 virtual cores)
versioninfo() = nothing

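On my machine, the lowered code printed above is identical between the Vector and Memory versions of copy!, .=, and [:] =. This is expected, because @code_lowered shows only the generic method that each call dispatches to, before any specialization on the argument types; the difference must therefore arise further down, in the methods that copyto! and materialize! end up calling for Memory. It seems that the Base.unsafe_copyto! path reached by the lowered [:] = does not suffer from the performance hit with Memory.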

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions