Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
name = "VectorizationBase"
uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
authors = ["Chris Elrod <[email protected]>"]
version = "0.21.72"
version = "0.21.73"

[deps]
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
BitTwiddlingConvenienceFunctions = "62783981-4cbd-42fc-bca8-16325de8dc4b"
CPUSummary = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9"
HostCPUFeatures = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0"
IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
Expand All @@ -17,6 +18,7 @@ StaticArrayInterface = "0d7ed370-da01-4f52-bd93-41d350b8b718"

[compat]
ArrayInterface = "7"
BitTwiddlingConvenienceFunctions = "0.1.6"
CPUSummary = "0.1.1 - 0.1.8, 0.1.11, 0.2"
HostCPUFeatures = "0.1"
IfElse = "0.1"
Expand Down
2 changes: 1 addition & 1 deletion docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ makedocs(;
prettyurls = get(ENV, "CI", "false") == "true",
canonical = "https://JuliaSIMD.github.io/VectorizationBase.jl"
),
pages = ["Home" => "index.md"],
pages = ["Home" => "index.md"]
)

deploydocs(; repo = "github.com/JuliaSIMD/VectorizationBase.jl")
113 changes: 42 additions & 71 deletions src/VectorizationBase.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,61 +6,22 @@ end
import StaticArrayInterface, LinearAlgebra, Libdl, IfElse, LayoutPointers
const ArrayInterface = StaticArrayInterface
using StaticArrayInterface:
contiguous_axis,
contiguous_axis_indicator,
contiguous_batch_size,
stride_rank,
device,
CPUPointer,
CPUIndex,
known_length,
known_first,
known_last,
static_size,
static_strides,
offsets,
static_first,
static_last,
static_length
stride_rank, static_strides, offsets, static_first, static_last, static_length
import IfElse: ifelse

using CPUSummary:
cache_type,
num_cache,
num_cache_levels,
num_cores,
num_l1cache,
num_l2cache,
cache_associativity,
num_l3cache,
sys_threads,
cache_inclusive,
num_l4cache,
cache_linesize,
num_machines,
cache_size,
num_sockets
using CPUSummary: cache_linesize
using HostCPUFeatures:
register_size,
static_sizeof,
fast_int64_to_double,
pick_vector_width,
pick_vector_width_shift,
prevpow2,
simd_integer_register_size,
fma_fast,
smax,
smin,
has_feature,
has_opmask_registers,
register_count,
static_sizeof,
cpu_name,
register_size,
unwrap,
intlog2,
nextpow2,
fast_half
using BitTwiddlingConvenienceFunctions: intlog2, nextpow2, prevpow2

import Base:
Float16,
Expand All @@ -74,36 +35,43 @@ import Base:
UInt16,
UInt32,
UInt64,
Bool
Bool

using SIMDTypes:
Bit,
FloatingTypes,
SignedHW,
UnsignedHW,
IntegerTypesHW,
NativeTypesExceptBitandFloat16,
NativeTypesExceptBit,
NativeTypesExceptFloat16,
NativeTypes,
_Vec
using LayoutPointers:
AbstractStridedPointer,
StridedPointer,
StridedBitPointer,
memory_reference,
stridedpointer,
zstridedpointer,
similar_no_offset,
similar_with_offset,
grouped_strided_pointer,
stridedpointers,
bytestrides,
DensePointerWrapper,
zero_offsets

using Static
using Static: One, Zero, eq, ne, lt, le, gt, ge
using Static:
Static,
One,
Zero,
eq,
lt,
le,
gt,
ge,
ne,
True,
False,
StaticBool,
StaticInt,
known,
static

@inline function promote(x::X, y::Y) where {X,Y}
T = promote_type(X, Y)
Expand Down Expand Up @@ -363,15 +331,15 @@ function Base.show(io::IO, v::AbstractSIMDVector{W,T}) where {W,T}
end
print(io, ">")
end
Base.bitstring(m::AbstractMask{W}) where {W} = bitstring(data(m))[end-W+1:end]
Base.bitstring(m::AbstractMask{W}) where {W} = bitstring(data(m))[(end-W+1):end]
function Base.show(io::IO, m::AbstractMask{W}) where {W}
bits = data(m)
if m isa EVLMask
print(io, "EVLMask{$W,Bit}<")
else
print(io, "Mask{$W,Bit}<")
end
for w ∈ 0:W-1
for w ∈ 0:(W-1)
print(io, (bits & 0x01) % Int)
bits >>= 0x01
w < W - 1 && print(io, ", ")
Expand All @@ -381,7 +349,7 @@ end
function Base.show(io::IO, vu::VecUnroll{N,W,T,V}) where {N,W,T,V}
println(io, "$(N+1) x $V")
d = data(vu)
for n = 1:N+1
for n = 1:(N+1)
show(io, d[n])
n > N || println(io)
end
Expand Down Expand Up @@ -508,20 +476,23 @@ demoteint(::Type{Int64}, W::StaticInt) = gt(W, pick_vector_width(Int64))
end
meta = Expr(:meta, :inline)
if VERSION >= v"1.8.0-beta"
purity = Expr(:purity,
#= consistent =# true,
#= effect_free =# true,
#= nothrow =# true,
#= terminates_globally =# true,
#= terminates_locally =# false)
purity = Expr(
:purity,
#= consistent =#true,
#= effect_free =#true,
#= nothrow =#true,
#= terminates_globally =#true,
#= terminates_locally =#false
)
if VERSION >= v"1.11"
push!(purity.args,
#= notaskstate =# true,
#= inaccessiblememonly =# true,
#= noub =# true,
#= noub_if_noinbounds =# false,
#= consistent_overlay =# false,
#= nortcall =# true,
push!(
purity.args,
#= notaskstate =#true,
#= inaccessiblememonly =#true,
#= noub =#true,
#= noub_if_noinbounds =#false,
#= consistent_overlay =#false,
#= nortcall =#true
)
end
push!(meta.args, purity)
Expand All @@ -537,9 +508,9 @@ function vec_quote(demote, W, Wpow2, offset::Int = 0)
iszero(offset) && push!(call.args, :y)
foreach(
w -> push!(call.args, Expr(:call, getfield, :x, w, false)),
max(1, offset):min(W, Wpow2)-1
max(1, offset):(min(W, Wpow2)-1)
)
foreach(w -> push!(call.args, Expr(:call, :zero, :T)), W+1:Wpow2)
foreach(w -> push!(call.args, Expr(:call, :zero, :T)), (W+1):Wpow2)
call
end
@generated function _vec(
Expand Down Expand Up @@ -578,7 +549,7 @@ else
end
end
@inline reduce_to_onevec(f::F, vu::VecUnroll) where {F} =
ArrayInterface.reduce_tup(f, data(vu))
Static.reduce_tup(f, data(vu))

if VERSION >= v"1.7.0" && hasfield(Method, :recursion_relation)
dont_limit = Returns(true)
Expand Down
30 changes: 1 addition & 29 deletions src/base_defs.jl
Original file line number Diff line number Diff line change
@@ -1,27 +1,3 @@
const FASTDICT = Dict{Symbol,Expr}([
:(+) => :(Base.FastMath.add_fast),
:(-) => :(Base.FastMath.sub_fast),
:(*) => :(Base.FastMath.mul_fast),
:(/) => :(Base.FastMath.div_fast),
:(÷) => :(VectorizationBase.vdiv_fast), # VectorizationBase.vdiv == integer, VectorizationBase.vfdiv == float
:(%) => :(Base.FastMath.rem_fast),
:abs2 => :(Base.FastMath.abs2_fast),
:inv => :(Base.FastMath.inv_fast), # this is slower in most benchmarks
:hypot => :(Base.FastMath.hypot_fast),
:max => :(Base.FastMath.max_fast),
:min => :(Base.FastMath.min_fast),
:muladd => :(VectorizationBase.vmuladd_fast),
:fma => :(VectorizationBase.vfma_fast),
:vfmadd => :(VectorizationBase.vfmadd_fast),
:vfnmadd => :(VectorizationBase.vfnmadd_fast),
:vfmsub => :(VectorizationBase.vfmsub_fast),
:vfnmsub => :(VectorizationBase.vfnmsub_fast),
:log => :(SLEEFPirates.log_fast),
:log2 => :(SLEEFPirates.log2_fast),
:log10 => :(SLEEFPirates.log10_fast),
:(^) => :(Base.FastMath.pow_fast)
])

for (op, f) ∈ [
(:(Base.:-), :vsub),
(:(Base.FastMath.sub_fast), :vsub_fast),
Expand Down Expand Up @@ -260,11 +236,7 @@ end
x, y = promote(a, b)
VecUnroll(fmap(ifelse, getfield(m, :data), unrolldata(x), unrolldata(y)))
end
@inline function IfElse.ifelse(
m::VecUnroll{<:Any,<:Any,Bool},
a::Real,
b::Real
)
@inline function IfElse.ifelse(m::VecUnroll{<:Any,<:Any,Bool}, a::Real, b::Real)
x, y = promote(a, b)
VecUnroll(fmap(ifelse, getfield(m, :data), unrolldata(x), unrolldata(y)))
end
Expand Down
2 changes: 1 addition & 1 deletion src/cartesianvindex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ struct CartesianVIndex{N,T<:Tuple{Vararg{Union{Int,StaticInt,NullStep},N}}} <:
) where {N,T<:Tuple{Vararg{Union{Int,StaticInt,NullStep},N}}} = new{N,T}(I)
end
Base.length(::CartesianVIndex{N}) where {N} = N
ArrayInterface.known_length(::Type{<:CartesianVIndex{N}}) where {N} = N
StaticArrayInterface.known_length(::Type{<:CartesianVIndex{N}}) where {N} = N
Base.Tuple(i::CartesianVIndex) = getfield(i, :I)
function Base.:(:)(I::CartesianVIndex{N}, J::CartesianVIndex{N}) where {N}
CartesianIndices(map((i, j) -> i:j, getfield(I, :I), getfield(J, :I)))
Expand Down
2 changes: 1 addition & 1 deletion src/early_definitions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ end
end

@inline integer_preference(::StaticInt{B}) where {B} =
ifelse(ArrayInterface.ge(StaticInt{B}(), StaticInt{8}()), Int, Int32)
ifelse(ge(StaticInt{B}(), StaticInt{8}()), Int, Int32)

@inline pick_integer(::Union{StaticInt{W},Val{W}}) where {W} =
integer_preference(simd_integer_register_size() ÷ StaticInt{W}())
Expand Down
10 changes: 4 additions & 6 deletions src/lazymul.jl
Original file line number Diff line number Diff line change
Expand Up @@ -432,8 +432,7 @@ end
if iszero(r)
quote
$(Expr(:meta, :inline))
p,
VectorizationBase.LazyMulAdd{$N,$(I * M)}(MM{$W,$d}(getfield(b, :data)))
p, LazyMulAdd{$N,$(I * M)}(MM{$W,$d}(getfield(b, :data)))
end
else
quote
Expand All @@ -451,8 +450,7 @@ end
if iszero(r)
quote
$(Expr(:meta, :inline))
p,
VectorizationBase.LazyMulAdd{$N,$(I * M)}(MM{$W,$d}(getfield(b, :data)))
p, LazyMulAdd{$N,$(I * M)}(MM{$W,$d}(getfield(b, :data)))
end
else
quote
Expand All @@ -479,7 +477,7 @@ end
if iszero(r)
quote
$(Expr(:meta, :inline))
VectorizationBase.LazyMulAdd{$N,$(I * M)}(MM{$W,$d}(getfield(b, :data)))
LazyMulAdd{$N,$(I * M)}(MM{$W,$d}(getfield(b, :data)))
end
else
quote
Expand Down Expand Up @@ -509,7 +507,7 @@ end
if iszero(r)
quote
$(Expr(:meta, :inline))
VectorizationBase.LazyMulAdd{$N,$(I * M)}(-MM{$W,$d}(getfield(b, :data)))
LazyMulAdd{$N,$(I * M)}(-MM{$W,$d}(getfield(b, :data)))
end
else
quote
Expand Down
2 changes: 1 addition & 1 deletion src/llvm_intrin/binary_ops.jl
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ end
v2::AbstractSIMD{W,T}
) where {W,T<:FloatingTypes} = trunc(vfdiv_fast(v1, v2))
@inline vdiv_fast(v1::T, v2::T) where {T<:FloatingTypes} =
trunc(Base.FastMath.div_float_fast(v1, v2))
trunc(Core.Intrinsics.div_float_fast(v1, v2))
@inline vdiv_fast(v1::T, v2::T) where {T<:Number} = v1 ÷ v2
@inline vdiv(v1::T, v2::T) where {T<:Number} = v1 ÷ v2
@inline vdiv(v1::T, v2::T) where {T<:FloatingTypes} = vdiv_fast(v1, v2)
Expand Down
Loading