From a348cec8f5c79e89a542fcb60aa7bd6f4d8b54ca Mon Sep 17 00:00:00 2001 From: Yingbo Ma Date: Thu, 18 Nov 2021 22:45:57 -0500 Subject: [PATCH 1/4] Failed attempts --- Project.toml | 1 + src/ForwardDiff.jl | 1 + src/partials.jl | 167 ++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 158 insertions(+), 11 deletions(-) diff --git a/Project.toml b/Project.toml index 3318b46d..93b0d8a0 100644 --- a/Project.toml +++ b/Project.toml @@ -14,6 +14,7 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" +VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" [compat] Calculus = "0.2, 0.3, 0.4, 0.5" diff --git a/src/ForwardDiff.jl b/src/ForwardDiff.jl index 93d3b246..6affc113 100644 --- a/src/ForwardDiff.jl +++ b/src/ForwardDiff.jl @@ -8,6 +8,7 @@ if VERSION >= v"1.6" end using Random using LinearAlgebra +using VectorizationBase import Printf import NaNMath diff --git a/src/partials.jl b/src/partials.jl index fce67b0a..b60ffa68 100644 --- a/src/partials.jl +++ b/src/partials.jl @@ -197,30 +197,175 @@ end return tupexpr(i -> :(rand(V)), N) end -@generated function scale_tuple(tup::NTuple{N}, x) where N - return tupexpr(i -> :(tup[$i] * x), N) +const SIMDFloat = Union{Float64, Float32} +const SIMDInt = Union{ + Int128, Int64, Int32, Int16, Int8, + UInt128, UInt64, UInt32, UInt16, UInt8, + Bool + } +const SIMDType = Union{SIMDFloat, SIMDInt} + +function julia_type_to_llvm_type(@nospecialize(T::DataType)) + T === Float64 ? "double" : + T === Float32 ? "float" : + T <: Union{Int128,UInt128} ? "i128" : + T <: Union{Int64,UInt64} ? "i64" : + T <: Union{Int32,UInt32} ? "i32" : + T <: Union{Int16,UInt16} ? "i16" : + T <: Union{Bool,Int8,UInt8} ? "i8" : + error("$T cannot be mapped to a LLVM type") end -@generated function div_tuple_by_scalar(tup::NTuple{N}, x) where N - return tupexpr(i -> :(tup[$i] / x), N) +function llvmir_scalar_to_vec(@nospecialize(T::DataType), n::Int, vname::String) + S = julia_type_to_llvm_type(T) + el = string("ele", vname) + """ + %$el = insertelement <$n x $S> undef, $S %0, i32 0 + %$vname = shufflevector <$n x $S> %$el, <$n x $S> undef, <$n x i32> zeroinitializer + """ end -@generated function add_tuples(a::NTuple{N}, b::NTuple{N}) where N - return tupexpr(i -> :(a[$i] + b[$i]), N) +@generated function scale_tuple(tup::NTuple{N,T1}, x::S1) where {N,T1,S1} + (T1 <: SIMDType && S1 <: SIMDType) || return tupexpr(i -> :(tup[$i] * x), N) + + T = promote_type(T1, S1) + S = julia_type_to_llvm_type(T) + VT = NTuple{N, VecElement{T}} + op = T <: SIMDFloat ? "fmul nsz contract" : "mul" + llvmir = """ + %el = insertelement <$N x $S> undef, $S %1, i32 0 + %vx = shufflevector <$N x $S> %el, <$N x $S> undef, <$N x i32> zeroinitializer + %res = $op <$N x $S> %0, %vx + ret <$N x $S> %res + """ + + quote + $(Expr(:meta, :inline)) + t = Base.@ntuple $N i->$T(tup[i]) + ret = Base.llvmcall($llvmir, $VT, Tuple{$VT, $T}, $VT(t), $T(x)) + Base.@ntuple $N i->ret[i].value + end +end + +@generated function div_tuple_by_scalar(tup::NTuple{N,T1}, x::S1) where {N,T1,S1} + (T1 <: SIMDType && S1 <: SIMDType) || return tupexpr(i -> :(tup[$i] / x), N) + + T = typeof(one(T1) / one(S1)) + S = julia_type_to_llvm_type(T) + VT = NTuple{N, VecElement{T}} + op = T <: SIMDFloat ? "fdiv nsz contract" : "div" + llvmir = """ + %el = insertelement <$N x $S> undef, $S %1, i32 0 + %vx = shufflevector <$N x $S> %el, <$N x $S> undef, <$N x i32> zeroinitializer + %res = $op <$N x $S> %0, %vx + ret <$N x $S> %res + """ + + quote + $(Expr(:meta, :inline)) + t = Base.@ntuple $N i->$T(tup[i]) + ret = Base.llvmcall($llvmir, $VT, Tuple{$VT, $T}, $VT(t), $T(x)) + Base.@ntuple $N i->ret[i].value + end +end + +@generated function add_tuples(a::NTuple{N,T1}, b::NTuple{N,S1}) where {N,T1,S1} + (T1 <: SIMDType && S1 <: SIMDType) || return tupexpr(i -> :(a[$i] + b[$i]), N) + + T = promote_type(T1, S1) + S = julia_type_to_llvm_type(T) + VT = NTuple{N, VecElement{T}} + op = T <: SIMDFloat ? "fadd nsz contract" : "add" + llvmir = """ + %res = $op <$N x $S> %0, %1 + ret <$N x $S> %res + """ + + quote + $(Expr(:meta, :inline)) + at = Base.@ntuple $N i->$T(a[i]) + bt = Base.@ntuple $N i->$T(b[i]) + ret = Base.llvmcall($llvmir, $VT, Tuple{$VT, $VT}, $VT(at), $VT(bt)) + Base.@ntuple $N i->ret[i].value + end end -@generated function sub_tuples(a::NTuple{N}, b::NTuple{N}) where N - return tupexpr(i -> :(a[$i] - b[$i]), N) +@generated function sub_tuples(a::NTuple{N,T1}, b::NTuple{N,S1}) where {N,T1,S1} + (T1 <: SIMDType && S1 <: SIMDType) || return tupexpr(i -> :(a[$i] - b[$i]), N) + + T = promote_type(T1, S1) + S = julia_type_to_llvm_type(T) + VT = NTuple{N, VecElement{T}} + op = T <: SIMDFloat ? "fsub nsz contract" : "sub" + llvmir = """ + %res = $op <$N x $S> %0, %1 + ret <$N x $S> %res + """ + + quote + $(Expr(:meta, :inline)) + at = Base.@ntuple $N i->$T(a[i]) + bt = Base.@ntuple $N i->$T(b[i]) + ret = Base.llvmcall($llvmir, $VT, Tuple{$VT, $VT}, $VT(at), $VT(bt)) + Base.@ntuple $N i->ret[i].value + end end -@generated function minus_tuple(tup::NTuple{N}) where N - return tupexpr(i -> :(-tup[$i]), N) +@generated function minus_tuple(tup::NTuple{N,T}) where {N,T} + T <: SIMDType || return tupexpr(i -> :(-tup[$i]), N) + + S = julia_type_to_llvm_type(T) + VT = NTuple{N, VecElement{T}} + op = T <: SIMDFloat ? "fneg nsz contract" : "sub" + llvmir = """ + %res = $op <$N x $S> %0 + ret <$N x $S> %res + """ + + quote + $(Expr(:meta, :inline)) + ret = Base.llvmcall($llvmir, $VT, Tuple{$VT}, $VT(tup)) + Base.@ntuple $N i->ret[i].value + end end -@generated function mul_tuples(a::NTuple{N}, b::NTuple{N}, afactor, bfactor) where N +@generated function mul_tuples(a::NTuple{N,V1}, b::NTuple{N,V2}, afactor::S1, bfactor::S2) where {N,V1,V2,S1,S2} return tupexpr(i -> :((afactor * a[$i]) + (bfactor * b[$i])), N) end +#= +@inline function scale_tuple(tup::NTuple{N,T}, x) where {N,T<:SIMDType} + Tuple(Vec{N,T}(tup...) * x) +end + +@inline function div_tuple_by_scalar(tup::NTuple{N,T}, x) where {N,T<:SIMDType} + Tuple(Vec{N,T}(tup...) / x) +end + +@inline function add_tuples(a::NTuple{N,T}, b::NTuple{N,S}) where {N,T<:SIMDType,S<:SIMDType} + va = Vec{N,T}(a...) + vb = Vec{N,S}(b...) + return Tuple(va + vb) +end + +@inline function sub_tuples(a::NTuple{N,T}, b::NTuple{N,S}) where {N,T<:SIMDType,S<:SIMDType} + va = Vec{N,T}(a...) + vb = Vec{N,S}(b...) + return Tuple(va - vb) +end + +@inline function minus_tuple(a::NTuple{N,T}) where {N,T<:SIMDType} + va = Vec{N,T}(a...) + return Tuple(-va) +end + +@inline function mul_tuples(a::NTuple{N,T}, b::NTuple{N,S}, afactor::SIMDType, bfactor::SIMDType) where {N,T<:SIMDType,S<:SIMDType} + va = Vec{N,T}(a...) + vb = Vec{N,S}(b...) + return Tuple(muladd(afactor, va, bfactor * vb)) +end +=# + ################### # Pretty Printing # ################### From 0a5df539b65cabb7fd330995b32c7cb12970e20b Mon Sep 17 00:00:00 2001 From: Yingbo Ma Date: Thu, 18 Nov 2021 23:30:03 -0500 Subject: [PATCH 2/4] Slightly better SIMD criterion --- Project.toml | 1 - src/ForwardDiff.jl | 1 - src/partials.jl | 125 ++++++++++++++++++++----------------------- test/PartialsTest.jl | 7 ++- 4 files changed, 63 insertions(+), 71 deletions(-) diff --git a/Project.toml b/Project.toml index 93b0d8a0..3318b46d 100644 --- a/Project.toml +++ b/Project.toml @@ -14,7 +14,6 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" -VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" [compat] Calculus = "0.2, 0.3, 0.4, 0.5" diff --git a/src/ForwardDiff.jl b/src/ForwardDiff.jl index 6affc113..93d3b246 100644 --- a/src/ForwardDiff.jl +++ b/src/ForwardDiff.jl @@ -8,7 +8,6 @@ if VERSION >= v"1.6" end using Random using LinearAlgebra -using VectorizationBase import Printf import NaNMath diff --git a/src/partials.jl b/src/partials.jl index b60ffa68..95d1dc4c 100644 --- a/src/partials.jl +++ b/src/partials.jl @@ -216,19 +216,11 @@ function julia_type_to_llvm_type(@nospecialize(T::DataType)) error("$T cannot be mapped to a LLVM type") end -function llvmir_scalar_to_vec(@nospecialize(T::DataType), n::Int, vname::String) - S = julia_type_to_llvm_type(T) - el = string("ele", vname) - """ - %$el = insertelement <$n x $S> undef, $S %0, i32 0 - %$vname = shufflevector <$n x $S> %$el, <$n x $S> undef, <$n x i32> zeroinitializer - """ -end - -@generated function scale_tuple(tup::NTuple{N,T1}, x::S1) where {N,T1,S1} - (T1 <: SIMDType && S1 <: SIMDType) || return tupexpr(i -> :(tup[$i] * x), N) +@generated function scale_tuple(tup::NTuple{N,T}, x::S) where {N,T,S} + if !(T === S && S <: SIMDType) + return tupexpr(i -> :(tup[$i] * x), N) + end - T = promote_type(T1, S1) S = julia_type_to_llvm_type(T) VT = NTuple{N, VecElement{T}} op = T <: SIMDFloat ? "fmul nsz contract" : "mul" @@ -241,16 +233,16 @@ end quote $(Expr(:meta, :inline)) - t = Base.@ntuple $N i->$T(tup[i]) - ret = Base.llvmcall($llvmir, $VT, Tuple{$VT, $T}, $VT(t), $T(x)) + ret = Base.llvmcall($llvmir, $VT, Tuple{$VT, $T}, $VT(tup), x) Base.@ntuple $N i->ret[i].value end end -@generated function div_tuple_by_scalar(tup::NTuple{N,T1}, x::S1) where {N,T1,S1} - (T1 <: SIMDType && S1 <: SIMDType) || return tupexpr(i -> :(tup[$i] / x), N) +@generated function div_tuple_by_scalar(tup::NTuple{N,T}, x::S) where {N,T,S} + if !(T === S === typeof(one(T) / one(S)) && S <: SIMDType) + return tupexpr(i -> :(tup[$i] / x), N) + end - T = typeof(one(T1) / one(S1)) S = julia_type_to_llvm_type(T) VT = NTuple{N, VecElement{T}} op = T <: SIMDFloat ? "fdiv nsz contract" : "div" @@ -263,16 +255,16 @@ end quote $(Expr(:meta, :inline)) - t = Base.@ntuple $N i->$T(tup[i]) - ret = Base.llvmcall($llvmir, $VT, Tuple{$VT, $T}, $VT(t), $T(x)) + ret = Base.llvmcall($llvmir, $VT, Tuple{$VT, $T}, $VT(tup), x) Base.@ntuple $N i->ret[i].value end end -@generated function add_tuples(a::NTuple{N,T1}, b::NTuple{N,S1}) where {N,T1,S1} - (T1 <: SIMDType && S1 <: SIMDType) || return tupexpr(i -> :(a[$i] + b[$i]), N) +@generated function add_tuples(a::NTuple{N,T}, b::NTuple{N,S}) where {N,T,S} + if !(T === S && S <: SIMDType) + return tupexpr(i -> :(a[$i] + b[$i]), N) + end - T = promote_type(T1, S1) S = julia_type_to_llvm_type(T) VT = NTuple{N, VecElement{T}} op = T <: SIMDFloat ? "fadd nsz contract" : "add" @@ -283,17 +275,16 @@ end quote $(Expr(:meta, :inline)) - at = Base.@ntuple $N i->$T(a[i]) - bt = Base.@ntuple $N i->$T(b[i]) - ret = Base.llvmcall($llvmir, $VT, Tuple{$VT, $VT}, $VT(at), $VT(bt)) + ret = Base.llvmcall($llvmir, $VT, Tuple{$VT, $VT}, $VT(a), $VT(b)) Base.@ntuple $N i->ret[i].value end end -@generated function sub_tuples(a::NTuple{N,T1}, b::NTuple{N,S1}) where {N,T1,S1} - (T1 <: SIMDType && S1 <: SIMDType) || return tupexpr(i -> :(a[$i] - b[$i]), N) +@generated function sub_tuples(a::NTuple{N,T}, b::NTuple{N,S}) where {N,T,S} + if !(T === S && S <: SIMDType) + return tupexpr(i -> :(a[$i] - b[$i]), N) + end - T = promote_type(T1, S1) S = julia_type_to_llvm_type(T) VT = NTuple{N, VecElement{T}} op = T <: SIMDFloat ? "fsub nsz contract" : "sub" @@ -304,9 +295,7 @@ end quote $(Expr(:meta, :inline)) - at = Base.@ntuple $N i->$T(a[i]) - bt = Base.@ntuple $N i->$T(b[i]) - ret = Base.llvmcall($llvmir, $VT, Tuple{$VT, $VT}, $VT(at), $VT(bt)) + ret = Base.llvmcall($llvmir, $VT, Tuple{$VT, $VT}, $VT(a), $VT(b)) Base.@ntuple $N i->ret[i].value end end @@ -316,11 +305,17 @@ end S = julia_type_to_llvm_type(T) VT = NTuple{N, VecElement{T}} - op = T <: SIMDFloat ? "fneg nsz contract" : "sub" - llvmir = """ - %res = $op <$N x $S> %0 - ret <$N x $S> %res - """ + if T <: SIMDFloat + llvmir = """ + %res = fneg nsz contract <$N x $S> %0 + ret <$N x $S> %res + """ + else + llvmir = """ + %res = sub <$N x $S> zeroinitializer, %0 + ret <$N x $S> %res + """ + end quote $(Expr(:meta, :inline)) @@ -330,41 +325,35 @@ end end @generated function mul_tuples(a::NTuple{N,V1}, b::NTuple{N,V2}, afactor::S1, bfactor::S2) where {N,V1,V2,S1,S2} - return tupexpr(i -> :((afactor * a[$i]) + (bfactor * b[$i])), N) -end - -#= -@inline function scale_tuple(tup::NTuple{N,T}, x) where {N,T<:SIMDType} - Tuple(Vec{N,T}(tup...) * x) -end - -@inline function div_tuple_by_scalar(tup::NTuple{N,T}, x) where {N,T<:SIMDType} - Tuple(Vec{N,T}(tup...) / x) -end - -@inline function add_tuples(a::NTuple{N,T}, b::NTuple{N,S}) where {N,T<:SIMDType,S<:SIMDType} - va = Vec{N,T}(a...) - vb = Vec{N,S}(b...) - return Tuple(va + vb) -end - -@inline function sub_tuples(a::NTuple{N,T}, b::NTuple{N,S}) where {N,T<:SIMDType,S<:SIMDType} - va = Vec{N,T}(a...) - vb = Vec{N,S}(b...) - return Tuple(va - vb) -end + if !(V1 === V2 === S1 === S2 && S2 <: SIMDFloat) + return tupexpr(i -> :((afactor * a[$i]) + (bfactor * b[$i])), N) + end -@inline function minus_tuple(a::NTuple{N,T}) where {N,T<:SIMDType} - va = Vec{N,T}(a...) - return Tuple(-va) -end + T = V1 + S = julia_type_to_llvm_type(T) + fmuladd = "@llvm.fmuladd.v$(N)f$(sizeof(T)*8)" -@inline function mul_tuples(a::NTuple{N,T}, b::NTuple{N,S}, afactor::SIMDType, bfactor::SIMDType) where {N,T<:SIMDType,S<:SIMDType} - va = Vec{N,T}(a...) - vb = Vec{N,S}(b...) - return Tuple(muladd(afactor, va, bfactor * vb)) + VT = NTuple{N, VecElement{T}} + llvmir = """ + declare <$N x $S> $fmuladd(<$N x $S>, <$N x $S>, <$N x $S>) + + define <$N x $S> @entry(<$N x $S>, <$N x $S>, $S, $S) alwaysinline { + top: + %el1 = insertelement <$N x $S> undef, $S %2, i32 0 + %afactor = shufflevector <$N x $S> %el1, <$N x $S> undef, <$N x i32> zeroinitializer + %el2 = insertelement <$N x $S> undef, $S %3, i32 0 + %bfactor = shufflevector <$N x $S> %el2, <$N x $S> undef, <$N x i32> zeroinitializer + %tmp = fmul nsz contract <$N x $S> %1, %bfactor + %res = call nsz contract <$N x $S> $fmuladd(<$N x $S> %0, <$N x $S> %afactor, <$N x $S> %tmp) + ret <$N x $S> %res + } + """ + quote + $(Expr(:meta, :inline)) + ret = Base.llvmcall(($llvmir, "entry"), $VT, Tuple{$VT, $VT, $T, $T}, $VT(a), $VT(b), afactor, bfactor) + Base.@ntuple $N i->ret[i].value + end end -=# ################### # Pretty Printing # diff --git a/test/PartialsTest.jl b/test/PartialsTest.jl index 39fb05d7..84320446 100644 --- a/test/PartialsTest.jl +++ b/test/PartialsTest.jl @@ -7,6 +7,10 @@ using ForwardDiff: Partials samerng() = MersenneTwister(1) +approx_tuple(x, y) = all(zip(x, y)) do (a, b) + a ≈ b +end + for N in (0, 3), T in (Int, Float32, Float64) println(" ...testing Partials{$N,$T}") @@ -114,7 +118,8 @@ for N in (0, 3), T in (Int, Float32, Float64) if N > 0 @test ForwardDiff._div_partials(PARTIALS, PARTIALS2, X, Y) == ForwardDiff._mul_partials(PARTIALS, PARTIALS2, inv(Y), -X/(Y^2)) - @test ForwardDiff._mul_partials(PARTIALS, PARTIALS2, X, Y).values == map((a, b) -> (X * a) + (Y * b), VALUES, VALUES2) + # FMA + @test approx_tuple(ForwardDiff._mul_partials(PARTIALS, PARTIALS2, X, Y).values, map((a, b) -> (X * a) + (Y * b), VALUES, VALUES2)) @test ForwardDiff._mul_partials(ZERO_PARTIALS, PARTIALS, X, Y) == Y * PARTIALS @test ForwardDiff._mul_partials(PARTIALS, ZERO_PARTIALS, X, Y) == X * PARTIALS From 0068d28a5c9928c33aa03b8570819b967dda2797 Mon Sep 17 00:00:00 2001 From: Yingbo Ma Date: Fri, 19 Nov 2021 00:18:56 -0500 Subject: [PATCH 3/4] Julia version bound for `fneg` --- src/partials.jl | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/src/partials.jl b/src/partials.jl index 95d1dc4c..7eaa7d91 100644 --- a/src/partials.jl +++ b/src/partials.jl @@ -300,27 +300,33 @@ end end end -@generated function minus_tuple(tup::NTuple{N,T}) where {N,T} - T <: SIMDType || return tupexpr(i -> :(-tup[$i]), N) - - S = julia_type_to_llvm_type(T) - VT = NTuple{N, VecElement{T}} - if T <: SIMDFloat - llvmir = """ - %res = fneg nsz contract <$N x $S> %0 - ret <$N x $S> %res - """ - else - llvmir = """ - %res = sub <$N x $S> zeroinitializer, %0 - ret <$N x $S> %res - """ +if VERSION >= v"1.4" # fsub requires LLVM 8 (Julia 1.4) + @generated function minus_tuple(tup::NTuple{N,T}) where {N,T} + T <: SIMDType || return tupexpr(i -> :(-tup[$i]), N) + + S = julia_type_to_llvm_type(T) + VT = NTuple{N, VecElement{T}} + if T <: SIMDFloat + llvmir = """ + %res = fneg nsz contract <$N x $S> %0 + ret <$N x $S> %res + """ + else + llvmir = """ + %res = sub <$N x $S> zeroinitializer, %0 + ret <$N x $S> %res + """ + end + + quote + $(Expr(:meta, :inline)) + ret = Base.llvmcall($llvmir, $VT, Tuple{$VT}, $VT(tup)) + Base.@ntuple $N i->ret[i].value + end end - - quote - $(Expr(:meta, :inline)) - ret = Base.llvmcall($llvmir, $VT, Tuple{$VT}, $VT(tup)) - Base.@ntuple $N i->ret[i].value +else + @generated function minus_tuple(tup::NTuple{N,T}) where {N,T} + return tupexpr(i -> :(-tup[$i]), N) end end From e50d653d1d800b634979da566f08f113166deb1b Mon Sep 17 00:00:00 2001 From: Yingbo Ma Date: Fri, 19 Nov 2021 00:27:29 -0500 Subject: [PATCH 4/4] Julia 1.0 doesn't get nice things --- src/partials.jl | 59 +++++++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 31 deletions(-) diff --git a/src/partials.jl b/src/partials.jl index 7eaa7d91..0325ea75 100644 --- a/src/partials.jl +++ b/src/partials.jl @@ -205,6 +205,9 @@ const SIMDInt = Union{ } const SIMDType = Union{SIMDFloat, SIMDInt} +# This may not be a sharp bound, but at least people won't get worse result. +const HAS_FLEXIABLE_VECTOR_LENGTH = VERSION >= v"1.6" + function julia_type_to_llvm_type(@nospecialize(T::DataType)) T === Float64 ? "double" : T === Float32 ? "float" : @@ -217,7 +220,7 @@ function julia_type_to_llvm_type(@nospecialize(T::DataType)) end @generated function scale_tuple(tup::NTuple{N,T}, x::S) where {N,T,S} - if !(T === S && S <: SIMDType) + if !(HAS_FLEXIABLE_VECTOR_LENGTH && T === S && S <: SIMDType) return tupexpr(i -> :(tup[$i] * x), N) end @@ -239,7 +242,7 @@ end end @generated function div_tuple_by_scalar(tup::NTuple{N,T}, x::S) where {N,T,S} - if !(T === S === typeof(one(T) / one(S)) && S <: SIMDType) + if !(HAS_FLEXIABLE_VECTOR_LENGTH && T === S === typeof(one(T) / one(S)) && S <: SIMDType) return tupexpr(i -> :(tup[$i] / x), N) end @@ -261,7 +264,7 @@ end end @generated function add_tuples(a::NTuple{N,T}, b::NTuple{N,S}) where {N,T,S} - if !(T === S && S <: SIMDType) + if !(HAS_FLEXIABLE_VECTOR_LENGTH && T === S && S <: SIMDType) return tupexpr(i -> :(a[$i] + b[$i]), N) end @@ -281,7 +284,7 @@ end end @generated function sub_tuples(a::NTuple{N,T}, b::NTuple{N,S}) where {N,T,S} - if !(T === S && S <: SIMDType) + if !(HAS_FLEXIABLE_VECTOR_LENGTH && T === S && S <: SIMDType) return tupexpr(i -> :(a[$i] - b[$i]), N) end @@ -300,38 +303,32 @@ end end end -if VERSION >= v"1.4" # fsub requires LLVM 8 (Julia 1.4) - @generated function minus_tuple(tup::NTuple{N,T}) where {N,T} - T <: SIMDType || return tupexpr(i -> :(-tup[$i]), N) - - S = julia_type_to_llvm_type(T) - VT = NTuple{N, VecElement{T}} - if T <: SIMDFloat - llvmir = """ - %res = fneg nsz contract <$N x $S> %0 - ret <$N x $S> %res - """ - else - llvmir = """ - %res = sub <$N x $S> zeroinitializer, %0 - ret <$N x $S> %res - """ - end - - quote - $(Expr(:meta, :inline)) - ret = Base.llvmcall($llvmir, $VT, Tuple{$VT}, $VT(tup)) - Base.@ntuple $N i->ret[i].value - end +@generated function minus_tuple(tup::NTuple{N,T}) where {N,T} + (HAS_FLEXIABLE_VECTOR_LENGTH && T <: SIMDType) || return tupexpr(i -> :(-tup[$i]), N) + + S = julia_type_to_llvm_type(T) + VT = NTuple{N, VecElement{T}} + if T <: SIMDFloat + llvmir = """ + %res = fneg nsz contract <$N x $S> %0 + ret <$N x $S> %res + """ + else + llvmir = """ + %res = sub <$N x $S> zeroinitializer, %0 + ret <$N x $S> %res + """ end -else - @generated function minus_tuple(tup::NTuple{N,T}) where {N,T} - return tupexpr(i -> :(-tup[$i]), N) + + quote + $(Expr(:meta, :inline)) + ret = Base.llvmcall($llvmir, $VT, Tuple{$VT}, $VT(tup)) + Base.@ntuple $N i->ret[i].value end end @generated function mul_tuples(a::NTuple{N,V1}, b::NTuple{N,V2}, afactor::S1, bfactor::S2) where {N,V1,V2,S1,S2} - if !(V1 === V2 === S1 === S2 && S2 <: SIMDFloat) + if !(HAS_FLEXIABLE_VECTOR_LENGTH && V1 === V2 === S1 === S2 && S2 <: SIMDFloat) return tupexpr(i -> :((afactor * a[$i]) + (bfactor * b[$i])), N) end