@@ -205,6 +205,9 @@ const SIMDInt = Union{
205205 }
206206const SIMDType = Union{SIMDFloat, SIMDInt}
207207
208+ # This may not be a sharp bound, but at least people won't get a worse result.
209+ const HAS_FLEXIABLE_VECTOR_LENGTH = VERSION >= v " 1.6"
210+
208211function julia_type_to_llvm_type (@nospecialize (T:: DataType ))
209212 T === Float64 ? " double" :
210213 T === Float32 ? " float" :
@@ -217,7 +220,7 @@ function julia_type_to_llvm_type(@nospecialize(T::DataType))
217220end
218221
219222@generated function scale_tuple (tup:: NTuple{N,T} , x:: S ) where {N,T,S}
220- if ! (T === S && S <: SIMDType )
223+ if ! (HAS_FLEXIABLE_VECTOR_LENGTH && T === S && S <: SIMDType )
221224 return tupexpr (i -> :(tup[$ i] * x), N)
222225 end
223226
239242end
240243
241244@generated function div_tuple_by_scalar (tup:: NTuple{N,T} , x:: S ) where {N,T,S}
242- if ! (T === S === typeof (one (T) / one (S)) && S <: SIMDType )
245+ if ! (HAS_FLEXIABLE_VECTOR_LENGTH && T === S === typeof (one (T) / one (S)) && S <: SIMDType )
243246 return tupexpr (i -> :(tup[$ i] / x), N)
244247 end
245248
261264end
262265
263266@generated function add_tuples (a:: NTuple{N,T} , b:: NTuple{N,S} ) where {N,T,S}
264- if ! (T === S && S <: SIMDType )
267+ if ! (HAS_FLEXIABLE_VECTOR_LENGTH && T === S && S <: SIMDType )
265268 return tupexpr (i -> :(a[$ i] + b[$ i]), N)
266269 end
267270
281284end
282285
283286@generated function sub_tuples (a:: NTuple{N,T} , b:: NTuple{N,S} ) where {N,T,S}
284- if ! (T === S && S <: SIMDType )
287+ if ! (HAS_FLEXIABLE_VECTOR_LENGTH && T === S && S <: SIMDType )
285288 return tupexpr (i -> :(a[$ i] - b[$ i]), N)
286289 end
287290
@@ -300,38 +303,32 @@ end
300303 end
301304end
302305
303- if VERSION >= v " 1.4" # fsub requires LLVM 8 (Julia 1.4)
304- @generated function minus_tuple (tup:: NTuple{N,T} ) where {N,T}
305- T <: SIMDType || return tupexpr (i -> :(- tup[$ i]), N)
306-
307- S = julia_type_to_llvm_type (T)
308- VT = NTuple{N, VecElement{T}}
309- if T <: SIMDFloat
310- llvmir = """
311- %res = fneg nsz contract <$N x $S > %0
312- ret <$N x $S > %res
313- """
314- else
315- llvmir = """
316- %res = sub <$N x $S > zeroinitializer, %0
317- ret <$N x $S > %res
318- """
319- end
320-
321- quote
322- $ (Expr (:meta , :inline ))
323- ret = Base. llvmcall ($ llvmir, $ VT, Tuple{$ VT}, $ VT (tup))
324- Base. @ntuple $ N i-> ret[i]. value
325- end
306+ @generated function minus_tuple (tup:: NTuple{N,T} ) where {N,T}
307+ (HAS_FLEXIABLE_VECTOR_LENGTH && T <: SIMDType ) || return tupexpr (i -> :(- tup[$ i]), N)
308+
309+ S = julia_type_to_llvm_type (T)
310+ VT = NTuple{N, VecElement{T}}
311+ if T <: SIMDFloat
312+ llvmir = """
313+ %res = fneg nsz contract <$N x $S > %0
314+ ret <$N x $S > %res
315+ """
316+ else
317+ llvmir = """
318+ %res = sub <$N x $S > zeroinitializer, %0
319+ ret <$N x $S > %res
320+ """
326321 end
327- else
328- @generated function minus_tuple (tup:: NTuple{N,T} ) where {N,T}
329- return tupexpr (i -> :(- tup[$ i]), N)
322+
323+ quote
324+ $ (Expr (:meta , :inline ))
325+ ret = Base. llvmcall ($ llvmir, $ VT, Tuple{$ VT}, $ VT (tup))
326+ Base. @ntuple $ N i-> ret[i]. value
330327 end
331328end
332329
333330@generated function mul_tuples (a:: NTuple{N,V1} , b:: NTuple{N,V2} , afactor:: S1 , bfactor:: S2 ) where {N,V1,V2,S1,S2}
334- if ! (V1 === V2 === S1 === S2 && S2 <: SIMDFloat )
331+ if ! (HAS_FLEXIABLE_VECTOR_LENGTH && V1 === V2 === S1 === S2 && S2 <: SIMDFloat )
335332 return tupexpr (i -> :((afactor * a[$ i]) + (bfactor * b[$ i])), N)
336333 end
337334
0 commit comments