From ad09dec86750558dc8c0a9c90296dfd3eb6ec183 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20M=C3=BCller-Widmann?=
 <devmotion@users.noreply.github.com>
Date: Sun, 19 Oct 2025 08:52:55 -0600
Subject: [PATCH] Fix implicit imports

---
 Project.toml                                |   4 +-
 docs/make.jl                                |   2 +-
 src/VectorizationBase.jl                    | 113 ++++------
 src/base_defs.jl                            |  30 +--
 src/cartesianvindex.jl                      |   2 +-
 src/early_definitions.jl                    |   2 +-
 src/lazymul.jl                              |  10 +-
 src/llvm_intrin/binary_ops.jl               |   2 +-
 src/llvm_intrin/conversion.jl               |  46 +++-
 src/llvm_intrin/intrin_funcs.jl             |  22 +-
 src/llvm_intrin/masks.jl                    |   2 +-
 src/llvm_intrin/memory_addr.jl              | 143 ++++++-------
 src/llvm_intrin/vector_ops.jl               |  62 +++---
 src/llvm_types.jl                           |  24 ++-
 src/promotion.jl                            |   4 +-
 src/ranges.jl                               |  24 +--
 src/special/double.jl                       |  33 ++-
 src/special/exp.jl                          |  17 +-
 src/special/misc.jl                         |   4 +-
 src/static.jl                               |   4 +-
 src/strided_pointers/cse_stridemultiples.jl |   5 +-
 src/strided_pointers/stridedpointers.jl     |   6 +-
 src/vecunroll/fmap.jl                       |   4 +-
 src/vecunroll/memory.jl                     |  53 +++--
 test/Project.toml                           |   8 +-
 test/accuracy.jl                            |   4 +-
 test/runtests.jl                            | 220 +++++++++++++-------
 test/testsetup.jl                           |   4 +-
 28 files changed, 419 insertions(+), 435 deletions(-)

diff --git a/Project.toml b/Project.toml
index 0a4cfd56..092a99a1 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,10 +1,11 @@
 name = "VectorizationBase"
 uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
 authors = ["Chris Elrod <elrodc@gmail.com>"]
-version = "0.21.72"
+version = "0.21.73"
 
 [deps]
 ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
+BitTwiddlingConvenienceFunctions = "62783981-4cbd-42fc-bca8-16325de8dc4b"
 CPUSummary = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9"
 HostCPUFeatures = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0"
 IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
@@ -17,6 +18,7 @@ StaticArrayInterface = "0d7ed370-da01-4f52-bd93-41d350b8b718"
 
 [compat]
 ArrayInterface = "7"
+BitTwiddlingConvenienceFunctions = "0.1.6"
 CPUSummary = "0.1.1 - 0.1.8, 0.1.11, 0.2"
 HostCPUFeatures = "0.1"
 IfElse = "0.1"
diff --git a/docs/make.jl b/docs/make.jl
index 15edee97..1d11f9eb 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -10,7 +10,7 @@ makedocs(;
     prettyurls = get(ENV, "CI", "false") == "true",
     canonical = "https://JuliaSIMD.github.io/VectorizationBase.jl"
   ),
-  pages = ["Home" => "index.md"],
+  pages = ["Home" => "index.md"]
 )
 
 deploydocs(; repo = "github.com/JuliaSIMD/VectorizationBase.jl")
diff --git a/src/VectorizationBase.jl b/src/VectorizationBase.jl
index 0e0fe0e5..918979e8 100644
--- a/src/VectorizationBase.jl
+++ b/src/VectorizationBase.jl
@@ -6,61 +6,22 @@ end
 import StaticArrayInterface, LinearAlgebra, Libdl, IfElse, LayoutPointers
 const ArrayInterface = StaticArrayInterface
 using StaticArrayInterface:
-  contiguous_axis,
-  contiguous_axis_indicator,
-  contiguous_batch_size,
-  stride_rank,
-  device,
-  CPUPointer,
-  CPUIndex,
-  known_length,
-  known_first,
-  known_last,
-  static_size,
-  static_strides,
-  offsets,
-  static_first,
-  static_last,
-  static_length
+  stride_rank, static_strides, offsets, static_first, static_last, static_length
 import IfElse: ifelse
 
-using CPUSummary:
-  cache_type,
-  num_cache,
-  num_cache_levels,
-  num_cores,
-  num_l1cache,
-  num_l2cache,
-  cache_associativity,
-  num_l3cache,
-  sys_threads,
-  cache_inclusive,
-  num_l4cache,
-  cache_linesize,
-  num_machines,
-  cache_size,
-  num_sockets
+using CPUSummary: cache_linesize
 using HostCPUFeatures:
   register_size,
   static_sizeof,
-  fast_int64_to_double,
   pick_vector_width,
-  pick_vector_width_shift,
-  prevpow2,
   simd_integer_register_size,
   fma_fast,
   smax,
-  smin,
   has_feature,
-  has_opmask_registers,
-  register_count,
-  static_sizeof,
   cpu_name,
   register_size,
-  unwrap,
-  intlog2,
-  nextpow2,
   fast_half
+using BitTwiddlingConvenienceFunctions: intlog2, nextpow2, prevpow2
 
 import Base:
   Float16,
@@ -74,7 +35,7 @@ import Base:
   UInt16,
   UInt32,
   UInt64,
-  Bool 
+  Bool
 
 using SIMDTypes:
   Bit,
@@ -82,28 +43,35 @@ using SIMDTypes:
   SignedHW,
   UnsignedHW,
   IntegerTypesHW,
-  NativeTypesExceptBitandFloat16,
   NativeTypesExceptBit,
   NativeTypesExceptFloat16,
   NativeTypes,
   _Vec
 using LayoutPointers:
   AbstractStridedPointer,
-  StridedPointer,
   StridedBitPointer,
-  memory_reference,
   stridedpointer,
-  zstridedpointer,
   similar_no_offset,
   similar_with_offset,
-  grouped_strided_pointer,
-  stridedpointers,
   bytestrides,
-  DensePointerWrapper,
   zero_offsets
 
-using Static
-using Static: One, Zero, eq, ne, lt, le, gt, ge
+using Static:
+  Static,
+  One,
+  Zero,
+  eq,
+  lt,
+  le,
+  gt,
+  ge,
+  ne,
+  True,
+  False,
+  StaticBool,
+  StaticInt,
+  known,
+  static
 
 @inline function promote(x::X, y::Y) where {X,Y}
   T = promote_type(X, Y)
@@ -363,7 +331,7 @@ function Base.show(io::IO, v::AbstractSIMDVector{W,T}) where {W,T}
   end
   print(io, ">")
 end
-Base.bitstring(m::AbstractMask{W}) where {W} = bitstring(data(m))[end-W+1:end]
+Base.bitstring(m::AbstractMask{W}) where {W} = bitstring(data(m))[(end-W+1):end]
 function Base.show(io::IO, m::AbstractMask{W}) where {W}
   bits = data(m)
   if m isa EVLMask
@@ -371,7 +339,7 @@ function Base.show(io::IO, m::AbstractMask{W}) where {W}
   else
     print(io, "Mask{$W,Bit}<")
   end
-  for w ∈ 0:W-1
+  for w ∈ 0:(W-1)
     print(io, (bits & 0x01) % Int)
     bits >>= 0x01
     w < W - 1 && print(io, ", ")
@@ -381,7 +349,7 @@ end
 function Base.show(io::IO, vu::VecUnroll{N,W,T,V}) where {N,W,T,V}
   println(io, "$(N+1) x $V")
   d = data(vu)
-  for n = 1:N+1
+  for n = 1:(N+1)
     show(io, d[n])
     n > N || println(io)
   end
@@ -508,20 +476,23 @@ demoteint(::Type{Int64}, W::StaticInt) = gt(W, pick_vector_width(Int64))
   end
   meta = Expr(:meta, :inline)
   if VERSION >= v"1.8.0-beta"
-    purity = Expr(:purity,
-       #= consistent =# true,
-       #= effect_free =# true,
-       #= nothrow =# true,
-       #= terminates_globally =# true,
-       #= terminates_locally =# false)
+    purity = Expr(
+      :purity,
+      #= consistent =#true,
+      #= effect_free =#true,
+      #= nothrow =#true,
+      #= terminates_globally =#true,
+      #= terminates_locally =#false
+    )
     if VERSION >= v"1.11"
-      push!(purity.args,
-        #= notaskstate =# true,
-        #= inaccessiblememonly =# true,
-        #= noub =# true,
-        #= noub_if_noinbounds =# false,
-        #= consistent_overlay =# false,
-        #= nortcall =# true,
+      push!(
+        purity.args,
+        #= notaskstate =#true,
+        #= inaccessiblememonly =#true,
+        #= noub =#true,
+        #= noub_if_noinbounds =#false,
+        #= consistent_overlay =#false,
+        #= nortcall =#true
       )
     end
     push!(meta.args, purity)
@@ -537,9 +508,9 @@ function vec_quote(demote, W, Wpow2, offset::Int = 0)
   iszero(offset) && push!(call.args, :y)
   foreach(
     w -> push!(call.args, Expr(:call, getfield, :x, w, false)),
-    max(1, offset):min(W, Wpow2)-1
+    max(1, offset):(min(W, Wpow2)-1)
   )
-  foreach(w -> push!(call.args, Expr(:call, :zero, :T)), W+1:Wpow2)
+  foreach(w -> push!(call.args, Expr(:call, :zero, :T)), (W+1):Wpow2)
   call
 end
 @generated function _vec(
@@ -578,7 +549,7 @@ else
   end
 end
 @inline reduce_to_onevec(f::F, vu::VecUnroll) where {F} =
-  ArrayInterface.reduce_tup(f, data(vu))
+  Static.reduce_tup(f, data(vu))
 
 if VERSION >= v"1.7.0" && hasfield(Method, :recursion_relation)
   dont_limit = Returns(true)
diff --git a/src/base_defs.jl b/src/base_defs.jl
index 66f1dffa..981b6f2b 100644
--- a/src/base_defs.jl
+++ b/src/base_defs.jl
@@ -1,27 +1,3 @@
-const FASTDICT = Dict{Symbol,Expr}([
-  :(+) => :(Base.FastMath.add_fast),
-  :(-) => :(Base.FastMath.sub_fast),
-  :(*) => :(Base.FastMath.mul_fast),
-  :(/) => :(Base.FastMath.div_fast),
-  :(÷) => :(VectorizationBase.vdiv_fast), # VectorizationBase.vdiv == integer, VectorizationBase.vfdiv == float
-  :(%) => :(Base.FastMath.rem_fast),
-  :abs2 => :(Base.FastMath.abs2_fast),
-  :inv => :(Base.FastMath.inv_fast), # this is slower in most benchmarks
-  :hypot => :(Base.FastMath.hypot_fast),
-  :max => :(Base.FastMath.max_fast),
-  :min => :(Base.FastMath.min_fast),
-  :muladd => :(VectorizationBase.vmuladd_fast),
-  :fma => :(VectorizationBase.vfma_fast),
-  :vfmadd => :(VectorizationBase.vfmadd_fast),
-  :vfnmadd => :(VectorizationBase.vfnmadd_fast),
-  :vfmsub => :(VectorizationBase.vfmsub_fast),
-  :vfnmsub => :(VectorizationBase.vfnmsub_fast),
-  :log => :(SLEEFPirates.log_fast),
-  :log2 => :(SLEEFPirates.log2_fast),
-  :log10 => :(SLEEFPirates.log10_fast),
-  :(^) => :(Base.FastMath.pow_fast)
-])
-
 for (op, f) ∈ [
   (:(Base.:-), :vsub),
   (:(Base.FastMath.sub_fast), :vsub_fast),
@@ -260,11 +236,7 @@ end
   x, y = promote(a, b)
   VecUnroll(fmap(ifelse, getfield(m, :data), unrolldata(x), unrolldata(y)))
 end
-@inline function IfElse.ifelse(
-  m::VecUnroll{<:Any,<:Any,Bool},
-  a::Real,
-  b::Real
-)
+@inline function IfElse.ifelse(m::VecUnroll{<:Any,<:Any,Bool}, a::Real, b::Real)
   x, y = promote(a, b)
   VecUnroll(fmap(ifelse, getfield(m, :data), unrolldata(x), unrolldata(y)))
 end
diff --git a/src/cartesianvindex.jl b/src/cartesianvindex.jl
index 7f7187fa..a3e400ad 100644
--- a/src/cartesianvindex.jl
+++ b/src/cartesianvindex.jl
@@ -8,7 +8,7 @@ struct CartesianVIndex{N,T<:Tuple{Vararg{Union{Int,StaticInt,NullStep},N}}} <:
   ) where {N,T<:Tuple{Vararg{Union{Int,StaticInt,NullStep},N}}} = new{N,T}(I)
 end
 Base.length(::CartesianVIndex{N}) where {N} = N
-ArrayInterface.known_length(::Type{<:CartesianVIndex{N}}) where {N} = N
+StaticArrayInterface.known_length(::Type{<:CartesianVIndex{N}}) where {N} = N
 Base.Tuple(i::CartesianVIndex) = getfield(i, :I)
 function Base.:(:)(I::CartesianVIndex{N}, J::CartesianVIndex{N}) where {N}
   CartesianIndices(map((i, j) -> i:j, getfield(I, :I), getfield(J, :I)))
diff --git a/src/early_definitions.jl b/src/early_definitions.jl
index 1965769a..eb2f1b23 100644
--- a/src/early_definitions.jl
+++ b/src/early_definitions.jl
@@ -56,7 +56,7 @@ end
 end
 
 @inline integer_preference(::StaticInt{B}) where {B} =
-  ifelse(ArrayInterface.ge(StaticInt{B}(), StaticInt{8}()), Int, Int32)
+  ifelse(ge(StaticInt{B}(), StaticInt{8}()), Int, Int32)
 
 @inline pick_integer(::Union{StaticInt{W},Val{W}}) where {W} =
   integer_preference(simd_integer_register_size() ÷ StaticInt{W}())
diff --git a/src/lazymul.jl b/src/lazymul.jl
index 285a3245..85522381 100644
--- a/src/lazymul.jl
+++ b/src/lazymul.jl
@@ -432,8 +432,7 @@ end
   if iszero(r)
     quote
       $(Expr(:meta, :inline))
-      p,
-      VectorizationBase.LazyMulAdd{$N,$(I * M)}(MM{$W,$d}(getfield(b, :data)))
+      p, LazyMulAdd{$N,$(I * M)}(MM{$W,$d}(getfield(b, :data)))
     end
   else
     quote
@@ -451,8 +450,7 @@ end
   if iszero(r)
     quote
       $(Expr(:meta, :inline))
-      p,
-      VectorizationBase.LazyMulAdd{$N,$(I * M)}(MM{$W,$d}(getfield(b, :data)))
+      p, LazyMulAdd{$N,$(I * M)}(MM{$W,$d}(getfield(b, :data)))
     end
   else
     quote
@@ -479,7 +477,7 @@ end
   if iszero(r)
     quote
       $(Expr(:meta, :inline))
-      VectorizationBase.LazyMulAdd{$N,$(I * M)}(MM{$W,$d}(getfield(b, :data)))
+      LazyMulAdd{$N,$(I * M)}(MM{$W,$d}(getfield(b, :data)))
     end
   else
     quote
@@ -509,7 +507,7 @@ end
   if iszero(r)
     quote
       $(Expr(:meta, :inline))
-      VectorizationBase.LazyMulAdd{$N,$(I * M)}(-MM{$W,$d}(getfield(b, :data)))
+      LazyMulAdd{$N,$(I * M)}(-MM{$W,$d}(getfield(b, :data)))
     end
   else
     quote
diff --git a/src/llvm_intrin/binary_ops.jl b/src/llvm_intrin/binary_ops.jl
index fc7019fe..5a0763d0 100644
--- a/src/llvm_intrin/binary_ops.jl
+++ b/src/llvm_intrin/binary_ops.jl
@@ -144,7 +144,7 @@ end
   v2::AbstractSIMD{W,T}
 ) where {W,T<:FloatingTypes} = trunc(vfdiv_fast(v1, v2))
 @inline vdiv_fast(v1::T, v2::T) where {T<:FloatingTypes} =
-  trunc(Base.FastMath.div_float_fast(v1, v2))
+  trunc(Core.Intrinsics.div_float_fast(v1, v2))
 @inline vdiv_fast(v1::T, v2::T) where {T<:Number} = v1 ÷ v2
 @inline vdiv(v1::T, v2::T) where {T<:Number} = v1 ÷ v2
 @inline vdiv(v1::T, v2::T) where {T<:FloatingTypes} = vdiv_fast(v1, v2)
diff --git a/src/llvm_intrin/conversion.jl b/src/llvm_intrin/conversion.jl
index f2d2a7d4..ec4ef10b 100644
--- a/src/llvm_intrin/conversion.jl
+++ b/src/llvm_intrin/conversion.jl
@@ -62,7 +62,7 @@ if (Sys.ARCH === :x86_64) || (Sys.ARCH === :i686)
     ::False
   ) where {W,F}
     neg = v < 0
-    pos = ifelse(neg, -v, v) 
+    pos = ifelse(neg, -v, v)
     posf = _vconvert(Vec{W,F}, UInt64(pos), False())
     ifelse(neg, -posf, posf)
   end
@@ -85,7 +85,12 @@ if (Sys.ARCH === :x86_64) || (Sys.ARCH === :i686)
   @inline function vconvert(
     ::Type{F},
     v::VecUnroll{N,W,T,Vec{W,T}}
-  )::VecUnroll{N,W,F,Vec{W,F}} where {N,W,F<:FloatingTypes,T<:Union{UInt64,Int64}}
+  )::VecUnroll{
+    N,
+    W,
+    F,
+    Vec{W,F}
+  } where {N,W,F<:FloatingTypes,T<:Union{UInt64,Int64}}
     _vconvert(
       Vec{W,F},
       v,
@@ -95,7 +100,12 @@ if (Sys.ARCH === :x86_64) || (Sys.ARCH === :i686)
   @inline function vconvert(
     ::Type{Vec{W,F}},
     v::VecUnroll{N,W,T,Vec{W,T}}
-  )::VecUnroll{N,W,F,Vec{W,F}} where {N,W,F<:FloatingTypes,T<:Union{UInt64,Int64}}
+  )::VecUnroll{
+    N,
+    W,
+    F,
+    Vec{W,F}
+  } where {N,W,F<:FloatingTypes,T<:Union{UInt64,Int64}}
     _vconvert(
       Vec{W,F},
       v,
@@ -105,7 +115,12 @@ if (Sys.ARCH === :x86_64) || (Sys.ARCH === :i686)
   @inline function vconvert(
     ::Type{VecUnroll{N,W,F,Vec{W,F}}},
     v::VecUnroll{N,W,T,Vec{W,T}}
-  )::VecUnroll{N,W,F,Vec{W,F}} where {N,W,F<:FloatingTypes,T<:Union{UInt64,Int64}}
+  )::VecUnroll{
+    N,
+    W,
+    F,
+    Vec{W,F}
+  } where {N,W,F<:FloatingTypes,T<:Union{UInt64,Int64}}
     _vconvert(
       Vec{W,F},
       v,
@@ -165,13 +180,13 @@ end
 @inline vconvert(::Type{M}, v::Vec{W,Bool}) where {W,U,M<:AbstractMask{W,U}} =
   tomask(v)
 @inline vconvert(
-  ::Type{<:VectorizationBase.AbstractMask{W,U} where {U}},
+  ::Type{<:AbstractMask{W,U} where {U}},
   v::Vec{W,Bool}
-) where {W} = VectorizationBase.tomask(v)
+) where {W} = tomask(v)
 @inline vconvert(
-  ::Type{<:VectorizationBase.AbstractMask{L,U} where {L,U}},
+  ::Type{<:AbstractMask{L,U} where {L,U}},
   v::Vec{W,Bool}
-) where {W} = VectorizationBase.tomask(v)
+) where {W} = tomask(v)
 # @inline vconvert(::Type{Mask}, v::Vec{W,Bool}) where {W} = tomask(v)
 # @generated function vconvert(::Type{<:AbstractMask{W}}, v::Vec{W,Bool}) where {W}
 #     instrs = String[]
@@ -229,9 +244,18 @@ end
 ### `vconvert(::Type{<:NativeTypes}, x)` methods. These forward to `vconvert(::Type{Vec{W,T}}, x)`
 @inline vconvert(::Type{T}, s::T) where {T<:NativeTypes} = s
 @inline vconvert(::Type{T}, s::T) where {T<:IntegerTypesHW} = s
-@inline vconvert(::Type{T}, s::Union{Float16,Float32,Float64}) where {T<:IntegerTypesHW} = Base.fptosi(T, Base.trunc_llvm(s))
-@inline vconvert(::Type{T}, s::IntegerTypesHW) where {T<:Union{Float16,Float32,Float64}} = convert(T, s)::T
-@inline vconvert(::Type{T}, s::Union{Float16,Float32,Float64}) where {T<:Union{Float16,Float32,Float64}} = convert(T, s)::T
+@inline vconvert(
+  ::Type{T},
+  s::Union{Float16,Float32,Float64}
+) where {T<:IntegerTypesHW} = Base.fptosi(T, Base.trunc_llvm(s))
+@inline vconvert(
+  ::Type{T},
+  s::IntegerTypesHW
+) where {T<:Union{Float16,Float32,Float64}} = convert(T, s)::T
+@inline vconvert(
+  ::Type{T},
+  s::Union{Float16,Float32,Float64}
+) where {T<:Union{Float16,Float32,Float64}} = convert(T, s)::T
 @inline vconvert(::Type{T}, s::T) where {T<:Union{Float16,Float32,Float64}} = s
 @inline vconvert(::Type{T}, s::IntegerTypesHW) where {T<:IntegerTypesHW} = s % T
 @inline vconvert(::Type{T}, v::AbstractSIMD{W,T}) where {T<:NativeTypes,W} = v
diff --git a/src/llvm_intrin/intrin_funcs.jl b/src/llvm_intrin/intrin_funcs.jl
index 536b5b71..c7f79845 100644
--- a/src/llvm_intrin/intrin_funcs.jl
+++ b/src/llvm_intrin/intrin_funcs.jl
@@ -156,9 +156,10 @@ for (op, f) ∈ [
   ("nearbyint", :vround)#,("roundeven",:roundeven)
 ]
   # @eval @generated Base.$f(v1::Vec{W,T}) where {W, T <: Union{Float32,Float64}} = llvmcall_expr($op, W, T, (W,), (T,), "nsz arcp contract afn reassoc")
-  @eval @generated $f(v1::Vec{W,T}) where {W,T<:Union{Float32,Float64}} =
-    (TS = T === Float32 ? :Float32 : :Float64;
-    build_llvmcall_expr($op, W, TS, [W], [TS], "fast"))
+  @eval @generated $f(v1::Vec{W,T}) where {W,T<:Union{Float32,Float64}} = (
+    TS = T === Float32 ? :Float32 : :Float64;
+    build_llvmcall_expr($op, W, TS, [W], [TS], "fast")
+  )
 end
 @inline vsqrt(v::AbstractSIMD{W,T}) where {W,T<:IntegerTypes} = vsqrt(float(v))
 @inline vsqrt(v::FloatingTypes) = Base.sqrt_llvm_fast(v)
@@ -459,16 +460,16 @@ end
 @inline vfma_fast(a::NativeTypes, b::NativeTypes, c::NativeTypes) =
   muladd(a, b, c)
 @inline vmuladd_fast(a::Float32, b::Float32, c::Float32) =
-  Base.FastMath.add_float_fast(Base.FastMath.mul_float_fast(a, b), c)
+  Core.Intrinsics.add_float_fast(Core.Intrinsics.mul_float_fast(a, b), c)
 @inline vmuladd_fast(a::Float64, b::Float64, c::Float64) =
-  Base.FastMath.add_float_fast(Base.FastMath.mul_float_fast(a, b), c)
+  Core.Intrinsics.add_float_fast(Core.Intrinsics.mul_float_fast(a, b), c)
 @inline vmuladd_fast(a::NativeTypes, b::NativeTypes, c::NativeTypes) =
   Base.FastMath.add_fast(Base.FastMath.mul_fast(a, b), c)
 @inline vfma(a, b, c) = fma(a, b, c)
 @inline vmuladd(a, b, c) = muladd(a, b, c)
 @inline vfma_fast(a, b, c) = fma(a, b, c)
 @inline vmuladd_fast(a, b, c) =
   Base.FastMath.add_fast(Base.FastMath.mul_fast(a, b), c)
 for f ∈ [:vfma, :vmuladd, :vfma_fast, :vmuladd_fast]
   @eval @inline function $f(
     v1::AbstractSIMD{W,T},
@@ -604,7 +605,7 @@ function collapse_mirror_expr(N, op, final)
     2final
   end
   while N > _final
-    for n ∈ 1:N>>>1
+    for n ∈ 1:(N>>>1)
       push!(q.args, Expr(:(=), cmp[n], Expr(:call, op, s[n], s[n+(N>>>1)])))
       push!(
         q.args,
@@ -623,7 +624,7 @@ function collapse_mirror_expr(N, op, final)
     N >>>= 1
   end
   if final ≠ 1
-    for n ∈ final+1:N
+    for n ∈ (final+1):N
       push!(q.args, Expr(:(=), cmp[n-final], Expr(:call, op, s[n-final], s[n])))
       push!(
         q.args,
@@ -741,8 +742,7 @@ for (op, f, S) ∈ [
 end
 if Sys.ARCH == :aarch64 # TODO: maybe the default definition will stop segfaulting some day?
   for I ∈ (:Int64, :UInt64), (f, op) ∈ ((:vmaximum, :max), (:vminimum, :min))
-    @eval @inline $f(v::Vec{W,$I}) where {W} =
-      ArrayInterface.reduce_tup($op, Tuple(v))
+    @eval @inline $f(v::Vec{W,$I}) where {W} = Static.reduce_tup($op, Tuple(v))
   end
 end
 
diff --git a/src/llvm_intrin/masks.jl b/src/llvm_intrin/masks.jl
index 46e3eed8..39463713 100644
--- a/src/llvm_intrin/masks.jl
+++ b/src/llvm_intrin/masks.jl
@@ -372,7 +372,7 @@ end
 @inline vzero(::EVLMask{W,U}) where {W,U} = EVLMask{W}(zero(U), 0x00000000)
 @inline Base.zero(::Type{M}) where {W,M<:AbstractMask{W}} = vzero(M)
 @inline zero_mask(::Union{Val{W},StaticInt{W}}) where {W} =
-  EVLMask{W}(zero(VectorizationBase.mask_type(Val{W}())), 0x00000000)
+  EVLMask{W}(zero(mask_type(Val{W}())), 0x00000000)
 
 @generated function max_mask(::Union{Val{W},StaticInt{W}}) where {W}
   U = mask_type(W)
diff --git a/src/llvm_intrin/memory_addr.jl b/src/llvm_intrin/memory_addr.jl
index 8042e9f4..93f5147f 100644
--- a/src/llvm_intrin/memory_addr.jl
+++ b/src/llvm_intrin/memory_addr.jl
@@ -168,16 +168,13 @@ function offset_ptr(
   # after this block, we will have a index_gep_typ pointer
   if iszero(O)
     @static if USE_OPAQUE_PTR
-      push!(
-        instrs,
-        "%ptr.$(i) = bitcast $(JULIAPOINTERTYPE) %0 to ptr"
-      )
+      push!(instrs, "%ptr.$(i) = bitcast $(JULIAPOINTERTYPE) %0 to ptr")
     else
       push!(
         instrs,
         "%ptr.$(i) = inttoptr $(JULIAPOINTERTYPE) %0 to $(index_gep_typ)*"
-        )
-      end
+      )
+    end
     i += 1
   else # !iszero(O)
     if !iszero(O & (tzf - 1)) # then index_gep_typ works for the constant offset
@@ -188,15 +185,12 @@ function offset_ptr(
       offset = O >> tz
     end
     @static if USE_OPAQUE_PTR
+      push!(instrs, "%ptr.$(i) = bitcast $(JULIAPOINTERTYPE) %0 to ptr")
+    else
       push!(
         instrs,
-        "%ptr.$(i) = bitcast $(JULIAPOINTERTYPE) %0 to ptr"
+        "%ptr.$(i) = inttoptr $(JULIAPOINTERTYPE) %0 to $(offset_gep_typ)*"
       )
-    else
-      push!(
-      instrs,
-      "%ptr.$(i) = inttoptr $(JULIAPOINTERTYPE) %0 to $(offset_gep_typ)*"
-    ) 
     end
     i += 1
     @static if USE_OPAQUE_PTR
@@ -206,9 +200,9 @@ function offset_ptr(
       )
     else
       push!(
-      instrs,
-      "%ptr.$(i) = getelementptr inbounds $(offset_gep_typ), $(offset_gep_typ)* %ptr.$(i-1), i32 $(offset)"
-    )
+        instrs,
+        "%ptr.$(i) = getelementptr inbounds $(offset_gep_typ), $(offset_gep_typ)* %ptr.$(i-1), i32 $(offset)"
+      )
     end
     i += 1
     if forgep && iszero(M) && (iszero(X) || isone(X))
@@ -216,26 +210,23 @@ function offset_ptr(
         push!(
           instrs,
           "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to $(JULIAPOINTERTYPE)"
-          )
+        )
       else
         push!(
-        instrs,
-        "%ptr.$(i) = ptrtoint $(offset_gep_typ)* %ptr.$(i-1) to $(JULIAPOINTERTYPE)"
+          instrs,
+          "%ptr.$(i) = ptrtoint $(offset_gep_typ)* %ptr.$(i-1) to $(JULIAPOINTERTYPE)"
         )
-      end  
+      end
       i += 1
       return instrs, i
     elseif offset_gep_typ != index_gep_typ
       @static if USE_OPAQUE_PTR
+        push!(instrs, "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to ptr")
+      else
         push!(
           instrs,
-          "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to ptr"
+          "%ptr.$(i) = bitcast $(offset_gep_typ)* %ptr.$(i-1) to $(index_gep_typ)*"
         )
-      else
-        push!(
-        instrs,
-        "%ptr.$(i) = bitcast $(offset_gep_typ)* %ptr.$(i-1) to $(index_gep_typ)*"
-      )
       end
       i += 1
     end
@@ -270,12 +261,12 @@ function offset_ptr(
           instrs,
           "%ptr.$(i) = bitcast <$W x ptr> %ptr.$(i-1) to <$W x $JULIAPOINTERTYPE>"
         )
-      else 
+      else
         push!(
-        instrs,
-        "%ptr.$(i) = ptrtoint <$W x $index_gep_typ*> %ptr.$(i-1) to <$W x $JULIAPOINTERTYPE>"
-      )
-    end 
+          instrs,
+          "%ptr.$(i) = ptrtoint <$W x $index_gep_typ*> %ptr.$(i-1) to <$W x $JULIAPOINTERTYPE>"
+        )
+      end
       i += 1
     elseif index_gep_typ != vtyp
       @static if USE_OPAQUE_PTR
@@ -288,7 +279,7 @@ function offset_ptr(
           instrs,
           "%ptr.$(i) = bitcast <$W x $index_gep_typ*> %ptr.$(i-1) to <$W x $typ*>"
         )
-      end 
+      end
       i += 1
     end
     return instrs, i
@@ -353,18 +344,15 @@ function offset_ptr(
     # to avoid overflow
     vibytes = max(min(4, rs ÷ W), nextpow2(intlog2(X * W - 1) + 2) >> 3)
     vityp = "i$(8vibytes)"
-    vi = join((X * w for w ∈ 0:W-1), ", $vityp ")
+    vi = join((X * w for w ∈ 0:(W-1)), ", $vityp ")
     if typ !== index_gep_typ
       @static if USE_OPAQUE_PTR
+        push!(instrs, "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to ptr")
+      else
         push!(
           instrs,
-          "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to ptr"
+          "%ptr.$(i) = bitcast $(index_gep_typ)* %ptr.$(i-1) to $(typ)*"
         )
-      else 
-        push!(
-        instrs,
-        "%ptr.$(i) = bitcast $(index_gep_typ)* %ptr.$(i-1) to $(typ)*"
-      )
       end
       i += 1
     end
@@ -375,8 +363,8 @@ function offset_ptr(
       )
     else
       push!(
-      instrs,
-      "%ptr.$(i) = getelementptr inbounds $(typ), $(typ)* %ptr.$(i-1), <$W x $(vityp)> <$vityp $vi>"
+        instrs,
+        "%ptr.$(i) = getelementptr inbounds $(typ), $(typ)* %ptr.$(i-1), <$W x $(vityp)> <$vityp $vi>"
       )
     end
     i += 1
@@ -388,8 +376,8 @@ function offset_ptr(
         )
       else
         push!(
-        instrs,
-        "%ptr.$(i) = ptrtoint <$W x $typ*> %ptr.$(i-1) to <$W x $JULIAPOINTERTYPE>"
+          instrs,
+          "%ptr.$(i) = ptrtoint <$W x $typ*> %ptr.$(i-1) to <$W x $JULIAPOINTERTYPE>"
         )
       end
       i += 1
@@ -398,11 +386,8 @@ function offset_ptr(
   end
   if forgep # if forgep, just return now
     @static if USE_OPAQUE_PTR
-      push!(
-        instrs,
-        "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to $JULIAPOINTERTYPE"
-      )
-    else 
+      push!(instrs, "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to $JULIAPOINTERTYPE")
+    else
       push!(
         instrs,
         "%ptr.$(i) = ptrtoint $(index_gep_typ)* %ptr.$(i-1) to $JULIAPOINTERTYPE"
@@ -411,14 +396,11 @@ function offset_ptr(
     i += 1
   elseif index_gep_typ != vtyp
     @static if USE_OPAQUE_PTR
-      push!(
-        instrs,
-        "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to ptr"
-      )
+      push!(instrs, "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to ptr")
     else
       push!(
-      instrs,
-      "%ptr.$(i) = bitcast $(index_gep_typ)* %ptr.$(i-1) to $(vtyp)*"
+        instrs,
+        "%ptr.$(i) = bitcast $(index_gep_typ)* %ptr.$(i-1) to $(vtyp)*"
       )
     end
     i += 1
@@ -566,7 +548,7 @@ end
 ) where {N,C,B,R}
   stridedpointer(
     pointer(ptr),
-    ArrayInterface.StrideIndex{N,R,C}(static_strides(ptr), offs),
+    StaticArrayInterface.StrideIndex{N,R,C}(static_strides(ptr), offs),
     StaticInt{B}()
   )
 end
@@ -583,7 +565,7 @@ end
 ) where {N,C,B,R,K}
   stridedpointer(
     pointer(ptr),
-    ArrayInterface.StrideIndex{N,R,C}(
+    StaticArrayInterface.StrideIndex{N,R,C}(
       static_strides(ptr),
       increment_ptr(ptr, i)
     ),
@@ -917,12 +899,11 @@ function vload_quote_llvmcall_core(
   end
   if grv
     @static if USE_OPAQUE_PTR
-      loadinstr =
-        "$vtyp @llvm.masked.gather." *
-        suffix(W, T_sym)
+      loadinstr = "$vtyp @llvm.masked.gather." * suffix(W, T_sym)
       decl *= "declare $loadinstr(<$W x ptr>, i32, <$W x i1>, $vtyp)"
     else
-      loadinstr = "$vtyp @llvm.masked.gather." *
+      loadinstr =
+        "$vtyp @llvm.masked.gather." *
         suffix(W, T_sym) *
         '.' *
         ptr_suffix(W, T_sym)
@@ -939,9 +920,9 @@ function vload_quote_llvmcall_core(
       )
     else
       push!(
-      instrs,
-      "%res = call $loadinstr(<$W x $typ*> %ptr.$(i-1), i32 $alignment, <$W x i1> $m, $vtyp $passthrough)" *
-      LOAD_SCOPE_TBAA_FLAGS
+        instrs,
+        "%res = call $loadinstr(<$W x $typ*> %ptr.$(i-1), i32 $alignment, <$W x i1> $m, $vtyp $passthrough)" *
+        LOAD_SCOPE_TBAA_FLAGS
       )
     end
   elseif mask
@@ -972,9 +953,9 @@ function vload_quote_llvmcall_core(
       )
     else
       push!(
-      instrs,
-      "%res = load $vtyp, $vtyp* %ptr.$(i-1), align $alignment" *
-      LOAD_SCOPE_TBAA_FLAGS
+        instrs,
+        "%res = load $vtyp, $vtyp* %ptr.$(i-1), align $alignment" *
+        LOAD_SCOPE_TBAA_FLAGS
       )
     end
   end
@@ -1381,10 +1362,8 @@ function vstore_quote(
     argtostore = "%1"
   end
   if grv
-    @static if USE_OPAQUE_PTR    
-      storeinstr =
-      "void @llvm.masked.scatter." *
-      suffix(W, T_sym)
+    @static if USE_OPAQUE_PTR
+      storeinstr = "void @llvm.masked.scatter." * suffix(W, T_sym)
       decl *= "declare $storeinstr($vtyp, <$W x ptr>, i32, <$W x i1>)"
     else
       storeinstr =
@@ -1394,7 +1373,7 @@ function vstore_quote(
         ptr_suffix(W, T_sym)
       decl *= "declare $storeinstr($vtyp, <$W x $typ*>, i32, <$W x i1>)"
     end
-      m = mask ? m = "%mask.0" : llvmconst(W, "i1 1")
+    m = mask ? m = "%mask.0" : llvmconst(W, "i1 1")
     @static if USE_OPAQUE_PTR
       push!(
         instrs,
@@ -1403,9 +1382,9 @@ function vstore_quote(
       )
     else
       push!(
-      instrs,
-      "call $storeinstr($vtyp $(argtostore), <$W x $typ*> %ptr.$(i-1), i32 $alignment, <$W x i1> $m)" *
-      metadata
+        instrs,
+        "call $storeinstr($vtyp $(argtostore), <$W x $typ*> %ptr.$(i-1), i32 $alignment, <$W x i1> $m)" *
+        metadata
       )
     end
     # push!(instrs, "call $storeinstr($vtyp $(argtostore), <$W x $typ*> %ptr.$(i-1), i32 $alignment, <$W x i1> $m)")
@@ -1422,7 +1401,7 @@ function vstore_quote(
     else
       storeinstr = "void @llvm.masked.store." * suff
       decl *= "declare $storeinstr($vtyp, $vtyp*, i32, <$W x i1>)"
-    push!(
+      push!(
         instrs,
         "call $storeinstr($vtyp $(argtostore), $vtyp* %ptr.$(i-1), i32 $alignment, <$W x i1> %mask.0)" *
         metadata
@@ -2357,7 +2336,7 @@ end
       ret void
     """
   end
-  
+
   llvmcall_expr(
     decl,
     instrs,
@@ -2433,7 +2412,7 @@ end
   )
 end
 @generated function lifetime_end!(ptr::Ptr{T}, ::Val{L}) where {L,T}
-  @static if USE_OPAQUE_PTR  
+  @static if USE_OPAQUE_PTR
     decl = "declare void @llvm.lifetime.end(i64, ptr nocapture)"
     instrs = """
       call void @llvm.lifetime.end(i64 $L, ptr %0)
@@ -2601,13 +2580,11 @@ end
     Expr(
       :block,
       Expr(:meta, :inline),
-      :(
-        unsafe_store!(
-          Base.unsafe_convert(Ptr{Ptr{Cvoid}}, p) + convert(Int, i),
-          Base.pointer_from_objref(v)
-        );
-        return nothing
-      )
+      :(unsafe_store!(
+        Base.unsafe_convert(Ptr{Ptr{Cvoid}}, p) + convert(Int, i),
+        Base.pointer_from_objref(v)
+      );
+      return nothing)
     )
   end
 end
diff --git a/src/llvm_intrin/vector_ops.jl b/src/llvm_intrin/vector_ops.jl
index 154ef5b9..d359fdd7 100644
--- a/src/llvm_intrin/vector_ops.jl
+++ b/src/llvm_intrin/vector_ops.jl
@@ -17,11 +17,10 @@ function shufflevector_instrs(
   mask::String = '<' * join(I, ", ")::String * '>'
   if ((W2 == 0) | (W2 == W))
     v2 = W2 == 0 ? "undef" : "%1"
-    M,
+    M, """
+        %res = shufflevector $vtyp1 %0, $vtyp1 $v2, $vtyp3 $mask
+        ret $vtypr %res
     """
-     %res = shufflevector $vtyp1 %0, $vtyp1 $v2, $vtyp3 $mask
-     ret $vtypr %res
- """
   else
     vtyp0 = "<$W2 x $typ>"
     maskpad =
@@ -31,12 +30,11 @@ function shufflevector_instrs(
         ", "
       ) *
       '>'
-    M,
+    M, """
+        %pad = shufflevector $vtyp0 %1, $vtyp0 undef, <$W x i32> $maskpad
+        %res = shufflevector $vtyp1 %0, $vtyp1 %pad, $vtyp3 $mask
+        ret $vtypr %res    
     """
-     %pad = shufflevector $vtyp0 %1, $vtyp0 undef, <$W x i32> $maskpad
-     %res = shufflevector $vtyp1 %0, $vtyp1 %pad, $vtyp3 $mask
-     ret $vtypr %res    
- """
   end
 end
 function tupletostringvector(@nospecialize(x::NTuple{N,Int})) where {N}
@@ -94,7 +92,7 @@ end
   typ = LLVM_TYPES[T]
   mask =
     '<' *
-    join(map(x -> string("i32 ", x ≥ L ? "undef" : string(x)), 0:W-1), ", ") *
+    join(map(x -> string("i32 ", x ≥ L ? "undef" : string(x)), 0:(W-1)), ", ") *
     '>'
   instrs = """
       %res = shufflevector <$L x $typ> %0, <$L x $typ> undef, <$W x i32> $mask
@@ -144,10 +142,10 @@ end
     )
   )
   mask = Vector{String}(undef, 2W1)
-  for w ∈ 0:W1+W2-1
+  for w ∈ 0:(W1+W2-1)
     mask[w+1] = string("i32 ", w)
   end
-  for w ∈ W1+W2:2W1-1
+  for w ∈ (W1+W2):(2W1-1)
     mask[w+1] = "i32 undef"
   end
   M, instrs = shufflevector_instrs(W1, T, mask, W2)
@@ -194,8 +192,8 @@ function transpose_vecunroll_quote(W)
   log2W = intlog2(W)
   q = Expr(:block, Expr(:meta, :inline), :(vud = data(vu)))
   N = W # N vectors of length W
-  vectors1 = [Symbol(:v_, n) for n ∈ 0:N-1]
-  vectors2 = [Symbol(:v_, n + N) for n ∈ 0:N-1]
+  vectors1 = [Symbol(:v_, n) for n ∈ 0:(N-1)]
+  vectors2 = [Symbol(:v_, n + N) for n ∈ 0:(N-1)]
   # z = Expr(:call, Expr(:curly, Expr(:(.), :VectorizationBase, QuoteNode(:MM)), W), 0)
   # for n ∈ 1:N
   #     push!(q.args, Expr(:(=), vectors1[n], Expr(:call, Expr(:(.), :VectorizationBase, QuoteNode(:vload)), :ptrA, Expr(:tuple, z, n-1))))
@@ -209,10 +207,10 @@ function transpose_vecunroll_quote(W)
   Nhalf = N >>> 1
   vecstride = 1
   partition_stride = 2
-  for nsplits = 0:log2W-1
+  for nsplits = 0:(log2W-1)
     shuffle0 = transposeshuffle(nsplits, W, false)
     shuffle1 = transposeshuffle(nsplits, W, true)
-    for partition ∈ 0:(W>>>(nsplits+1))-1
+    for partition ∈ 0:((W>>>(nsplits+1))-1)
       for _n1 ∈ 1:vecstride
         n1 = partition * partition_stride + _n1
         n2 = n1 + vecstride
@@ -243,7 +241,7 @@ function transpose_vecunroll_quote(W)
 end
 function subset_tup(W, o)
   t = Expr(:tuple)
-  for w ∈ o:W-1+o
+  for w ∈ o:(W-1+o)
     push!(t.args, w)
   end
   Expr(:call, Expr(:curly, :Val, t))
@@ -258,8 +256,8 @@ function transpose_vecunroll_quote_W_larger(N, W)
   log2N = intlog2(N)
   q = Expr(:block, Expr(:meta, :inline), :(vud = data(vu)))
   # N = W # N vectors of length W
-  vectors1 = [Symbol(:v_, n) for n ∈ 0:N-1]
-  vectors2 = [Symbol(:v_, n + N) for n ∈ 0:N-1]
+  vectors1 = [Symbol(:v_, n) for n ∈ 0:(N-1)]
+  vectors2 = [Symbol(:v_, n + N) for n ∈ 0:(N-1)]
   # z = Expr(:call, Expr(:curly, Expr(:(.), :VectorizationBase, QuoteNode(:MM)), W), 0)
   # for n ∈ 1:N
   #     push!(q.args, Expr(:(=), vectors1[n], Expr(:call, Expr(:(.), :VectorizationBase, QuoteNode(:vload)), :ptrA, Expr(:tuple, z, n-1))))
@@ -273,10 +271,10 @@ function transpose_vecunroll_quote_W_larger(N, W)
   Nhalf = N >>> 1
   vecstride = 1
   partition_stride = 2
-  for nsplits = 0:log2N-1
+  for nsplits = 0:(log2N-1)
     shuffle0 = transposeshuffle(nsplits, W, false)
     shuffle1 = transposeshuffle(nsplits, W, true)
-    for partition ∈ 0:(N>>>(nsplits+1))-1
+    for partition ∈ 0:((N>>>(nsplits+1))-1)
       for _n1 ∈ 1:vecstride
         n1 = partition * partition_stride + _n1
         n2 = n1 + vecstride
@@ -322,13 +320,13 @@ function transpose_vecunroll_quote_W_smaller(N, W)
   log2N = intlog2(N)
   q = Expr(:block, Expr(:meta, :inline), :(vud = data(vu)))
   # N = W # N vectors of length W
-  vectors1 = [Symbol(:v_, n) for n ∈ 0:N-1]
-  vectors2 = [Symbol(:v_, n + N) for n ∈ 0:N-1]
+  vectors1 = [Symbol(:v_, n) for n ∈ 0:(N-1)]
+  vectors2 = [Symbol(:v_, n + N) for n ∈ 0:(N-1)]
   # z = Expr(:call, Expr(:curly, Expr(:(.), :VectorizationBase, QuoteNode(:MM)), W), 0)
   # for n ∈ 1:N
   #     push!(q.args, Expr(:(=), vectors1[n], Expr(:call, Expr(:(.), :VectorizationBase, QuoteNode(:vload)), :ptrA, Expr(:tuple, z, n-1))))
   # end
-  vectors3 = [Symbol(:vpiece_, w) for w ∈ 0:W-1]
+  vectors3 = [Symbol(:vpiece_, w) for w ∈ 0:(W-1)]
   for w ∈ 1:W
     push!(
       q.args,
@@ -345,7 +343,7 @@ function transpose_vecunroll_quote_W_smaller(N, W)
   Wratio = Wratio_init
   while Wratio > 1
     Wratioh = Wratio >>> 1
-    for w ∈ 0:(Wratioh)-1
+    for w ∈ 0:((Wratioh)-1)
       i = (2N) * w
       j = i + N
       for n ∈ 1:N
@@ -364,10 +362,10 @@ function transpose_vecunroll_quote_W_smaller(N, W)
   Nhalf = N >>> 1
   vecstride = 1
   partition_stride = 2
-  for nsplits = 0:log2N-1
+  for nsplits = 0:(log2N-1)
     shuffle0 = transposeshuffle(nsplits, W, false)
     shuffle1 = transposeshuffle(nsplits, W, true)
-    for partition ∈ 0:(N>>>(nsplits+1))-1
+    for partition ∈ 0:((N>>>(nsplits+1))-1)
       for _n1 ∈ 1:vecstride
         n1 = partition * partition_stride + _n1
         n2 = n1 + vecstride
@@ -445,7 +443,7 @@ end
 end
 @generated function vec_to_vecunroll(v::AbstractSIMDVector{W}) where {W}
   t = Expr(:tuple)
-  for w ∈ 0:W-1
+  for w ∈ 0:(W-1)
     push!(t.args, :(extractelement(v, $w)))
   end
   Expr(:block, Expr(:meta, :inline), :(VecUnroll($t)))
@@ -498,14 +496,14 @@ end
 
 @generated function uppervector(vx::AbstractSIMD{W}) where {W}
   s = Expr(:tuple)
-  for i ∈ W>>>1:W-1
+  for i ∈ (W>>>1):(W-1) # shuffle indices W>>>1 … W-1 (trailing part of 0:(W-1))
     push!(s.args, i)
   end
   shuffleexpr(s)
 end
 @generated function lowervector(vx::AbstractSIMD{W}) where {W}
   s = Expr(:tuple)
-  for i ∈ 0:(W>>>1)-1
+  for i ∈ 0:((W>>>1)-1) # shuffle indices 0 … (W>>>1)-1 (leading part of 0:(W-1))
     push!(s.args, i)
   end
   shuffleexpr(s)
@@ -514,14 +512,14 @@ end
 
 @generated function extractupper(vx::AbstractSIMD{W}) where {W}
   s = Expr(:tuple)
-  for i ∈ 0:(W>>>1)-1
+  for i ∈ 0:((W>>>1)-1) # W>>>1 entries; each contributes the even index 2i
     push!(s.args, 2i)
   end
   shuffleexpr(s)
 end
 @generated function extractlower(vx::AbstractSIMD{W}) where {W}
   s = Expr(:tuple)
-  for i ∈ 0:(W>>>1)-1
+  for i ∈ 0:((W>>>1)-1) # W>>>1 entries; each contributes the odd index 2i + 1
     push!(s.args, 2i + 1)
   end
   shuffleexpr(s)
diff --git a/src/llvm_types.jl b/src/llvm_types.jl
index 60409cda..9b85e41d 100644
--- a/src/llvm_types.jl
+++ b/src/llvm_types.jl
@@ -117,14 +117,15 @@ end
 """
 use opaque pointer
 Ref:
-- Switch LLVM codegen of Ptr{T} to an actual pointer type.
-  https://github.com/JuliaLang/julia/pull/53687
+
+  - Switch LLVM codegen of Ptr{T} to an actual pointer type.
+    https://github.com/JuliaLang/julia/pull/53687
 """
 const USE_OPAQUE_PTR = VERSION >= v"1.12-DEV"
 
 @static if !USE_OPAQUE_PTR
   const JULIAPOINTERTYPE = 'i' * string(8sizeof(Int))
-else 
+else
   const JULIAPOINTERTYPE = "ptr"
 end
 
@@ -160,10 +161,10 @@ suffix(@nospecialize(T))::String = suffix(JULIA_TYPES[T])
 @static if !USE_OPAQUE_PTR
   ptr_suffix(T) = "p0" * suffix(T)
   suffix(::Type{Ptr{T}}) where {T} = "p0" * suffix(T)
-else 
+else
   ptr_suffix(T) = "p0"
   suffix(::Type{Ptr{T}}) where {T} = "p0"
-end 
+end
 suffix(W::Int, T) = suffix(W, suffix(T))
 
 # Type-dependent LLVM constants
@@ -298,12 +299,13 @@ end
         Expr(:purity, true, true, true, true, false)
       end
       VERSION >= v"1.9.0-DEV.1019" && push!(purity.args, true)
-      VERSION >= v"1.11" && push!(purity.args,
-        #= inaccessiblememonly =# true,
-        #= noub =# true,
-        #= noub_if_noinbounds =# false,
-        #= consistent_overlay =# false,
-        #= nortcall =# true,
+      VERSION >= v"1.11" && push!(
+        purity.args,
+        #= inaccessiblememonly =#true,
+        #= noub =#true,
+        #= noub_if_noinbounds =#false,
+        #= consistent_overlay =#false,
+        #= nortcall =#true
       )
       Expr(:meta, purity, :inline)
     else
diff --git a/src/promotion.jl b/src/promotion.jl
index c5e2f5fe..1795f60f 100644
--- a/src/promotion.jl
+++ b/src/promotion.jl
@@ -227,7 +227,7 @@ maybethrow(::False) = nothing
   ::Type{V2}
 ) where {Nm1,Wsplit,T,V1,T2,W,V2<:AbstractSIMDVector{W,T2}}
   maybethrow(
-    ArrayInterface.ne(
+    ne(
       StaticInt{Nm1}() * StaticInt{Wsplit}() + StaticInt{Wsplit}(),
       StaticInt{W}()
     )
@@ -240,7 +240,7 @@ end
   ::Type{V2}
 ) where {Nm1,Wsplit,T,V1,W,V2<:AbstractMask{W}}
   maybethrow(
-    ArrayInterface.ne(
+    ne(
       StaticInt{Nm1}() * StaticInt{Wsplit}() + StaticInt{Wsplit}(),
       StaticInt{W}()
     )
diff --git a/src/ranges.jl b/src/ranges.jl
index e286be75..d5551c63 100644
--- a/src/ranges.jl
+++ b/src/ranges.jl
@@ -7,7 +7,7 @@
   t = Expr(:tuple)
   foreach(
     w -> push!(t.args, Expr(:call, :(Core.VecElement), T(F * w + O))),
-    0:W-1
+    0:(W-1)
   )
   Expr(:block, Expr(:meta, :inline), Expr(:call, :Vec, t))
 end
@@ -48,7 +48,7 @@ F - static multiplicative factor
   iexpr = bytes == sizeof(I) ? :i : Expr(:call, :%, :i, jtypesym)
   typ = "i$(bits)"
   vtyp = vtype(W, typ)
-  rangevec = join(("$typ $(F*w + O)" for w ∈ 0:W-1), ", ")
+  rangevec = join(("$typ $(F*w + O)" for w ∈ 0:(W-1)), ", ")
   instrs = """
       %ie = insertelement $vtyp undef, $typ %0, i32 0
       %v = shufflevector $vtyp %ie, $vtyp undef, <$W x i32> zeroinitializer
@@ -81,7 +81,7 @@ end
   )
   typ = LLVM_TYPES[T]
   vtyp = vtype(W, typ)
-  rangevec = join(("$typ $(F*w+O).0" for w ∈ 0:W-1), ", ")
+  rangevec = join(("$typ $(F*w+O).0" for w ∈ 0:(W-1)), ", ")
   instrs = """
       %ie = insertelement $vtyp undef, $typ %0, i32 0
       %v = shufflevector $vtyp %ie, $vtyp undef, <$W x i32> zeroinitializer
@@ -207,18 +207,12 @@ end
 @inline vfdiv_fast(i::MM, j::T) where {T<:Real} = vfdiv_fast(float(i), j)
 @inline vfdiv_fast(j::T, i::MM) where {T<:Real} = vfdiv_fast(j, float(i))
 
-@inline vfdiv(x::AbstractSIMDVector{W}, y::VectorizationBase.MM{W}) where {W} =
-  x / float(y)
-@inline vfdiv(y::VectorizationBase.MM{W}, x::AbstractSIMDVector{W}) where {W} =
-  float(y) / x
-@inline vfdiv_fast(
-  x::AbstractSIMDVector{W},
-  y::VectorizationBase.MM{W}
-) where {W} = vfiv_fast(x, float(y))
-@inline vfdiv_fast(
-  y::VectorizationBase.MM{W},
-  x::AbstractSIMDVector{W}
-) where {W} = vfdiv_fast(float(y), x)
+@inline vfdiv(x::AbstractSIMDVector{W}, y::MM{W}) where {W} = x / float(y)
+@inline vfdiv(y::MM{W}, x::AbstractSIMDVector{W}) where {W} = float(y) / x
+@inline vfdiv_fast(x::AbstractSIMDVector{W}, y::MM{W}) where {W} =
+  vfdiv_fast(x, float(y)) # convert `y` with `float` first, mirroring the (MM, vector) method below
+@inline vfdiv_fast(y::MM{W}, x::AbstractSIMDVector{W}) where {W} =
+  vfdiv_fast(float(y), x)
 
 @inline vfdiv(i::MM, j::VecUnroll{N,W,T,V}) where {N,W,T,V} = float(i) / j
 @inline vfdiv(j::VecUnroll{N,W,T,V}, i::MM) where {N,W,T,V} = j / float(i)
diff --git a/src/special/double.jl b/src/special/double.jl
index 019e758c..0c37e094 100644
--- a/src/special/double.jl
+++ b/src/special/double.jl
@@ -27,7 +27,7 @@
 
 #     - [SLEEF](https://github.com/shibatch/SLEEF) [public domain] Author Naoki Shibata
 
-using Base.Math: IEEEFloat
+using Base: IEEEFloat
 for (op, f, ff) ∈ [
   ("fadd", :add_ieee, :(+)),
   ("fsub", :sub_ieee, :(-)),
@@ -39,17 +39,11 @@ for (op, f, ff) ∈ [
     @generated $f(
       v1::Vec{W,T},
       v2::Vec{W,T}
-    ) where {W,T<:Union{Float32,Float64}} =
-      VectorizationBase.binary_op($op, W, T)
+    ) where {W,T<:Union{Float32,Float64}} = binary_op($op, W, T)
     @inline $f(s1::T, s2::T) where {T<:Union{Float32,Float64}} = $ff(s1, s2)
     @inline $f(args::Vararg{Any,K}) where {K} = $f(promote(args...)...)
-    @inline $f(a::VecUnroll, b::VecUnroll) = VecUnroll(
-      VectorizationBase.fmap(
-        $f,
-        VectorizationBase.data(a),
-        VectorizationBase.data(b)
-      )
-    )
+    @inline $f(a::VecUnroll, b::VecUnroll) =
+      VecUnroll(fmap($f, data(a), data(b)))
   end
 end
 @inline add_ieee(a, b, c) = add_ieee(add_ieee(a, b), c)
@@ -62,15 +56,15 @@ function sub_ieee!(ex)
     if _f isa Symbol
       f::Symbol = _f
       if f === :(+)
-        ex.args[1] = :(VectorizationBase.add_ieee)
+        ex.args[1] = :($(VectorizationBase).add_ieee)
       elseif f === :(-)
-        ex.args[1] = :(VectorizationBase.sub_ieee)
+        ex.args[1] = :($(VectorizationBase).sub_ieee)
       elseif f === :(*)
-        ex.args[1] = :(VectorizationBase.mul_ieee)
+        ex.args[1] = :($(VectorizationBase).mul_ieee)
       elseif f === :(/)
-        ex.args[1] = :(VectorizationBase.fdiv_ieee)
+        ex.args[1] = :($(VectorizationBase).fdiv_ieee)
       elseif f === :(%)
-        ex.args[1] = :(VectorizationBase.rem_ieee)
+        ex.args[1] = :($(VectorizationBase).rem_ieee)
       end
     end
   end
@@ -81,11 +75,8 @@ macro ieee(ex)
   sub_ieee!(ex)
 end
 
-const vIEEEFloat = Union{
-  IEEEFloat,
-  Vec{<:Any,<:IEEEFloat},
-  VectorizationBase.VecUnroll{<:Any,<:Any,<:IEEEFloat}
-}
+const vIEEEFloat =
+  Union{IEEEFloat,Vec{<:Any,<:IEEEFloat},VecUnroll{<:Any,<:Any,<:IEEEFloat}}
 
 struct Double{T<:vIEEEFloat} <: Number
   hi::T
@@ -143,7 +134,7 @@ Base.issubnormal(d::Double) = issubnormal(d.hi) | issubnormal(d.lo)
   th = Expr(:tuple)
   tl = Expr(:tuple)
   gf = GlobalRef(Core, :getfield)
-  for n ∈ 1:N+1
+  for n ∈ 1:(N+1)
     ifelseₕ = Expr(:call, :ifelse, Expr(:call, gf, :md, n, false))
     ifelseₗ = Expr(:call, :ifelse, Expr(:call, gf, :md, n, false))
     if V1 <: VecUnroll
diff --git a/src/special/exp.jl b/src/special/exp.jl
index a337a7bd..7cc743ee 100644
--- a/src/special/exp.jl
+++ b/src/special/exp.jl
@@ -405,10 +405,9 @@ end
   const TABLE_EXP_64_1 =
     Vec(ntuple(j -> Core.VecElement(Float64(2.0^(big(j + 7) / 16))), Val(8)))
 
-  @inline target_trunc(v, ::VectorizationBase.True) = v
-  @inline target_trunc(v, ::VectorizationBase.False) = v % UInt32
-  @inline target_trunc(v) =
-    target_trunc(v, VectorizationBase.has_feature(Val(:x86_64_avx512dq)))
+  @inline target_trunc(v, ::True) = v
+  @inline target_trunc(v, ::False) = v % UInt32
+  @inline target_trunc(v) = target_trunc(v, has_feature(Val(:x86_64_avx512dq)))
 
   # @inline function vexp2_v1(x::AbstractSIMD{8,Float64})
   #     x16 = x
@@ -618,10 +617,7 @@ end
     r = fma(N_float, LogBo256L(Val{B}(), Float64), r)
     # @show (N & 0x000000ff) % Int
     # @show N N & 0x000000ff
-    js = vload(
-      VectorizationBase.zero_offsets(stridedpointer(J_TABLE)),
-      (N & 0x000000ff,)
-    )
+    js = vload(zero_offsets(stridedpointer(J_TABLE)), (N & 0x000000ff,))
     # k = N >>> 0x00000008
     # small_part = reinterpret(UInt64, vfmadd(js, expm1b_kernel(Val{B}(), r), js))
     small_part = vfmadd(js, expm1b_kernel(Val{B}(), r), js)
@@ -784,10 +780,7 @@ end
   r = fast_fma(N_float, LogBo256U(Val{B}(), Float64), x, fma_fast())
   r = fast_fma(N_float, LogBo256L(Val{B}(), Float64), r, fma_fast())
   # @show (N & 0x000000ff) % Int
-  js = vload(
-    VectorizationBase.zero_offsets(stridedpointer(J_TABLE)),
-    (N & 0x000000ff,)
-  )
+  js = vload(zero_offsets(stridedpointer(J_TABLE)), (N & 0x000000ff,))
   k = N >>> 0x00000008
   small_part = reinterpret(UInt64, vfmadd(js, expm1b_kernel(Val{B}(), r), js))
   # return reinterpret(Float64, small_part), r, k, N_float, js
diff --git a/src/special/misc.jl b/src/special/misc.jl
index ed1665c7..ed30782a 100644
--- a/src/special/misc.jl
+++ b/src/special/misc.jl
@@ -220,7 +220,7 @@ end
   vload(stridedpointer(A), (i, j...))
 end
 
-@inline Base.Sort.midpoint(
+@inline Base.midpoint(
   lo::AbstractSIMDVector{W,I},
   hi::AbstractSIMDVector{W,I}
 ) where {W,I<:Integer} = lo + ((hi - lo) >>> 0x01)
@@ -238,7 +238,7 @@ for TType in [:Integer, :(AbstractSIMDVector{W,<:Integer})]
       hi = hi + u
       st = lo < hi - u
       @inbounds while vany(st)
-        m = Base.Sort.midpoint(lo, hi)
+        m = Base.midpoint(lo, hi)
         b = Base.Order.lt(o, x, v[m]) & st
         hi = ifelse(b, m, hi)
         lo = ifelse(b, lo, m)
diff --git a/src/static.jl b/src/static.jl
index 7cc0f4f0..32499495 100644
--- a/src/static.jl
+++ b/src/static.jl
@@ -7,7 +7,7 @@
   last(a) - first(a) + oneunit(T)
 
 @inline maybestaticrange(r::Base.OneTo{T}) where {T} =
-  ArrayInterface.OptionallyStaticUnitRange(StaticInt{1}(), last(r))
+  Static.OptionallyStaticUnitRange(StaticInt{1}(), last(r))
 @inline maybestaticrange(r::UnitRange) = r
 @inline maybestaticrange(r) = maybestaticfirst(r):maybestaticlast(r)
 
@@ -21,7 +21,7 @@
   ::Val{1}
 ) where {T,V<:AbstractVector{T}} = One()
 @inline maybestaticsize(A, ::Val{N}) where {N} =
-  ArrayInterface.static_size(A)[N]
+  StaticArrayInterface.static_size(A)[N]
 
 # These have versions that may allow for more optimizations, so we override base methods with a single `StaticInt` argument.
 for (f, ff) ∈ [
diff --git a/src/strided_pointers/cse_stridemultiples.jl b/src/strided_pointers/cse_stridemultiples.jl
index 0a2f83ae..ab8cc8cb 100644
--- a/src/strided_pointers/cse_stridemultiples.jl
+++ b/src/strided_pointers/cse_stridemultiples.jl
@@ -22,10 +22,11 @@ end
 @inline offsetprecalc(x::StridedBitPointer, ::Val) = x
 # @inline pointerforcomparison(p::AbstractStridedPointer) = pointer(p)
 # @inline pointerforcomparison(p::AbstractStridedPointer, i) = gep(p, i)
-@inline ArrayInterface.offsets(p::OffsetPrecalc) = offsets(getfield(p, :ptr))
+@inline StaticArrayInterface.offsets(p::OffsetPrecalc) =
+  offsets(getfield(p, :ptr))
 
 @inline Base.strides(p::OffsetPrecalc) = static_strides(getfield(p, :ptr))
-@inline ArrayInterface.static_strides(p::OffsetPrecalc) =
+@inline StaticArrayInterface.static_strides(p::OffsetPrecalc) =
   static_strides(getfield(p, :ptr))
 
 @inline function LayoutPointers.similar_no_offset(sptr::OffsetPrecalc, ptr::Ptr)
diff --git a/src/strided_pointers/stridedpointers.jl b/src/strided_pointers/stridedpointers.jl
index 32cd1e12..ddf2926e 100644
--- a/src/strided_pointers/stridedpointers.jl
+++ b/src/strided_pointers/stridedpointers.jl
@@ -8,8 +8,6 @@
     register_size()
   )
 
-using LayoutPointers: nopromote_axis_indicator
-
 @inline _vload(
   ptr::AbstractStridedPointer{T,0},
   i::Tuple{},
@@ -112,7 +110,7 @@ end
   ::StaticInt{RS}
 ) where {T,Nm1,I<:VecUnroll{Nm1},A<:StaticBool,RS}
   t = Expr(:tuple)
-  for n = 1:Nm1+1
+  for n = 1:(Nm1+1)
     push!(
       t.args,
       :(_vload(
@@ -572,7 +570,7 @@ function llvmptr_comp_quote(cmp, Tsym)
   else
     instrs = "%cmpi1 = icmp $cmp i8* %0, %1\n%cmpi8 = zext i1 %cmpi1 to i8\nret i8 %cmpi8"
   end
-    Expr(
+  Expr(
     :block,
     Expr(:meta, :inline),
     :($(Base.llvmcall)($instrs, Bool, Tuple{$pt,$pt}, p1, p2))
diff --git a/src/vecunroll/fmap.jl b/src/vecunroll/fmap.jl
index c6ab2738..fc4f94e5 100644
--- a/src/vecunroll/fmap.jl
+++ b/src/vecunroll/fmap.jl
@@ -362,14 +362,14 @@ function collapse_expr(N, op, final)
     2final
   end
   while N > _final
-    for n ∈ 1:N>>>1
+    for n ∈ 1:(N>>>1)
       push!(q.args, Expr(:(=), s[n], Expr(:call, op, s[n], s[n+(N>>>1)])))
     end
     isodd(N) && push!(q.args, Expr(:(=), s[1], Expr(:call, op, s[1], s[N])))
     N >>>= 1
   end
   if final != 1
-    for n ∈ final+1:N
+    for n ∈ (final+1):N
       push!(q.args, Expr(:(=), s[n-final], Expr(:call, op, s[n-final], s[n])))
     end
     t = Expr(:tuple)
diff --git a/src/vecunroll/memory.jl b/src/vecunroll/memory.jl
index 630f6537..487c7819 100644
--- a/src/vecunroll/memory.jl
+++ b/src/vecunroll/memory.jl
@@ -23,7 +23,7 @@ function unrolled_indicies(
   end
   inds = Vector{Expr}(undef, N)
   inds[1] = baseind
-  for n = 1:N-1
+  for n = 1:(N-1)
     ind = copy(baseind)
     i = Expr(:call, Expr(:curly, :StaticInt, n * F))
     if AU == AV && W > 1
@@ -180,7 +180,7 @@ function _shuffle_load_quote(
     return nothing
     if X > 0
       mask_expr = :(mask(StaticInt{$W}(), 0, vmul_nw($UN, getfield(sm, :evl))))
-      for n ∈ 1:UN-1
+      for n ∈ 1:(UN-1)
         mask_expr = :(vcat(
           $mask_expr,
           mask(StaticInt{$W}(), $(n * W), vmul_nw($UN, getfield(sm, :evl)))
@@ -190,14 +190,14 @@ function _shuffle_load_quote(
     else
       # FIXME
       return nothing
-      vrange = :(VectorizationBase.vrange(
+      vrange = :(vrange(
         Val{$W}(),
         $(integer_of_bytes(min(size_T, rs ÷ W))),
         Val{0}(),
         Val{-1}()
       ))
       mask_expr = :(($vrange + $(UN * W)) ≤ vmul_nw($UN, getfield(sm, :evl)))
-      for n ∈ UN-1:-1:1
+      for n ∈ (UN-1):-1:1
         mask_expr = :(vcat(
           $mask_expr,
           ($vrange + $(n * W)) ≤ vmul_nw($UN, getfield(sm, :evl))
@@ -208,8 +208,8 @@ function _shuffle_load_quote(
   end
   push!(q.args, :(v = $vloadexpr))
   vut = Expr(:tuple)
-  Wrange = X > 0 ? (0:1:W-1) : (W-1:-1:0)
-  for n ∈ 0:UN-1
+  Wrange = X > 0 ? (0:1:(W-1)) : ((W-1):-1:0)
+  for n ∈ 0:(UN-1)
     shufftup = Expr(:tuple)
     for w ∈ Wrange
       push!(shufftup.args, n + UN * w)
@@ -256,8 +256,7 @@ function push_transpose_mask!(
       mm_evl_cmp = Symbol(:mm_evl_cmp_, n)
       if w == 1
         isym = integer_of_bytes_symbol(min(4, RS ÷ n))
-        vmmtyp =
-          :(VectorizationBase._vrange(Val{$n}(), $isym, Val{0}(), Val{1}()))
+        vmmtyp = :(_vrange(Val{$n}(), $isym, Val{0}(), Val{1}()))
         push!(q.args, :($mm_evl_cmp = $vmmtyp))
         push!(q.args, :($mw_w = vmul_nw(_evl, $(UInt32(n))) > $mm_evl_cmp))
       else
@@ -1001,8 +1000,8 @@ function _shuffle_store_quote(
     Wtemp = Wnext
   end
   shufftup = Expr(:tuple)
-  for w ∈ ((X > 0) ? (0:1:W-1) : (W-1:-1:0))
-    for n ∈ 0:UN-1
+  for w ∈ ((X > 0) ? (0:1:(W-1)) : ((W-1):-1:0))
+    for n ∈ 0:(UN-1)
       push!(shufftup.args, W * n + w)
     end
   end
@@ -1117,7 +1116,7 @@ function vstore_transpose_quote(
       for nn ∈ 1:npartial
         push!(t.args, vds[i+nn])
       end
-      for nn ∈ npartial+1:n
+      for nn ∈ (npartial+1):n
         # if W == 1
         #     push!(t.args, :(zero($Tsym)))
         # else
@@ -2252,7 +2251,7 @@ function vload_double_unroll_quote(
     unroll = :(Unroll{$AUO,$FO,$NO,$AV,$W,$MO,$X}(Zero()))
     # tupvec = Vector{Expr}(undef, NI)
     vds = Vector{Symbol}(undef, NI)
-    for ui ∈ 0:NI-1
+    for ui ∈ 0:(NI-1)
       if ui == 0
         loadq = :(_vload_unroll(gptr, $unroll)) # VecUnroll($tup)
       else
@@ -2286,7 +2285,7 @@ function vload_double_unroll_quote(
   else # we loop over `UO+1` and do the loads
     unroll = :(Unroll{$AUI,$FI,$NI,$AV,$W,$MI,$X}(Zero()))
     tup = Expr(:tuple)
-    for uo ∈ 0:NO-1
+    for uo ∈ 0:(NO-1)
       if uo == 0
         loadq = :(_vload_unroll(gptr, $unroll))
       else
@@ -2473,7 +2472,7 @@ function vstore_double_unroll_quote(
       push!(q.args, :($vdt = getfield(getfield(vd, $t, false), 1)))
     end
     # tupvec = Vector{Expr}(undef, NI)
-    for ui ∈ 0:NI-1
+    for ui ∈ 0:(NI-1)
       tup = Expr(:tuple)
       # tup = ui == 0 ? Expr(:tuple) : tupvec[ui+1]
       for t ∈ 1:NO
@@ -2501,7 +2500,7 @@ function vstore_double_unroll_quote(
     end
   else # we loop over `UO+1` and do the stores
     unroll = :(Unroll{$AUI,$FI,$NI,$AV,$W,$MI,$X}(Zero()))
-    for uo ∈ 0:NO-1
+    for uo ∈ 0:(NO-1)
       if uo == 0
         storeq = :(_vstore_unroll!(gptr, getfield(vd, 1, false), $unroll))
       else
@@ -2993,10 +2992,10 @@ function transposeshuffle(split, W, offset::Bool)
   S = 1 << split
   i = offset ? S : 0
   while w < W
-    for s ∈ 0:S-1
+    for s ∈ 0:(S-1)
       push!(tup.args, w + s + i)
     end
-    for s ∈ 0:S-1
+    for s ∈ 0:(S-1)
       # push!(tup.args, w + W + s)
       push!(tup.args, w + W + s + i)
     end
@@ -3030,7 +3029,7 @@ function horizontal_reduce_store_expr(
     push!(q.args, :(gptr = gesp(ptr, $gf(u, :i))))
     push!(q.args, :(bptr = pointer(gptr)))
     extractblock = Expr(:block)
-    vectors = [Symbol(:v_, n) for n ∈ 0:N-1]
+    vectors = [Symbol(:v_, n) for n ∈ 0:(N-1)]
     for n ∈ 1:N
       push!(
         extractblock.args,
@@ -3090,7 +3089,7 @@ function horizontal_reduce_store_expr(
                 v0,
                 Expr(
                   :call,
-                  Expr(:curly, :Val, Expr(:tuple, [w for w ∈ 0:Wh-1]...))
+                  Expr(:curly, :Val, Expr(:tuple, [w for w ∈ 0:(Wh-1)]...))
                 )
               ),
               Expr(
@@ -3099,7 +3098,7 @@ function horizontal_reduce_store_expr(
                 v0,
                 Expr(
                   :call,
-                  Expr(:curly, :Val, Expr(:tuple, [w for w ∈ Wh:Wt-1]...))
+                  Expr(:curly, :Val, Expr(:tuple, [w for w ∈ Wh:(Wt-1)]...))
                 )
               )
             )
@@ -3120,7 +3119,7 @@ function horizontal_reduce_store_expr(
       end
       if mask
         boolmask = Expr(:call, :Vec)
-        for n ∈ ncomp+1:ncomp+minWN
+        for n ∈ (ncomp+1):(ncomp+minWN)
           push!(boolmask.args, Expr(:call, gf, :masktuple, n, false))
         end
         push!(storeexpr.args, Expr(:call, :tomask, boolmask))
@@ -3138,7 +3137,7 @@ function horizontal_reduce_store_expr(
     zeroexpr = Expr(:call, Expr(:curly, :StaticInt, 0))
     ind = Expr(:tuple)
     foreach(_ -> push!(ind.args, zeroexpr), 1:D)
-    for n ∈ N+1:Ntotal
+    for n ∈ (N+1):Ntotal
       (n > N + 1) && (ind = copy(ind)) # copy to avoid overwriting old
       ind.args[AU] = Expr(:call, Expr(:curly, :StaticInt, F * (n - 1)))
       scalar = Expr(:call, reduct, Expr(:call, gf, :v, n, false))
@@ -3346,7 +3345,7 @@ function lazymulunroll_load_quote(M, O, N, maskall, masklast, align, rs)
   alignval = Expr(:call, align ? :True : :False)
   rsexpr = Expr(:call, Expr(:curly, :StaticInt, rs))
   gf = GlobalRef(Core, :getfield)
-  for n = 1:N+1
+  for n = 1:(N+1)
     ind = if (M != 1) | (O != 0)
       :(LazyMulAdd{$M,$O}(u[$n]))
     else
@@ -3489,7 +3488,7 @@ function lazymulunroll_store_quote(
   noaliasval = Expr(:call, noalias ? :True : :False)
   nontemporalval = Expr(:call, nontemporal ? :True : :False)
   rsexpr = Expr(:call, Expr(:curly, :StaticInt, rs))
-  for n = 1:N+1
+  for n = 1:(N+1)
     push!(
       q.args,
       Expr(
@@ -3520,7 +3519,7 @@ end
     v = Base.FastMath.add_fast(s + mm)
   end
   t = Expr(:tuple, :v)
-  for n ∈ 1:N-1
+  for n ∈ 1:(N-1)
     # push!(t.args, :(MM{$W,$W}(Base.FastMath.add_fast(s, $(T(n*W))))))
     push!(
       t.args,
@@ -3548,7 +3547,7 @@ end
   else
     Expr(:tuple, :v)
   end
-  for n ∈ 1:N-1
+  for n ∈ 1:(N-1)
     M >>>= 1
     if M % Bool
       push!(
@@ -3583,7 +3582,7 @@ end
     z = zero(v)
   end
   t = Expr(:tuple, :(ifelse(getfield(m, $1, false), v, z)))
-  for n ∈ 1:N-1
+  for n ∈ 1:(N-1)
     push!(
       t.args,
       :(ifelse(
diff --git a/test/Project.toml b/test/Project.toml
index 30d4cd7c..b12914bd 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,9 +1,15 @@
 [deps]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
+ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7"
+HostCPUFeatures = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
 LayoutPointers = "10f19ff3-798f-405d-979b-55457f8fc047"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
\ No newline at end of file
+Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
+StaticArrayInterface = "0d7ed370-da01-4f52-bd93-41d350b8b718"
+
+[compat]
+ExplicitImports = "1.13.2"
diff --git a/test/accuracy.jl b/test/accuracy.jl
index 7c76aecf..20a8f849 100644
--- a/test/accuracy.jl
+++ b/test/accuracy.jl
@@ -115,8 +115,8 @@ function test_acc(
     reference = map(f2 ∘ big, xx)
     comp = similar(xx)
     i = 0
-    spc = VectorizationBase.zstridedpointer(comp)
-    spx = VectorizationBase.zstridedpointer(xx)
+    spc = LayoutPointers.zstridedpointer(comp)
+    spx = LayoutPointers.zstridedpointer(xx)
     GC.@preserve xx comp begin
       while i < length(xx)
         vstore!(spc, f1(vload(spx, (MM{W}(i),))), (MM{W}(i),))
diff --git a/test/runtests.jl b/test/runtests.jl
index c9cde993..baf81f78 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,4 +1,5 @@
-import InteractiveUtils, Aqua, ArrayInterface
+import InteractiveUtils, Aqua, ArrayInterface, ExplicitImports
+import HostCPUFeatures, LayoutPointers, Static, StaticArrayInterface
 InteractiveUtils.versioninfo(stdout; verbose = true)
 
 include("testsetup.jl")
@@ -17,18 +18,57 @@ include("testsetup.jl")
   #        3. Use package extensions (still buggy in current Julia LTS v1.10.10)
 
   pirated_types = [
-            VectorizationBase.FastRange,
-            VectorizationBase.AbstractStridedPointer,
-            VectorizationBase.StridedBitPointer,
-            VectorizationBase.StaticInt,
-            VectorizationBase.AbstractSIMD,
-            VectorizationBase.Bit,
-        ]
-  Aqua.test_all(VectorizationBase; deps_compat = deps_compat, piracies=(treat_as_own = pirated_types,))
+    VectorizationBase.FastRange,
+    VectorizationBase.AbstractStridedPointer,
+    VectorizationBase.StridedBitPointer,
+    VectorizationBase.StaticInt,
+    VectorizationBase.AbstractSIMD,
+    VectorizationBase.Bit
+  ]
+  Aqua.test_all(
+    VectorizationBase;
+    deps_compat = deps_compat,
+    piracies = (treat_as_own = pirated_types,)
+  )
   println("Aqua took $((time_ns() - t0)*1e-9) seconds")
   # @test isempty(detect_unbound_args(VectorizationBase))
   # @test isempty(detect_ambiguities(VectorizationBase))
 
+  @testset "ExplicitImports" begin
+    # No implicit imports (`using XY`)
+    @test ExplicitImports.check_no_implicit_imports(VectorizationBase) ===
+          nothing
+
+    # All explicit imports (`using XY: Z`) are loaded via their owners
+    @test ExplicitImports.check_all_explicit_imports_via_owners(
+      VectorizationBase
+    ) === nothing
+
+    # No explicit imports (`using XY: Z`) of non-public names
+    @test_broken ExplicitImports.check_all_explicit_imports_are_public(
+      VectorizationBase
+    ) === nothing
+
+    # No explicit imports (`using XY: Z`) that are not used
+    @test ExplicitImports.check_no_stale_explicit_imports(VectorizationBase) ===
+          nothing
+
+    # Nothing is accessed via modules other than its owner
+    @test ExplicitImports.check_all_qualified_accesses_via_owners(
+      VectorizationBase
+    ) === nothing
+
+    # No accesses of non-public names
+    @test_broken ExplicitImports.check_all_qualified_accesses_are_public(
+      VectorizationBase
+    ) === nothing
+
+    # No self-qualified accesses
+    @test ExplicitImports.check_no_self_qualified_accesses(
+      VectorizationBase
+    ) === nothing
+  end
+
   W = Int(@inferred(VectorizationBase.pick_vector_width(Float64)))
   Sys.WORD_SIZE == 64 &&
     @test @inferred(VectorizationBase.pick_integer(Val(W))) == (
@@ -100,12 +140,14 @@ include("testsetup.jl")
       @test VectorizationBase.align(i) == VectorizationBase.register_size()
     end
     for i ∈
-        1+VectorizationBase.register_size():2VectorizationBase.register_size()
+        (1+VectorizationBase.register_size()):2VectorizationBase.register_size()
+
       @test VectorizationBase.align(i) == 2VectorizationBase.register_size()
     end
     for i ∈
         (1:VectorizationBase.register_size()) .+
         9VectorizationBase.register_size()
+
       @test VectorizationBase.align(i) == 10VectorizationBase.register_size()
     end
     for i ∈ 1:VectorizationBase.register_size()
@@ -113,13 +155,15 @@ include("testsetup.jl")
             reinterpret(Ptr{Cvoid}, Int(VectorizationBase.register_size()))
     end
     for i ∈
-        1+VectorizationBase.register_size():2VectorizationBase.register_size()
+        (1+VectorizationBase.register_size()):2VectorizationBase.register_size()
+
       @test VectorizationBase.align(reinterpret(Ptr{Cvoid}, i)) ==
             reinterpret(Ptr{Cvoid}, 2Int(VectorizationBase.register_size()))
     end
     for i ∈
         (1:VectorizationBase.register_size()) .+
         19VectorizationBase.register_size()
+
       @test VectorizationBase.align(reinterpret(Ptr{Cvoid}, i)) ==
             reinterpret(Ptr{Cvoid}, 20Int(VectorizationBase.register_size()))
     end
@@ -130,7 +174,8 @@ include("testsetup.jl")
             W32 * cld(i, W32)
     end
     for i ∈
-        1+VectorizationBase.register_size():2VectorizationBase.register_size()
+        (1+VectorizationBase.register_size()):2VectorizationBase.register_size()
+
       @test VectorizationBase.align(i, W32) ==
             VectorizationBase.align(i, Float32) ==
             VectorizationBase.align(i, Int32) ==
@@ -139,6 +184,7 @@ include("testsetup.jl")
     for i ∈
         (1:VectorizationBase.register_size()) .+
         29VectorizationBase.register_size()
+
       @test VectorizationBase.align(i, W32) ==
             VectorizationBase.align(i, Float32) ==
             VectorizationBase.align(i, Int32) ==
@@ -152,7 +198,8 @@ include("testsetup.jl")
             W64 * cld(i, W64)
     end
     for i ∈
-        1+VectorizationBase.register_size():2VectorizationBase.register_size()
+        (1+VectorizationBase.register_size()):2VectorizationBase.register_size()
+
       @test VectorizationBase.align(i, W64) ==
             VectorizationBase.align(i, Float64) ==
             VectorizationBase.align(i, Int64) ==
@@ -161,6 +208,7 @@ include("testsetup.jl")
     for i ∈
         (1:VectorizationBase.register_size()) .+
         29VectorizationBase.register_size()
+
       @test VectorizationBase.align(i, W64) ==
             VectorizationBase.align(i, Float64) ==
             VectorizationBase.align(i, Int64) ==
@@ -170,16 +218,18 @@ include("testsetup.jl")
     @test reinterpret(Int, VectorizationBase.align(pointer(A))) %
           VectorizationBase.register_size() === 0
 
-    for i ∈ 0:VectorizationBase.register_size()-1
+    for i ∈ 0:(VectorizationBase.register_size()-1)
       @test VectorizationBase.aligntrunc(i) == 0
     end
     for i ∈
-        VectorizationBase.register_size():2VectorizationBase.register_size()-1
+        VectorizationBase.register_size():(2VectorizationBase.register_size()-1)
+
       @test VectorizationBase.aligntrunc(i) == VectorizationBase.register_size()
     end
     for i ∈
-        (0:VectorizationBase.register_size()-1) .+
+        (0:(VectorizationBase.register_size()-1)) .+
         9VectorizationBase.register_size()
+
       @test VectorizationBase.aligntrunc(i) ==
             9VectorizationBase.register_size()
     end
@@ -191,7 +241,8 @@ include("testsetup.jl")
             W32 * div(i, W32)
     end
     for i ∈
-        1+VectorizationBase.register_size():2VectorizationBase.register_size()
+        (1+VectorizationBase.register_size()):2VectorizationBase.register_size()
+
       @test VectorizationBase.aligntrunc(i, W32) ==
             VectorizationBase.aligntrunc(i, Float32) ==
             VectorizationBase.aligntrunc(i, Int32) ==
@@ -200,6 +251,7 @@ include("testsetup.jl")
     for i ∈
         (1:VectorizationBase.register_size()) .+
         29VectorizationBase.register_size()
+
       @test VectorizationBase.aligntrunc(i, W32) ==
             VectorizationBase.aligntrunc(i, Float32) ==
             VectorizationBase.aligntrunc(i, Int32) ==
@@ -213,7 +265,8 @@ include("testsetup.jl")
             W64 * div(i, W64)
     end
     for i ∈
-        1+VectorizationBase.register_size():2VectorizationBase.register_size()
+        (1+VectorizationBase.register_size()):2VectorizationBase.register_size()
+
       @test VectorizationBase.aligntrunc(i, W64) ==
             VectorizationBase.aligntrunc(i, Float64) ==
             VectorizationBase.aligntrunc(i, Int64) ==
@@ -222,6 +275,7 @@ include("testsetup.jl")
     for i ∈
         (1:VectorizationBase.register_size()) .+
         29VectorizationBase.register_size()
+
       @test VectorizationBase.aligntrunc(i, W64) ==
             VectorizationBase.aligntrunc(i, Float64) ==
             VectorizationBase.aligntrunc(i, Int64) ==
@@ -523,13 +577,13 @@ include("testsetup.jl")
     @test all(VectorizationBase._ispow2, 0:1)
     @test all(
       i ->
-        !any(VectorizationBase._ispow2, 1+(1<<(i-1)):(1<<i)-1) &&
-          VectorizationBase._ispow2(1 << i),
+        !any(VectorizationBase._ispow2, (1+(1<<(i-1))):((1<<i)-1)) &&
+        VectorizationBase._ispow2(1 << i),
       2:9
     )
     @test all(
       i -> VectorizationBase.intlog2(1 << i) == i,
-      0:(Int == Int64 ? 53 : 30)
+      0:(Int==Int64 ? 53 : 30)
     )
     FTypes = (Float32, Float64)
     Wv = ntuple(
@@ -546,7 +600,7 @@ include("testsetup.jl")
       while true
         W >>= VectorizationBase.One()
         W == 0 && break
-        W2, Wshift2 = @inferred(VectorizationBase.pick_vector_width_shift(W, T))
+        W2, Wshift2 = @inferred(HostCPUFeatures.pick_vector_width_shift(W, T))
         @test W2 ==
               VectorizationBase.One() << Wshift2 ==
               @inferred(VectorizationBase.pick_vector_width(W, T)) ==
@@ -555,9 +609,8 @@ include("testsetup.jl")
         @test StaticInt(W) ===
               VectorizationBase.pick_vector_width(Val(Int(W)), T) ===
               VectorizationBase.pick_vector_width(W, T)
-        for n = W+1:2W
-          W3, Wshift3 =
-            VectorizationBase.pick_vector_width_shift(StaticInt(n), T)
+        for n = (W+1):2W
+          W3, Wshift3 = HostCPUFeatures.pick_vector_width_shift(StaticInt(n), T)
           @test W2 << 1 ==
                 W3 ==
                 1 << (Wshift2 + 1) ==
@@ -602,7 +655,7 @@ include("testsetup.jl")
 
     dims = (41, 42, 43) .* 3
     # dims = (41,42,43);
-    A = reshape(collect(Float64(0):Float64(prod(dims) - 1)), dims)
+    A = reshape(collect(Float64(0):Float64(prod(dims)-1)), dims)
 
     P = PermutedDimsArray(A, (3, 1, 2))
     O = OffsetArray(P, (-4, -2, -3))
@@ -719,9 +772,9 @@ include("testsetup.jl")
           @test v2 === VectorizationBase.data(vu)[2]
           @test v3 === VectorizationBase.data(vu)[3]
 
-          ir = 0:(AV == 1 ? W64 - 1 : 0)
-          jr = 0:(AV == 2 ? W64 - 1 : 0)
-          kr = 0:(AV == 3 ? W64 - 1 : 0)
+          ir = 0:(AV==1 ? W64-1 : 0)
+          jr = 0:(AV==2 ? W64-1 : 0)
+          kr = 0:(AV==3 ? W64-1 : 0)
           x1 = getindex.(Ref(B), i .+ ir, j .+ jr, k .+ kr)
           if AU == 1
             ir = ir .+ length(ir)
@@ -757,9 +810,9 @@ include("testsetup.jl")
             VectorizationBase.Unroll{AU,1,5,0,1,zero(UInt)}((i, j, k))
           )
         end
-        ir = 0:(AU == 1 ? 4 : 0)
-        jr = 0:(AU == 2 ? 4 : 0)
-        kr = 0:(AU == 3 ? 4 : 0)
+        ir = 0:(AU==1 ? 4 : 0)
+        jr = 0:(AU==2 ? 4 : 0)
+        kr = 0:(AU==3 ? 4 : 0)
         xvs = getindex.(Ref(B), i .+ ir, j .+ jr, k .+ kr)
         @test xvs ≈ map(VectorizationBase.vsum, [v1, v2, v3, v4, v5])
       end
@@ -771,9 +824,9 @@ include("testsetup.jl")
         pointer(x),
         j,
         (i * VectorizationBase.static_sizeof(Int)),
-        VectorizationBase.False(),
-        VectorizationBase.False(),
-        VectorizationBase.False(),
+        Static.False(),
+        Static.False(),
+        Static.False(),
         VectorizationBase.register_size()
       )
       i += 1
@@ -784,9 +837,9 @@ include("testsetup.jl")
         j,
         (VectorizationBase.static_sizeof(Int) * i),
         Mask{1}(0xff),
-        VectorizationBase.False(),
-        VectorizationBase.False(),
-        VectorizationBase.False(),
+        Static.False(),
+        Static.False(),
+        Static.False(),
         VectorizationBase.register_size()
       )
       i += 1
@@ -796,9 +849,9 @@ include("testsetup.jl")
         pointer(x),
         j,
         VectorizationBase.lazymul(i, VectorizationBase.static_sizeof(Int)),
-        VectorizationBase.False(),
-        VectorizationBase.False(),
-        VectorizationBase.False(),
+        Static.False(),
+        Static.False(),
+        Static.False(),
         VectorizationBase.register_size()
       )
       i += 1
@@ -809,9 +862,9 @@ include("testsetup.jl")
         j,
         VectorizationBase.lazymul(VectorizationBase.static_sizeof(Int), i),
         Mask{1}(0xff),
-        VectorizationBase.False(),
-        VectorizationBase.False(),
-        VectorizationBase.False(),
+        Static.False(),
+        Static.False(),
+        Static.False(),
         VectorizationBase.register_size()
       )
       i += 1
@@ -893,21 +946,21 @@ include("testsetup.jl")
     SizedWrapper{M,N}(A::AT) where {M,N,T,AT<:AbstractMatrix{T}} =
       SizedWrapper{M,N,T,AT}(A)
     Base.size(::SizedWrapper{M,N}) where {M,N} = (M, N)
-    VectorizationBase.static_size(::SizedWrapper{M,N}) where {M,N} =
+    StaticArrayInterface.static_size(::SizedWrapper{M,N}) where {M,N} =
       (StaticInt(M), StaticInt(N))
     Base.getindex(A::SizedWrapper, i...) = getindex(parent(A), i...)
     Base.parent(dw::SizedWrapper) = dw.A
     VectorizationBase.ArrayInterface.parent_type(
       ::Type{SizedWrapper{M,N,T,AT}}
     ) where {M,N,T,AT} = AT
-    VectorizationBase.memory_reference(dw::SizedWrapper) =
-      VectorizationBase.memory_reference(parent(dw))
-    VectorizationBase.contiguous_axis(::Type{A}) where {A<:SizedWrapper} =
-      VectorizationBase.contiguous_axis(
+    LayoutPointers.memory_reference(dw::SizedWrapper) =
+      LayoutPointers.memory_reference(parent(dw))
+    StaticArrayInterface.contiguous_axis(::Type{A}) where {A<:SizedWrapper} =
+      StaticArrayInterface.contiguous_axis(
         VectorizationBase.ArrayInterface.parent_type(A)
       )
-    VectorizationBase.contiguous_batch_size(dw::SizedWrapper) =
-      VectorizationBase.contiguous_batch_size(parent(dw))
+    StaticArrayInterface.contiguous_batch_size(dw::SizedWrapper) =
+      StaticArrayInterface.contiguous_batch_size(parent(dw))
     VectorizationBase.stride_rank(::Type{A}) where {A<:SizedWrapper} =
       VectorizationBase.stride_rank(
         VectorizationBase.ArrayInterface.parent_type(A)
@@ -936,11 +989,11 @@ include("testsetup.jl")
         At = ai ? A : (similar(A')')
         Bt = bi ? B : (similar(B')')
         Ct = ci ? C : (similar(C')')
-        spdw = VectorizationBase.DensePointerWrapper{(true, true)}(
+        spdw = LayoutPointers.DensePointerWrapper{(true, true)}(
           VectorizationBase.stridedpointer(At)
         )
         gsp, pres = @inferred(
-          VectorizationBase.grouped_strided_pointer(
+          LayoutPointers.grouped_strided_pointer(
             (spdw, Bt, Ct),
             Val{(((1, 1), (3, 1)), ((1, 2), (2, 1)), ((2, 2), (3, 2)))}()
           )
@@ -952,13 +1005,13 @@ include("testsetup.jl")
         @test sizeof(gsp) ==
               sizeof(Int) * (6 - (ai & ci) - ((!ai) & bi) - ((!bi) & (!ci)))
         @test sizeof(gsp.offsets) == 0
-        pA, pB, pC = @inferred(VectorizationBase.stridedpointers(gsp))
+        pA, pB, pC = @inferred(LayoutPointers.stridedpointers(gsp))
         @test pA === stridedpointer(At)
         @test pB === stridedpointer(Bt)
         @test pC === stridedpointer(Ct)
         Btsw = SizedWrapper{K,N}(Bt)
         gsp2, pres2 = @inferred(
-          VectorizationBase.grouped_strided_pointer(
+          LayoutPointers.grouped_strided_pointer(
             (At, Btsw, Ct),
             Val{(((1, 1), (3, 1)), ((1, 2), (2, 1)), ((2, 2), (3, 2)))}()
           )
@@ -966,7 +1019,7 @@ include("testsetup.jl")
         @test sizeof(gsp2) ==
               sizeof(Int) * (5 - (ai & ci) - ((!ai) & bi) - ((!bi) & (!ci)))
 
-        pA2, pB2, pC2 = @inferred(VectorizationBase.stridedpointers(gsp2))
+        pA2, pB2, pC2 = @inferred(LayoutPointers.stridedpointers(gsp2))
         @test pointer(pA2) == pointer(At)
         @test pointer(pB2) == pointer(Bt)
         @test pointer(pC2) == pointer(Ct)
@@ -977,10 +1030,10 @@ include("testsetup.jl")
     end
 
     data_in_large = Array{Float64}(undef, 4, 4, 4, 4, 1)
-    data_in = view(data_in_large, :, 1, :, :, 1)
+    data_in = view(data_in_large,:,1,:,:,1)
     tmp1 = Array{Float64}(undef, 4, 4, 4)
-    sp_data_in, sp_tmp1 = VectorizationBase.stridedpointers(
-      VectorizationBase.grouped_strided_pointer(
+    sp_data_in, sp_tmp1 = LayoutPointers.stridedpointers(
+      LayoutPointers.grouped_strided_pointer(
         (data_in, tmp1),
         Val((((1, 1), (2, 1)),))
       )[1]
@@ -1263,8 +1316,10 @@ include("testsetup.jl")
           Vec(ntuple(_ -> Core.VecElement(rand(I1)), Val(WI)))
         ))
         srange =
-          one(I2):(Bool(VectorizationBase.has_feature(Val(:x86_64_avx512dq))) ?
-                   I2(8sizeof(I1) - 1) : I2(31))
+          one(
+            I2
+          ):(Bool(VectorizationBase.has_feature(Val(:x86_64_avx512dq))) ?
+             I2(8sizeof(I1)-1) : I2(31))
         vi2 = VectorizationBase.VecUnroll((
           Vec(ntuple(_ -> Core.VecElement(rand(srange)), Val(WI))),
           Vec(ntuple(_ -> Core.VecElement(rand(srange)), Val(WI))),
@@ -1453,10 +1508,10 @@ include("testsetup.jl")
       end
 
       vi2 = VectorizationBase.VecUnroll((
-        Vec(ntuple(_ -> Core.VecElement(rand(1:M-1)), Val(WI))),
-        Vec(ntuple(_ -> Core.VecElement(rand(1:M-1)), Val(WI))),
-        Vec(ntuple(_ -> Core.VecElement(rand(1:M-1)), Val(WI))),
-        Vec(ntuple(_ -> Core.VecElement(rand(1:M-1)), Val(WI)))
+        Vec(ntuple(_ -> Core.VecElement(rand(1:(M-1))), Val(WI))),
+        Vec(ntuple(_ -> Core.VecElement(rand(1:(M-1))), Val(WI))),
+        Vec(ntuple(_ -> Core.VecElement(rand(1:(M-1))), Val(WI))),
+        Vec(ntuple(_ -> Core.VecElement(rand(1:(M-1))), Val(WI)))
       ))
       vones, vi2f, vtwos = promote(1.0, vi2, 2.0f0) # promotes a binary function, right? Even when used with three args?
       @test vones === VectorizationBase.VecUnroll((
@@ -1536,9 +1591,9 @@ include("testsetup.jl")
       @test tovector(clamp(m1, 2:i)) == clamp.(tovector(m1), 2, i)
       @test tovector(mod(m1, 1:i)) == mod1.(tovector(m1), i)
 
-      @test VectorizationBase.vdivrem.(1:30, 1:30') == divrem.(1:30, 1:30')
-      @test VectorizationBase.vcld.(1:30, 1:30') == cld.(1:30, 1:30')
-      @test VectorizationBase.vrem.(1:30, 1:30') == rem.(1:30, 1:30')
+      @test VectorizationBase.vdivrem.(1:30, 1:(30')) == divrem.(1:30, 1:(30'))
+      @test VectorizationBase.vcld.(1:30, 1:(30')) == cld.(1:30, 1:(30'))
+      @test VectorizationBase.vrem.(1:30, 1:(30')) == rem.(1:30, 1:(30'))
 
       @test gcd(Vec(42, 64, 0, -37), Vec(18, 96, -38, 0)) === Vec(6, 32, 38, 37)
       @test lcm(Vec(24, 16, 42, 0), Vec(18, 12, 18, 17)) === Vec(72, 48, 126, 0)
@@ -2028,27 +2083,30 @@ include("testsetup.jl")
         1
       ))
     ) === StaticInt{8}()
-    @test VectorizationBase.CartesianVIndex((StaticInt(-4), StaticInt(7))):VectorizationBase.CartesianVIndex((
-      StaticInt(14),
-      StaticInt(73)
-    )) === CartesianIndices((
+    @test VectorizationBase.CartesianVIndex((
+      StaticInt(-4),
+      StaticInt(7)
+    )):VectorizationBase.CartesianVIndex((StaticInt(14), StaticInt(73))) ===
+          CartesianIndices((
       StaticInt(-4):StaticInt(14),
       StaticInt(7):StaticInt(73)
     ))
-    @test VectorizationBase.maybestaticfirst(CartesianIndices(A)):VectorizationBase.maybestaticlast(
+    @test VectorizationBase.maybestaticfirst(
       CartesianIndices(A)
-    ) == CartesianIndices(A)
-    @test VectorizationBase.maybestaticfirst(CartesianIndices(A)):VectorizationBase.maybestaticlast(
+    ):VectorizationBase.maybestaticlast(CartesianIndices(A)) ==
+          CartesianIndices(A)
+    @test VectorizationBase.maybestaticfirst(
       CartesianIndices(A)
-    ) === CartesianIndices(map(i -> VectorizationBase.One():i, size(A)))
+    ):VectorizationBase.maybestaticlast(CartesianIndices(A)) ===
+          CartesianIndices(map(i -> VectorizationBase.One():i, size(A)))
   end
   println("Promotion")
   @time @testset "Promotion" begin
     vi2 = VectorizationBase.VecUnroll((
-      Vec(ntuple(_ -> Core.VecElement(rand(1:M-1)), Val(W64))),
-      Vec(ntuple(_ -> Core.VecElement(rand(1:M-1)), Val(W64))),
-      Vec(ntuple(_ -> Core.VecElement(rand(1:M-1)), Val(W64))),
-      Vec(ntuple(_ -> Core.VecElement(rand(1:M-1)), Val(W64)))
+      Vec(ntuple(_ -> Core.VecElement(rand(1:(M-1))), Val(W64))),
+      Vec(ntuple(_ -> Core.VecElement(rand(1:(M-1))), Val(W64))),
+      Vec(ntuple(_ -> Core.VecElement(rand(1:(M-1))), Val(W64))),
+      Vec(ntuple(_ -> Core.VecElement(rand(1:(M-1))), Val(W64)))
     ))
     vones, vi2f, vtwos = @inferred(promote(1.0, vi2, 2.0f0)) # promotes a binary function, right? Even when used with three args?
     @test vones === VectorizationBase.VecUnroll((
@@ -2299,7 +2357,7 @@ include("testsetup.jl")
     ) === typemax(Int32)
     v = Vec(
       ntuple(
-        _ -> rand(typemax(UInt)>>1+one(UInt):typemax(UInt)),
+        _ -> rand((typemax(UInt)>>1+one(UInt)):typemax(UInt)),
         VectorizationBase.pick_vector_width(UInt)
       )...
     )
diff --git a/test/testsetup.jl b/test/testsetup.jl
index 5affe0cd..bb4568a7 100644
--- a/test/testsetup.jl
+++ b/test/testsetup.jl
@@ -15,14 +15,14 @@ function tovector(u::VectorizationBase.VecUnroll{_N,W,_T}) where {_N,W,_T}
   x = Vector{T}(undef, N * W)
   for n ∈ 1:N
     v = VectorizationBase.data(u)[n]
-    for w ∈ 0:W-1
+    for w ∈ 0:(W-1)
       x[(i+=1)] = VectorizationBase.extractelement(v, w)
     end
   end
   x
 end
 tovector(v::VectorizationBase.AbstractSIMDVector{W}) where {W} =
-  [VectorizationBase.extractelement(v, w) for w ∈ 0:W-1]
+  [VectorizationBase.extractelement(v, w) for w ∈ 0:(W-1)]
 tovector(v::VectorizationBase.LazyMulAdd) =
   tovector(VectorizationBase._materialize(v))
 tovector(x) = x