From 4466b7855751b3644ea8171c71a2ba7d72956a85 Mon Sep 17 00:00:00 2001 From: KristofferC Date: Fri, 19 Nov 2021 11:59:10 +0100 Subject: [PATCH] use SIMD.jl for explicit vectorization of partial operations Alternative to https://github.com/JuliaDiff/ForwardDiff.jl/pull/555 Co-authored-by: Yingbo Ma --- .github/workflows/ci.yml | 1 - Project.toml | 4 +++- src/partials.jl | 47 +++++++++++++++++++++------------------- test/PartialsTest.jl | 2 +- 4 files changed, 29 insertions(+), 25 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f83be7ce..23735ec5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,7 +16,6 @@ jobs: fail-fast: false matrix: version: - - '1.0' - '1' - 'nightly' os: diff --git a/Project.toml b/Project.toml index 3318b46d..5c5fd7fc 100644 --- a/Project.toml +++ b/Project.toml @@ -12,6 +12,7 @@ NaNMath = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" Preferences = "21216c6a-2e73-6563-6e65-726566657250" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +SIMD = "fdea26ae-647d-5447-a871-4b548cad5224" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" @@ -24,9 +25,10 @@ DiffTests = "0.0.1, 0.1" LogExpFunctions = "0.3" NaNMath = "0.2.2, 0.3" Preferences = "1" +SIMD = "3" SpecialFunctions = "0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.10, 1.0" StaticArrays = "0.8.3, 0.9, 0.10, 0.11, 0.12, 1.0" -julia = "1" +julia = "1.6" [extras] Calculus = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9" diff --git a/src/partials.jl b/src/partials.jl index fce67b0a..26c26c01 100644 --- a/src/partials.jl +++ b/src/partials.jl @@ -197,29 +197,32 @@ end return tupexpr(i -> :(rand(V)), N) end -@generated function scale_tuple(tup::NTuple{N}, x) where N - return tupexpr(i -> :(tup[$i] * x), N) -end - -@generated function div_tuple_by_scalar(tup::NTuple{N}, x) where N - return tupexpr(i -> :(tup[$i] / x), N) -end - -@generated function add_tuples(a::NTuple{N}, b::NTuple{N}) where N - return tupexpr(i -> :(a[$i] + b[$i]), N) -end -@generated function sub_tuples(a::NTuple{N}, b::NTuple{N}) where N - return tupexpr(i -> :(a[$i] - b[$i]), N) -end - -@generated function minus_tuple(tup::NTuple{N}) where N - return tupexpr(i -> :(-tup[$i]), N) -end - -@generated function mul_tuples(a::NTuple{N}, b::NTuple{N}, afactor, bfactor) where N - return tupexpr(i -> :((afactor * a[$i]) + (bfactor * b[$i])), N) -end +const SIMDFloat = Union{Float64, Float32} +const SIMDInt = Union{ + Int128, Int64, Int32, Int16, Int8, + UInt128, UInt64, UInt32, UInt16, UInt8, + } +const SIMDType = Union{SIMDFloat, SIMDInt} +const NT{N,T} = NTuple{N,T} +using SIMD + +# SIMD implementation +add_tuples(a::NT{N,T}, b::NT{N,T}) where {N, T<:SIMDType} = Tuple(Vec(a) + Vec(b)) +sub_tuples(a::NT{N,T}, b::NT{N,T}) where {N, T<:SIMDType} = Tuple(Vec(a) - Vec(b)) +scale_tuple(tup::NT{N,T}, x::T) where {N, T<:SIMDType} = Tuple(Vec(tup) * x) +div_tuple_by_scalar(tup::NT{N,T}, x::T) where {N, T<:SIMDFloat} = Tuple(Vec(tup) / x) +minus_tuple(tup::NT{N,T}) where {N, T<:SIMDType} = Tuple(-Vec(tup)) +mul_tuples(a::NT{N,T}, b::NT{N,T}, af::T, bf::T) where {N, T<:SIMDType} = Tuple(muladd(Vec{N,T}(af), Vec(a), Vec{N,T}(bf) * Vec(b))) + + +# Fallback implementations +@generated add_tuples(a::NT{N}, b::NT{N}) where N = tupexpr(i -> :(a[$i] + b[$i]), N) +@generated sub_tuples(a::NT{N}, b::NT{N}) where N = tupexpr(i -> :(a[$i] - b[$i]), N) +@generated scale_tuple(tup::NT{N}, x) where N = tupexpr(i -> :(tup[$i] * x), N) +@generated div_tuple_by_scalar(tup::NT{N}, x) where N = tupexpr(i -> :(tup[$i] / x), N) +@generated minus_tuple(tup::NT{N}) where N = tupexpr(i -> :(-tup[$i]), N) +@generated mul_tuples(a::NT{N}, b::NT{N}, af, bf) where N = tupexpr(i -> :((af * a[$i]) + (bf * b[$i])), N) ################### # Pretty Printing # diff --git a/test/PartialsTest.jl b/test/PartialsTest.jl index 39fb05d7..08e76bda 100644 --- a/test/PartialsTest.jl +++ b/test/PartialsTest.jl @@ -114,7 +114,7 @@ for N in (0, 3), T in (Int, Float32, Float64) if N > 0 @test ForwardDiff._div_partials(PARTIALS, PARTIALS2, X, Y) == ForwardDiff._mul_partials(PARTIALS, PARTIALS2, inv(Y), -X/(Y^2)) - @test ForwardDiff._mul_partials(PARTIALS, PARTIALS2, X, Y).values == map((a, b) -> (X * a) + (Y * b), VALUES, VALUES2) + @test all(isapprox.(ForwardDiff._mul_partials(PARTIALS, PARTIALS2, X, Y).values, map((a, b) -> (X * a) + (Y * b), VALUES, VALUES2))) @test ForwardDiff._mul_partials(ZERO_PARTIALS, PARTIALS, X, Y) == Y * PARTIALS @test ForwardDiff._mul_partials(PARTIALS, ZERO_PARTIALS, X, Y) == X * PARTIALS