From 7afeef30d3787c914af8b7293d92723dff162c86 Mon Sep 17 00:00:00 2001 From: Bond_009 Date: Thu, 10 Feb 2022 00:38:23 +0100 Subject: [PATCH 1/6] Optimize Crc32 for arm Adds a fast path in Crc32 for arm and arm64 --- src/ImageSharp/Compression/Zlib/Crc32.cs | 114 ++++++++++++++++++++++- 1 file changed, 110 insertions(+), 4 deletions(-) diff --git a/src/ImageSharp/Compression/Zlib/Crc32.cs b/src/ImageSharp/Compression/Zlib/Crc32.cs index 075d6112a1..8ee0d5b009 100644 --- a/src/ImageSharp/Compression/Zlib/Crc32.cs +++ b/src/ImageSharp/Compression/Zlib/Crc32.cs @@ -7,6 +7,9 @@ #if SUPPORTS_RUNTIME_INTRINSICS using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; +#if NET5_0_OR_GREATER +using ArmCrc32 = System.Runtime.Intrinsics.Arm.Crc32; +#endif #endif namespace SixLabors.ImageSharp.Compression.Zlib @@ -65,13 +68,19 @@ public static uint Calculate(uint crc, ReadOnlySpan buffer) { return ~CalculateSse(~crc, buffer); } - else + +#if NET5_0_OR_GREATER + if (ArmCrc32.Arm64.IsSupported) { - return ~CalculateScalar(~crc, buffer); + return ~CalculateArm64(~crc, buffer); } -#else - return ~CalculateScalar(~crc, buffer); + else if (ArmCrc32.IsSupported) + { + return ~CalculateArm(~crc, buffer); + } +#endif #endif + return ~CalculateScalar(~crc, buffer); } #if SUPPORTS_RUNTIME_INTRINSICS @@ -198,6 +207,103 @@ private static unsafe uint CalculateSse(uint crc, ReadOnlySpan buffer) } } } + +#if NET5_0_OR_GREATER + + [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)] + private static unsafe uint CalculateArm(uint crc, ReadOnlySpan buffer) + { + fixed (byte* bufferPtr = buffer) + { + byte* localBufferPtr = bufferPtr; + int len = buffer.Length; + + while (len > 0 && ((ulong)localBufferPtr & 3) != 0) + { + crc = ArmCrc32.ComputeCrc32(crc, *localBufferPtr++); + len--; + } + + uint* intBufferPtr = (uint*)localBufferPtr; + + while (len >= 8 * sizeof(uint)) + { + crc = ArmCrc32.ComputeCrc32(crc, *intBufferPtr++); + crc = ArmCrc32.ComputeCrc32(crc, *intBufferPtr++); + crc = ArmCrc32.ComputeCrc32(crc, *intBufferPtr++); + crc = ArmCrc32.ComputeCrc32(crc, *intBufferPtr++); + crc = ArmCrc32.ComputeCrc32(crc, *intBufferPtr++); + crc = ArmCrc32.ComputeCrc32(crc, *intBufferPtr++); + crc = ArmCrc32.ComputeCrc32(crc, *intBufferPtr++); + crc = ArmCrc32.ComputeCrc32(crc, *intBufferPtr++); + len -= 8 * sizeof(uint); + } + + while (len >= sizeof(uint)) + { + crc = ArmCrc32.ComputeCrc32(crc, *intBufferPtr++); + len -= sizeof(uint); + } + + localBufferPtr = (byte*)intBufferPtr; + + while (len > 0) + { + crc = ArmCrc32.ComputeCrc32(crc, *localBufferPtr++); + len--; + } + + return crc; + } + } + + [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)] + private static unsafe uint CalculateArm64(uint crc, ReadOnlySpan buffer) + { + fixed (byte* bufferPtr = buffer) + { + byte* localBufferPtr = bufferPtr; + int len = buffer.Length; + + while (len > 0 && ((ulong)localBufferPtr & 7) != 0) + { + crc = ArmCrc32.ComputeCrc32(crc, *localBufferPtr++); + len--; + } + + ulong* longBufferPtr = (ulong*)localBufferPtr; + + while (len >= 8 * sizeof(ulong)) + { + crc = ArmCrc32.Arm64.ComputeCrc32(crc, *longBufferPtr++); + crc = ArmCrc32.Arm64.ComputeCrc32(crc, *longBufferPtr++); + crc = ArmCrc32.Arm64.ComputeCrc32(crc, *longBufferPtr++); + crc = ArmCrc32.Arm64.ComputeCrc32(crc, *longBufferPtr++); + crc = ArmCrc32.Arm64.ComputeCrc32(crc, *longBufferPtr++); + crc = ArmCrc32.Arm64.ComputeCrc32(crc, *longBufferPtr++); + crc = ArmCrc32.Arm64.ComputeCrc32(crc, *longBufferPtr++); + crc = ArmCrc32.Arm64.ComputeCrc32(crc, *longBufferPtr++); + len -= 8 * sizeof(ulong); + } + + while (len >= sizeof(ulong)) + { + crc = ArmCrc32.Arm64.ComputeCrc32(crc, *longBufferPtr++); + len -= sizeof(ulong); + } + + localBufferPtr = (byte*)longBufferPtr; + + while (len > 0) + { + crc = ArmCrc32.ComputeCrc32(crc, *localBufferPtr++); + len--; + } + + return crc; + } + } +#endif #endif [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)] From 866a0cd1a33418fa05c3df1ab7febb1465a9d274 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 15 Feb 2022 13:58:44 +0100 Subject: [PATCH 2/6] Add crc tests with and without hardware intrinsics --- .../Formats/Png/Crc32Tests.cs | 41 +++++++++++++++---- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Png/Crc32Tests.cs b/tests/ImageSharp.Tests/Formats/Png/Crc32Tests.cs index 6bdad6ed4d..f00f8a0616 100644 --- a/tests/ImageSharp.Tests/Formats/Png/Crc32Tests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/Crc32Tests.cs @@ -3,6 +3,7 @@ using System; using SixLabors.ImageSharp.Compression.Zlib; +using SixLabors.ImageSharp.Tests.TestUtilities; using Xunit; using SharpCrc32 = ICSharpCode.SharpZipLib.Checksum.Crc32; @@ -15,10 +16,7 @@ public class Crc32Tests [InlineData(0)] [InlineData(1)] [InlineData(2)] - public void ReturnsCorrectWhenEmpty(uint input) - { - Assert.Equal(input, Crc32.Calculate(input, default)); - } + public void CalculateCrc_ReturnsCorrectResultWhenEmpty(uint input) => Assert.Equal(input, Crc32.Calculate(input, default)); [Theory] [InlineData(0)] @@ -28,24 +26,51 @@ public void ReturnsCorrectWhenEmpty(uint input) [InlineData(1024 + 15)] [InlineData(2034)] [InlineData(4096)] - public void MatchesReference(int length) + public void CalculateCrc_MatchesReference(int length) { - var data = GetBuffer(length); + // arrange + byte[] data = GetBuffer(length); var crc = new SharpCrc32(); crc.Update(data); - long expected = crc.Value; + + // act long actual = Crc32.Calculate(data); + // assert Assert.Equal(expected, actual); } private static byte[] GetBuffer(int length) { - var data = new byte[length]; + byte[] data = new byte[length]; new Random(1).NextBytes(data); return data; } + +#if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void RunCalculateCrcTest_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCalculateCrcTest, HwIntrinsics.AllowAll); + + [Fact] + public void RunCalculateCrcTest_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCalculateCrcTest, HwIntrinsics.DisableHWIntrinsic); + + private static void RunCalculateCrcTest() + { + // arrange + int length = 4096; + byte[] data = GetBuffer(length); + var crc = new SharpCrc32(); + crc.Update(data); + long expected = crc.Value; + + // act + long actual = Crc32.Calculate(data); + + // assert + Assert.Equal(expected, actual); + } +#endif } } From 3a40b38c6b002549f11db3fbcdaf0860d6680f98 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 15 Feb 2022 15:03:46 +0100 Subject: [PATCH 3/6] Remove redundant else --- src/ImageSharp/Compression/Zlib/Crc32.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/Compression/Zlib/Crc32.cs b/src/ImageSharp/Compression/Zlib/Crc32.cs index 8ee0d5b009..16ec80285a 100644 --- a/src/ImageSharp/Compression/Zlib/Crc32.cs +++ b/src/ImageSharp/Compression/Zlib/Crc32.cs @@ -74,7 +74,8 @@ public static uint Calculate(uint crc, ReadOnlySpan buffer) { return ~CalculateArm64(~crc, buffer); } - else if (ArmCrc32.IsSupported) + + if (ArmCrc32.IsSupported) { return ~CalculateArm(~crc, buffer); } From f82a7d67e2d5dfbdfafb6011b15908f3428b764c Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 15 Feb 2022 15:08:35 +0100 Subject: [PATCH 4/6] Avoid code duplication in crc test --- .../Formats/Png/Crc32Tests.cs | 20 ++++--------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Png/Crc32Tests.cs b/tests/ImageSharp.Tests/Formats/Png/Crc32Tests.cs index f00f8a0616..0e2fca91e2 100644 --- a/tests/ImageSharp.Tests/Formats/Png/Crc32Tests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/Crc32Tests.cs @@ -26,7 +26,9 @@ public class Crc32Tests [InlineData(1024 + 15)] [InlineData(2034)] [InlineData(4096)] - public void CalculateCrc_MatchesReference(int length) + public void CalculateCrc_MatchesReference(int length) => CalculateCrcAndCompareToReference(length); + + private static void CalculateCrcAndCompareToReference(int length) { // arrange byte[] data = GetBuffer(length); @@ -56,21 +58,7 @@ private static byte[] GetBuffer(int length) [Fact] public void RunCalculateCrcTest_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCalculateCrcTest, HwIntrinsics.DisableHWIntrinsic); - private static void RunCalculateCrcTest() - { - // arrange - int length = 4096; - byte[] data = GetBuffer(length); - var crc = new SharpCrc32(); - crc.Update(data); - long expected = crc.Value; - - // act - long actual = Crc32.Calculate(data); - - // assert - Assert.Equal(expected, actual); - } + private static void RunCalculateCrcTest() => CalculateCrcAndCompareToReference(4096); #endif } } From 0db17716b47e569529f4919cae71dc04af67d3ec Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sat, 19 Feb 2022 20:58:28 +0100 Subject: [PATCH 5/6] Execute crc intrinsics tests with different test data --- tests/ImageSharp.Tests/Formats/Png/Crc32Tests.cs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/ImageSharp.Tests/Formats/Png/Crc32Tests.cs b/tests/ImageSharp.Tests/Formats/Png/Crc32Tests.cs index 0e2fca91e2..3115f8c2a8 100644 --- a/tests/ImageSharp.Tests/Formats/Png/Crc32Tests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/Crc32Tests.cs @@ -58,7 +58,14 @@ private static byte[] GetBuffer(int length) [Fact] public void RunCalculateCrcTest_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCalculateCrcTest, HwIntrinsics.DisableHWIntrinsic); - private static void RunCalculateCrcTest() => CalculateCrcAndCompareToReference(4096); + private static void RunCalculateCrcTest() + { + int[] testData = { 0, 8, 215, 1024, 1024 + 15, 2034, 4096 }; + for (int i = 0; i < testData.Length; i++) + { + CalculateCrcAndCompareToReference(testData[i]); + } + } #endif } } From b6bb0063fac824ad635e75e29dbf92ed7ff000f4 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 20 Feb 2022 19:42:11 +0100 Subject: [PATCH 6/6] Add support for disabling ARM intrinsics --- .../FeatureTesting/FeatureTestRunner.cs | 12 ++- .../Tests/FeatureTestRunnerTests.cs | 73 +++++++++++++++++-- 2 files changed, 74 insertions(+), 11 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs index 0d2f3fcefb..2185f4bf34 100644 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -14,7 +14,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities /// public static class FeatureTestRunner { - private static readonly char[] SplitChars = new[] { ',', ' ' }; + private static readonly char[] SplitChars = { ',', ' ' }; /// /// Allows the deserialization of parameters passed to the feature test. @@ -349,7 +349,7 @@ public static void RunWithHwIntrinsicsFeature( internal static Dictionary ToFeatureKeyValueCollection(this HwIntrinsics intrinsics) { - // Loop through and translate the given values into COMPlus equivaluents + // Loop through and translate the given values into COMPlus equivalents. var features = new Dictionary(); foreach (string intrinsic in intrinsics.ToString("G").Split(SplitChars, StringSplitOptions.RemoveEmptyEntries)) { @@ -407,6 +407,12 @@ public enum HwIntrinsics DisableBMI1 = 1 << 14, DisableBMI2 = 1 << 15, DisableLZCNT = 1 << 16, - AllowAll = 1 << 17 + DisableArm64AdvSimd = 1 << 17, + DisableArm64Crc32 = 1 << 18, + DisableArm64Dp = 1 << 19, + DisableArm64Aes = 1 << 20, + DisableArm64Sha1 = 1 << 21, + DisableArm64Sha256 = 1 << 22, + AllowAll = 1 << 23 } } diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs index a2f36c85a8..a5ca246177 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -7,6 +7,10 @@ using System.Numerics; #if SUPPORTS_RUNTIME_INTRINSICS using System.Runtime.Intrinsics.X86; +using Aes = System.Runtime.Intrinsics.X86.Aes; +#if NET5_0_OR_GREATER +using System.Runtime.Intrinsics.Arm; +#endif #endif using Xunit; using Xunit.Abstractions; @@ -16,11 +20,11 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests public class FeatureTestRunnerTests { public static TheoryData Intrinsics => - new TheoryData + new() { - { HwIntrinsics.DisableAES | HwIntrinsics.AllowAll, new string[] { "EnableAES", "AllowAll" } }, - { HwIntrinsics.DisableSIMD | HwIntrinsics.DisableHWIntrinsic, new string[] { "FeatureSIMD", "EnableHWIntrinsic" } }, - { HwIntrinsics.DisableSSE42 | HwIntrinsics.DisableAVX, new string[] { "EnableSSE42", "EnableAVX" } } + { HwIntrinsics.DisableAES | HwIntrinsics.AllowAll, new[] { "EnableAES", "AllowAll" } }, + { HwIntrinsics.DisableSIMD | HwIntrinsics.DisableHWIntrinsic, new[] { "FeatureSIMD", "EnableHWIntrinsic" } }, + { HwIntrinsics.DisableSSE42 | HwIntrinsics.DisableAVX, new[] { "EnableSSE42", "EnableAVX" } } }; [Theory] @@ -56,12 +60,9 @@ public void AllowsAllHwIntrinsicFeatures() } [Fact] - public void CanLimitHwIntrinsicSIMDFeatures() - { - FeatureTestRunner.RunWithHwIntrinsicsFeature( + public void CanLimitHwIntrinsicSIMDFeatures() => FeatureTestRunner.RunWithHwIntrinsicsFeature( () => Assert.False(Vector.IsHardwareAccelerated), HwIntrinsics.DisableSIMD); - } #if SUPPORTS_RUNTIME_INTRINSICS [Fact] @@ -121,6 +122,14 @@ static void AssertHwIntrinsicsFeatureDisabled(string intrinsic) Assert.False(Bmi1.IsSupported); Assert.False(Bmi2.IsSupported); Assert.False(Lzcnt.IsSupported); +#if NET5_0_OR_GREATER + Assert.False(AdvSimd.IsSupported); + Assert.False(System.Runtime.Intrinsics.Arm.Aes.IsSupported); + Assert.False(Crc32.IsSupported); + Assert.False(Dp.IsSupported); + Assert.False(Sha1.IsSupported); + Assert.False(Sha256.IsSupported); +#endif break; case HwIntrinsics.DisableSSE: Assert.False(Sse.IsSupported); @@ -167,6 +176,26 @@ static void AssertHwIntrinsicsFeatureDisabled(string intrinsic) case HwIntrinsics.DisableLZCNT: Assert.False(Lzcnt.IsSupported); break; +#if NET5_0_OR_GREATER + case HwIntrinsics.DisableArm64AdvSimd: + Assert.False(AdvSimd.IsSupported); + break; + case HwIntrinsics.DisableArm64Aes: + Assert.False(System.Runtime.Intrinsics.Arm.Aes.IsSupported); + break; + case HwIntrinsics.DisableArm64Crc32: + Assert.False(Crc32.IsSupported); + break; + case HwIntrinsics.DisableArm64Dp: + Assert.False(Dp.IsSupported); + break; + case HwIntrinsics.DisableArm64Sha1: + Assert.False(Sha1.IsSupported); + break; + case HwIntrinsics.DisableArm64Sha256: + Assert.False(Sha256.IsSupported); + break; +#endif #endif } } @@ -226,6 +255,14 @@ static void AssertHwIntrinsicsFeatureDisabled(string serializable, string intrin Assert.False(Bmi1.IsSupported); Assert.False(Bmi2.IsSupported); Assert.False(Lzcnt.IsSupported); +#if NET5_0_OR_GREATER + Assert.False(AdvSimd.IsSupported); + Assert.False(System.Runtime.Intrinsics.Arm.Aes.IsSupported); + Assert.False(Crc32.IsSupported); + Assert.False(Dp.IsSupported); + Assert.False(Sha1.IsSupported); + Assert.False(Sha256.IsSupported); +#endif break; case HwIntrinsics.DisableSSE: Assert.False(Sse.IsSupported); @@ -272,6 +309,26 @@ static void AssertHwIntrinsicsFeatureDisabled(string serializable, string intrin case HwIntrinsics.DisableLZCNT: Assert.False(Lzcnt.IsSupported); break; +#if NET5_0_OR_GREATER + case HwIntrinsics.DisableArm64AdvSimd: + Assert.False(AdvSimd.IsSupported); + break; + case HwIntrinsics.DisableArm64Aes: + Assert.False(System.Runtime.Intrinsics.Arm.Aes.IsSupported); + break; + case HwIntrinsics.DisableArm64Crc32: + Assert.False(Crc32.IsSupported); + break; + case HwIntrinsics.DisableArm64Dp: + Assert.False(Dp.IsSupported); + break; + case HwIntrinsics.DisableArm64Sha1: + Assert.False(Sha1.IsSupported); + break; + case HwIntrinsics.DisableArm64Sha256: + Assert.False(Sha256.IsSupported); + break; +#endif #endif } }