diff --git a/Directory.Build.props b/Directory.Build.props index 73144201c7..bdca231554 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -114,4 +114,11 @@ true + + $(Configuration.EndsWith('-Intrinsics')) + + + + $(RepoRoot)build\AfterCommonTargets.targets + diff --git a/Microsoft.ML.sln b/Microsoft.ML.sln index 140c93753c..18d9d3867e 100644 --- a/Microsoft.ML.sln +++ b/Microsoft.ML.sln @@ -97,6 +97,13 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.CodeAnalyzer", EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.CodeAnalyzer.Tests", "test\Microsoft.ML.CodeAnalyzer.Tests\Microsoft.ML.CodeAnalyzer.Tests.csproj", "{3E4ABF07-7970-4BE6-B45B-A13D3C397545}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.CpuMath.PerformanceTests", "test\Microsoft.ML.CpuMath.PerformanceTests\Microsoft.ML.CpuMath.PerformanceTests.csproj", "{7333EDEF-4144-405C-A5EC-6F42201857D8}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.CpuMath.UnitTests.netstandard", "test\Microsoft.ML.CpuMath.UnitTests.netstandard\Microsoft.ML.CpuMath.UnitTests.netstandard.csproj", "{A0E562A9-0E6D-470D-B180-6EB44BA84D60}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.CpuMath.UnitTests.netcoreapp", "test\Microsoft.ML.CpuMath.UnitTests.netcoreapp\Microsoft.ML.CpuMath.UnitTests.netcoreapp.csproj", "{5F81A2A4-73AD-494C-B387-07D605EC8826}" +EndProject + Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "Microsoft.ML.FSharp.Tests", "test\Microsoft.ML.FSharp.Tests\Microsoft.ML.FSharp.Tests.fsproj", "{802233D6-8CC0-46AD-9F23-FEE1E9AED9B3}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.ImageAnalytics", "src\Microsoft.ML.ImageAnalytics\Microsoft.ML.ImageAnalytics.csproj", "{00E38F77-1E61-4CDF-8F97-1417D4E85053}" @@ -335,6 +342,30 @@ Global {3E4ABF07-7970-4BE6-B45B-A13D3C397545}.Release|Any CPU.Build.0 = Release|Any CPU 
{3E4ABF07-7970-4BE6-B45B-A13D3C397545}.Release-Intrinsics|Any CPU.ActiveCfg = Release|Any CPU {3E4ABF07-7970-4BE6-B45B-A13D3C397545}.Release-Intrinsics|Any CPU.Build.0 = Release|Any CPU + {7333EDEF-4144-405C-A5EC-6F42201857D8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {7333EDEF-4144-405C-A5EC-6F42201857D8}.Debug|Any CPU.Build.0 = Debug|Any CPU + {7333EDEF-4144-405C-A5EC-6F42201857D8}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug|Any CPU + {7333EDEF-4144-405C-A5EC-6F42201857D8}.Debug-Intrinsics|Any CPU.Build.0 = Debug|Any CPU + {7333EDEF-4144-405C-A5EC-6F42201857D8}.Release|Any CPU.ActiveCfg = Release|Any CPU + {7333EDEF-4144-405C-A5EC-6F42201857D8}.Release|Any CPU.Build.0 = Release|Any CPU + {7333EDEF-4144-405C-A5EC-6F42201857D8}.Release-Intrinsics|Any CPU.ActiveCfg = Release|Any CPU + {7333EDEF-4144-405C-A5EC-6F42201857D8}.Release-Intrinsics|Any CPU.Build.0 = Release|Any CPU + {A0E562A9-0E6D-470D-B180-6EB44BA84D60}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {A0E562A9-0E6D-470D-B180-6EB44BA84D60}.Debug|Any CPU.Build.0 = Debug|Any CPU + {A0E562A9-0E6D-470D-B180-6EB44BA84D60}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug|Any CPU + {A0E562A9-0E6D-470D-B180-6EB44BA84D60}.Debug-Intrinsics|Any CPU.Build.0 = Debug|Any CPU + {A0E562A9-0E6D-470D-B180-6EB44BA84D60}.Release|Any CPU.ActiveCfg = Release|Any CPU + {A0E562A9-0E6D-470D-B180-6EB44BA84D60}.Release|Any CPU.Build.0 = Release|Any CPU + {A0E562A9-0E6D-470D-B180-6EB44BA84D60}.Release-Intrinsics|Any CPU.ActiveCfg = Release|Any CPU + {A0E562A9-0E6D-470D-B180-6EB44BA84D60}.Release-Intrinsics|Any CPU.Build.0 = Release|Any CPU + {5F81A2A4-73AD-494C-B387-07D605EC8826}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {5F81A2A4-73AD-494C-B387-07D605EC8826}.Debug|Any CPU.Build.0 = Debug|Any CPU + {5F81A2A4-73AD-494C-B387-07D605EC8826}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug|Any CPU + {5F81A2A4-73AD-494C-B387-07D605EC8826}.Debug-Intrinsics|Any CPU.Build.0 = Debug|Any CPU + {5F81A2A4-73AD-494C-B387-07D605EC8826}.Release|Any CPU.ActiveCfg = 
Release|Any CPU + {5F81A2A4-73AD-494C-B387-07D605EC8826}.Release|Any CPU.Build.0 = Release|Any CPU + {5F81A2A4-73AD-494C-B387-07D605EC8826}.Release-Intrinsics|Any CPU.ActiveCfg = Release|Any CPU + {5F81A2A4-73AD-494C-B387-07D605EC8826}.Release-Intrinsics|Any CPU.Build.0 = Release|Any CPU {802233D6-8CC0-46AD-9F23-FEE1E9AED9B3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {802233D6-8CC0-46AD-9F23-FEE1E9AED9B3}.Debug|Any CPU.Build.0 = Debug|Any CPU {802233D6-8CC0-46AD-9F23-FEE1E9AED9B3}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug|Any CPU @@ -395,6 +426,9 @@ Global {001F3B4E-FBE4-4001-AFD2-A6A989CD1C25} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {DCF46B79-1FDB-4DBA-A263-D3D64E3AAA27} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {BF66A305-DF10-47E4-8D81-42049B149D2B} = {D3D38B03-B557-484D-8348-8BADEE4DF592} + {7333EDEF-4144-405C-A5EC-6F42201857D8} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} + {A0E562A9-0E6D-470D-B180-6EB44BA84D60} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} + {5F81A2A4-73AD-494C-B387-07D605EC8826} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} {B4E55B2D-2A92-46E7-B72F-E76D6FD83440} = {7F13E156-3EBA-4021-84A5-CD56BA72F99E} {3E4ABF07-7970-4BE6-B45B-A13D3C397545} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} {802233D6-8CC0-46AD-9F23-FEE1E9AED9B3} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} diff --git a/build.proj b/build.proj index f8e4adaf5f..cb5c557cbe 100644 --- a/build.proj +++ b/build.proj @@ -41,7 +41,8 @@ + Targets="Restore" + Properties="MSBuildWarningsAsMessages=NU1503" /> + + $(MSBuildAllProjects);$(MSBuildThisFileFullPath) + + + + + \ No newline at end of file diff --git a/build/Empty.targets b/build/Empty.targets new file mode 100644 index 0000000000..72abf9cd60 --- /dev/null +++ b/build/Empty.targets @@ -0,0 +1,29 @@ + + + $(MSBuildAllProjects);$(MSBuildThisFileFullPath) + + ignore.targets + + + + + + + + + + + + \ No newline at end of file diff --git a/src/Microsoft.ML.CpuMath/CpuMathUtils.netcoreapp.cs b/src/Microsoft.ML.CpuMath/CpuMathUtils.netcoreapp.cs new file mode 
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Runtime.Intrinsics.X86;

namespace Microsoft.ML.Runtime.Internal.CpuMath
{
    /// <summary>
    /// Vector math helpers over <see cref="float"/> arrays. Every public overload checks its
    /// arguments with <c>Contracts.Assert</c>, wraps the requested range in a span and hands it to a
    /// private overload that calls into <c>SseIntrinsics</c> when <see cref="Sse.IsSupported"/> is
    /// true and runs a plain scalar loop otherwise.
    /// </summary>
    public static partial class CpuMathUtils
    {
        /// <summary>
        /// Multiplies the first <paramref name="count"/> elements of <paramref name="dst"/> by
        /// <paramref name="a"/>, in place.
        /// </summary>
        public static void Scale(float a, float[] dst, int count)
        {
            Contracts.AssertNonEmpty(dst);
            Contracts.Assert(0 < count && count <= dst.Length);

            Scale(a, new Span<float>(dst, 0, count));
        }

        /// <summary>
        /// Multiplies <paramref name="count"/> elements of <paramref name="dst"/>, starting at
        /// <paramref name="offset"/>, by <paramref name="a"/>, in place.
        /// </summary>
        public static void Scale(float a, float[] dst, int offset, int count)
        {
            Contracts.AssertNonEmpty(dst);
            Contracts.Assert(0 < count);
            // "<=" (not "<"): a range that ends exactly at the end of dst is valid, and this matches
            // the offset overloads of SumSq, SumAbs and DotProductDense.
            Contracts.Assert(0 <= offset && offset <= dst.Length - count);

            Scale(a, new Span<float>(dst, offset, count));
        }

        private static void Scale(float a, Span<float> dst)
        {
            if (Sse.IsSupported)
            {
                SseIntrinsics.ScaleU(a, dst);
            }
            else
            {
                for (int i = 0; i < dst.Length; i++)
                {
                    dst[i] *= a;
                }
            }
        }

        /// <summary>
        /// Computes <c>dst[i] += a * src[i]</c> for the first <paramref name="count"/> elements.
        /// </summary>
        public static void AddScale(float a, float[] src, float[] dst, int count)
        {
            Contracts.AssertNonEmpty(src);
            Contracts.Assert(0 < count && count <= src.Length);
            Contracts.AssertNonEmpty(dst);
            Contracts.Assert(count <= dst.Length);

            AddScale(a, new Span<float>(src, 0, count), new Span<float>(dst, 0, count));
        }

        /// <summary>
        /// Computes <c>dst[dstOffset + i] += a * src[i]</c> for <paramref name="count"/> elements.
        /// </summary>
        public static void AddScale(float a, float[] src, float[] dst, int dstOffset, int count)
        {
            Contracts.AssertNonEmpty(src);
            Contracts.Assert(count <= src.Length);
            Contracts.AssertNonEmpty(dst);
            Contracts.Assert(0 <= dstOffset && dstOffset < dst.Length);
            Contracts.Assert(0 < count && count <= dst.Length - dstOffset);

            AddScale(a, new Span<float>(src, 0, count), new Span<float>(dst, dstOffset, count));
        }

        private static void AddScale(float a, Span<float> src, Span<float> dst)
        {
            if (Sse.IsSupported)
            {
                SseIntrinsics.AddScaleU(a, src, dst);
            }
            else
            {
                for (int i = 0; i < dst.Length; i++)
                {
                    dst[i] += a * src[i];
                }
            }
        }

        /// <summary>
        /// Sparse update: computes <c>dst[indices[i]] += a * src[i]</c> for the first
        /// <paramref name="count"/> entries of <paramref name="indices"/>.
        /// </summary>
        public static void AddScale(float a, float[] src, int[] indices, float[] dst, int count)
        {
            Contracts.AssertNonEmpty(src);
            Contracts.Assert(0 < count && count <= src.Length);
            Contracts.AssertNonEmpty(indices);
            Contracts.Assert(count <= indices.Length);
            Contracts.AssertNonEmpty(dst);
            Contracts.Assert(count < dst.Length);

            // src and dst are passed whole; only the index list is trimmed to count.
            AddScale(a, new Span<float>(src), new Span<int>(indices, 0, count), new Span<float>(dst));
        }

        /// <summary>
        /// Sparse update relative to <paramref name="dstOffset"/>: computes
        /// <c>dst[dstOffset + indices[i]] += a * src[i]</c> for the first <paramref name="count"/>
        /// entries of <paramref name="indices"/>.
        /// </summary>
        public static void AddScale(float a, float[] src, int[] indices, float[] dst, int dstOffset, int count)
        {
            Contracts.AssertNonEmpty(src);
            Contracts.Assert(0 < count && count <= src.Length);
            Contracts.AssertNonEmpty(indices);
            Contracts.Assert(count <= indices.Length);
            Contracts.AssertNonEmpty(dst);
            Contracts.Assert(0 <= dstOffset && dstOffset < dst.Length);
            Contracts.Assert(count < dst.Length - dstOffset);

            AddScale(a, new Span<float>(src), new Span<int>(indices, 0, count),
                new Span<float>(dst, dstOffset, dst.Length - dstOffset));
        }

        private static void AddScale(float a, Span<float> src, Span<int> indices, Span<float> dst)
        {
            if (Sse.IsSupported)
            {
                SseIntrinsics.AddScaleSU(a, src, indices, dst);
            }
            else
            {
                for (int i = 0; i < indices.Length; i++)
                {
                    int index = indices[i];
                    dst[index] += a * src[i];
                }
            }
        }

        /// <summary>
        /// Computes <c>dst[i] += src[i]</c> for the first <paramref name="count"/> elements.
        /// </summary>
        public static void Add(float[] src, float[] dst, int count)
        {
            Contracts.AssertNonEmpty(src);
            Contracts.Assert(0 < count && count <= src.Length);
            Contracts.AssertNonEmpty(dst);
            Contracts.Assert(count <= dst.Length);

            Add(new Span<float>(src, 0, count), new Span<float>(dst, 0, count));
        }

        private static void Add(Span<float> src, Span<float> dst)
        {
            if (Sse.IsSupported)
            {
                SseIntrinsics.AddU(src, dst);
            }
            else
            {
                for (int i = 0; i < dst.Length; i++)
                {
                    dst[i] += src[i];
                }
            }
        }

        /// <summary>
        /// Sparse add: computes <c>dst[indices[i]] += src[i]</c> for the first
        /// <paramref name="count"/> entries of <paramref name="indices"/>.
        /// </summary>
        public static void Add(float[] src, int[] indices, float[] dst, int count)
        {
            Contracts.AssertNonEmpty(src);
            Contracts.Assert(0 < count && count <= src.Length);
            Contracts.AssertNonEmpty(indices);
            Contracts.Assert(count <= indices.Length);
            Contracts.AssertNonEmpty(dst);
            Contracts.Assert(count < dst.Length);

            Add(new Span<float>(src), new Span<int>(indices, 0, count), new Span<float>(dst));
        }

        /// <summary>
        /// Sparse add relative to <paramref name="dstOffset"/>: computes
        /// <c>dst[dstOffset + indices[i]] += src[i]</c> for the first <paramref name="count"/>
        /// entries of <paramref name="indices"/>.
        /// </summary>
        public static void Add(float[] src, int[] indices, float[] dst, int dstOffset, int count)
        {
            Contracts.AssertNonEmpty(src);
            Contracts.Assert(0 < count && count <= src.Length);
            Contracts.AssertNonEmpty(indices);
            Contracts.Assert(count <= indices.Length);
            Contracts.AssertNonEmpty(dst);
            Contracts.Assert(0 <= dstOffset && dstOffset < dst.Length);
            Contracts.Assert(count <= dst.Length - dstOffset);

            Add(new Span<float>(src), new Span<int>(indices, 0, count),
                new Span<float>(dst, dstOffset, dst.Length - dstOffset));
        }

        private static void Add(Span<float> src, Span<int> indices, Span<float> dst)
        {
            if (Sse.IsSupported)
            {
                SseIntrinsics.AddSU(src, indices, dst);
            }
            else
            {
                for (int i = 0; i < indices.Length; i++)
                {
                    int index = indices[i];
                    dst[index] += src[i];
                }
            }
        }

        /// <summary>
        /// Computes <c>dst[i] = src1[i] * src2[i]</c> for the first <paramref name="count"/> elements.
        /// </summary>
        public static void MulElementWise(float[] src1, float[] src2, float[] dst, int count)
        {
            Contracts.AssertNonEmpty(src1);
            Contracts.Assert(0 < count && count <= src1.Length);
            Contracts.AssertNonEmpty(src2);
            Contracts.Assert(count <= src2.Length);
            Contracts.AssertNonEmpty(dst);
            // dst is sliced to count below, so it must be validated like both sources.
            Contracts.Assert(count <= dst.Length);

            MulElementWise(new Span<float>(src1, 0, count), new Span<float>(src2, 0, count),
                new Span<float>(dst, 0, count));
        }

        private static void MulElementWise(Span<float> src1, Span<float> src2, Span<float> dst)
        {
            if (Sse.IsSupported)
            {
                SseIntrinsics.MulElementWiseU(src1, src2, dst);
            }
            else
            {
                for (int i = 0; i < dst.Length; i++)
                {
                    dst[i] = src1[i] * src2[i];
                }
            }
        }

        /// <summary>
        /// Returns the sum of squares of the first <paramref name="count"/> elements of
        /// <paramref name="src"/>.
        /// </summary>
        public static float SumSq(float[] src, int count)
        {
            Contracts.AssertNonEmpty(src);
            Contracts.Assert(0 < count && count <= src.Length);

            return SumSq(new Span<float>(src, 0, count));
        }

        /// <summary>
        /// Returns the sum of squares of <paramref name="count"/> elements of <paramref name="src"/>,
        /// starting at <paramref name="offset"/>.
        /// </summary>
        public static float SumSq(float[] src, int offset, int count)
        {
            Contracts.AssertNonEmpty(src);
            Contracts.Assert(0 < count);
            Contracts.Assert(0 <= offset && offset <= src.Length - count);

            return SumSq(new Span<float>(src, offset, count));
        }

        private static float SumSq(Span<float> src)
        {
            if (Sse.IsSupported)
            {
                return SseIntrinsics.SumSqU(src);
            }
            else
            {
                float result = 0;
                for (int i = 0; i < src.Length; i++)
                {
                    result += src[i] * src[i];
                }
                return result;
            }
        }

        /// <summary>
        /// Returns the sum of absolute values of the first <paramref name="count"/> elements of
        /// <paramref name="src"/>.
        /// </summary>
        public static float SumAbs(float[] src, int count)
        {
            Contracts.AssertNonEmpty(src);
            Contracts.Assert(0 < count && count <= src.Length);

            return SumAbs(new Span<float>(src, 0, count));
        }

        /// <summary>
        /// Returns the sum of absolute values of <paramref name="count"/> elements of
        /// <paramref name="src"/>, starting at <paramref name="offset"/>.
        /// </summary>
        public static float SumAbs(float[] src, int offset, int count)
        {
            Contracts.AssertNonEmpty(src);
            Contracts.Assert(0 < count);
            Contracts.Assert(0 <= offset && offset <= src.Length - count);

            return SumAbs(new Span<float>(src, offset, count));
        }

        private static float SumAbs(Span<float> src)
        {
            if (Sse.IsSupported)
            {
                return SseIntrinsics.SumAbsU(src);
            }
            else
            {
                float sum = 0;
                for (int i = 0; i < src.Length; i++)
                {
                    sum += Math.Abs(src[i]);
                }
                return sum;
            }
        }

        /// <summary>
        /// Returns the dot product of the first <paramref name="count"/> elements of
        /// <paramref name="a"/> and <paramref name="b"/>.
        /// </summary>
        public static float DotProductDense(float[] a, float[] b, int count)
        {
            Contracts.AssertNonEmpty(a);
            Contracts.AssertNonEmpty(b);
            Contracts.Assert(0 < count);
            Contracts.Assert(a.Length >= count);
            Contracts.Assert(b.Length >= count);

            return DotProductDense(new Span<float>(a, 0, count), new Span<float>(b, 0, count));
        }

        /// <summary>
        /// Returns the dot product of <paramref name="count"/> elements of <paramref name="a"/>,
        /// starting at <paramref name="offset"/>, with the first <paramref name="count"/> elements of
        /// <paramref name="b"/>.
        /// </summary>
        public static float DotProductDense(float[] a, int offset, float[] b, int count)
        {
            Contracts.AssertNonEmpty(a);
            Contracts.Assert(0 < count);
            Contracts.Assert(0 <= offset && offset <= a.Length - count);
            Contracts.AssertNonEmpty(b);
            Contracts.Assert(b.Length >= count);

            return DotProductDense(new Span<float>(a, offset, count), new Span<float>(b, 0, count));
        }

        private static float DotProductDense(Span<float> a, Span<float> b)
        {
            if (Sse.IsSupported)
            {
                return SseIntrinsics.DotU(a, b);
            }
            else
            {
                float result = 0;
                for (int i = 0; i < b.Length; i++)
                {
                    result += a[i] * b[i];
                }
                return result;
            }
        }

        /// <summary>
        /// Sparse dot product: returns the sum of <c>a[indices[i]] * b[i]</c> over the first
        /// <paramref name="count"/> entries of <paramref name="indices"/>.
        /// </summary>
        public static float DotProductSparse(float[] a, float[] b, int[] indices, int count)
        {
            Contracts.AssertNonEmpty(a);
            Contracts.AssertNonEmpty(b);
            Contracts.Assert(0 < count);
            Contracts.Assert(count < a.Length);
            Contracts.Assert(count <= b.Length);
            Contracts.Assert(count <= indices.Length);

            return DotProductSparse(new Span<float>(a), new Span<float>(b),
                new Span<int>(indices, 0, count));
        }

        /// <summary>
        /// Sparse dot product relative to <paramref name="offset"/>: returns the sum of
        /// <c>a[offset + indices[i]] * b[i]</c> over the first <paramref name="count"/> entries of
        /// <paramref name="indices"/>.
        /// </summary>
        public static float DotProductSparse(float[] a, int offset, float[] b, int[] indices, int count)
        {
            Contracts.AssertNonEmpty(a);
            Contracts.Assert(0 < count);
            Contracts.Assert(0 <= offset && offset < a.Length);
            Contracts.Assert(a.Length - offset > count);
            Contracts.AssertNonEmpty(b);
            Contracts.Assert(count <= b.Length);
            Contracts.Assert(count <= indices.Length);

            return DotProductSparse(new Span<float>(a, offset, a.Length - offset),
                new Span<float>(b), new Span<int>(indices, 0, count));
        }

        private static float DotProductSparse(Span<float> a, Span<float> b, Span<int> indices)
        {
            if (Sse.IsSupported)
            {
                return SseIntrinsics.DotSU(a, b, indices);
            }
            else
            {
                float result = 0;
                for (int i = 0; i < indices.Length; i++)
                {
                    int index = indices[i];
                    result += a[index] * b[i];
                }
                return result;
            }
        }

        /// <summary>
        /// Returns the squared Euclidean distance between the first <paramref name="count"/>
        /// elements of <paramref name="a"/> and <paramref name="b"/>.
        /// </summary>
        public static float L2DistSquared(float[] a, float[] b, int count)
        {
            Contracts.AssertNonEmpty(a);
            Contracts.AssertNonEmpty(b);
            Contracts.Assert(0 < count && count <= a.Length);
            Contracts.Assert(count <= b.Length);

            return L2DistSquared(new Span<float>(a, 0, count), new Span<float>(b, 0, count));
        }

        private static float L2DistSquared(Span<float> a, Span<float> b)
        {
            if (Sse.IsSupported)
            {
                return SseIntrinsics.Dist2(a, b);
            }
            else
            {
                float norm = 0;
                for (int i = 0; i < b.Length; i++)
                {
                    float distance = a[i] - b[i];
                    norm += distance * distance;
                }
                return norm;
            }
        }
    }
}
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

namespace Microsoft.ML.Runtime.Internal.CpuMath
{
    /// <summary>
    /// Forwarding flavour of <c>CpuMathUtils</c>: every member passes its arguments, unchanged and
    /// in the same order, to the <c>SseUtils</c> method of the same name.
    /// </summary>
    public static partial class CpuMathUtils
    {
        // ---- Scale ----

        public static void Scale(float a, float[] dst, int count)
        {
            SseUtils.Scale(a, dst, count);
        }

        public static void Scale(float a, float[] dst, int offset, int count)
        {
            SseUtils.Scale(a, dst, offset, count);
        }

        // ---- AddScale (dense, then sparse) ----

        public static void AddScale(float a, float[] src, float[] dst, int count)
        {
            SseUtils.AddScale(a, src, dst, count);
        }

        public static void AddScale(float a, float[] src, float[] dst, int dstOffset, int count)
        {
            SseUtils.AddScale(a, src, dst, dstOffset, count);
        }

        public static void AddScale(float a, float[] src, int[] indices, float[] dst, int count)
        {
            SseUtils.AddScale(a, src, indices, dst, count);
        }

        public static void AddScale(float a, float[] src, int[] indices, float[] dst, int dstOffset, int count)
        {
            SseUtils.AddScale(a, src, indices, dst, dstOffset, count);
        }

        // ---- Add (dense, then sparse) ----

        public static void Add(float[] src, float[] dst, int count)
        {
            SseUtils.Add(src, dst, count);
        }

        public static void Add(float[] src, int[] indices, float[] dst, int count)
        {
            SseUtils.Add(src, indices, dst, count);
        }

        public static void Add(float[] src, int[] indices, float[] dst, int dstOffset, int count)
        {
            SseUtils.Add(src, indices, dst, dstOffset, count);
        }

        // ---- Element-wise product ----

        public static void MulElementWise(float[] src1, float[] src2, float[] dst, int count)
        {
            SseUtils.MulElementWise(src1, src2, dst, count);
        }

        // ---- Reductions ----

        public static float SumSq(float[] src, int count)
        {
            return SseUtils.SumSq(src, count);
        }

        public static float SumSq(float[] src, int offset, int count)
        {
            return SseUtils.SumSq(src, offset, count);
        }

        public static float SumAbs(float[] src, int count)
        {
            return SseUtils.SumAbs(src, count);
        }

        public static float SumAbs(float[] src, int offset, int count)
        {
            return SseUtils.SumAbs(src, offset, count);
        }

        // ---- Dot products and distance ----

        public static float DotProductDense(float[] a, float[] b, int count)
        {
            return SseUtils.DotProductDense(a, b, count);
        }

        public static float DotProductDense(float[] a, int offset, float[] b, int count)
        {
            return SseUtils.DotProductDense(a, offset, b, count);
        }

        public static float DotProductSparse(float[] a, float[] b, int[] indices, int count)
        {
            return SseUtils.DotProductSparse(a, b, indices, count);
        }

        public static float DotProductSparse(float[] a, int offset, float[] b, int[] indices, int count)
        {
            return SseUtils.DotProductSparse(a, offset, b, indices, count);
        }

        public static float L2DistSquared(float[] a, float[] b, int count)
        {
            return SseUtils.L2DistSquared(a, b, count);
        }
    }
}
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

// The exported function names need to be unique (can't be disambiguated based on signature), hence
// we introduce suffix letters to indicate the general patterns used.
// * U suffix means unaligned and unpadded.
// * S suffix means sparse (unaligned) vector.

using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

namespace Microsoft.ML.Runtime.Internal.CpuMath
{
    /// <summary>
    /// SSE kernels over spans of floats. Every kernel pins its spans, processes four floats per
    /// iteration while at least four remain, and then finishes the remaining (at most three)
    /// elements one at a time. No method here checks <c>Sse.IsSupported</c>; callers are expected
    /// to do so before calling in.
    /// </summary>
    internal static class SseIntrinsics
    {
        /// <summary>Builds a vector from the single element <c>src[idx[0]]</c>.</summary>
        [MethodImplAttribute(MethodImplOptions.AggressiveInlining)]
        private static unsafe Vector128<float> Load1(float* src, int* idx)
        {
            return Sse.SetScalarVector128(src[idx[0]]);
        }

        /// <summary>Gathers <c>src[idx[0]] .. src[idx[3]]</c> into one vector, in that lane order.</summary>
        [MethodImplAttribute(MethodImplOptions.AggressiveInlining)]
        private static unsafe Vector128<float> Load4(float* src, int* idx)
        {
            return Sse.SetVector128(src[idx[3]], src[idx[2]], src[idx[1]], src[idx[0]]);
        }

        [MethodImplAttribute(MethodImplOptions.AggressiveInlining)]
        private static Vector128<float> Rotate(Vector128<float> x)
        {
            // The control byte shuffles the four 32-bit floats of x: ABCD -> BCDA.
            return Sse.Shuffle(x, x, 0x39);
        }

        [MethodImplAttribute(MethodImplOptions.AggressiveInlining)]
        private static Vector128<float> RotateReverse(Vector128<float> x)
        {
            // The control byte shuffles the four 32-bit floats of x: ABCD -> DABC.
            return Sse.Shuffle(x, x, 0x93);
        }

        /// <summary>
        /// Scatters the four lanes of <paramref name="x"/> to <c>dst[idx[0]] .. dst[idx[3]]</c>.
        /// Only the lowest lane can be stored on its own, so the vector is rotated between stores.
        /// </summary>
        [MethodImplAttribute(MethodImplOptions.AggressiveInlining)]
        private static unsafe void Store4(Vector128<float> x, float* dst, int* idx)
        {
            Sse.StoreScalar(dst + idx[0], x);
            x = Rotate(x);
            Sse.StoreScalar(dst + idx[1], x);
            x = Rotate(x);
            Sse.StoreScalar(dst + idx[2], x);
            x = Rotate(x);
            Sse.StoreScalar(dst + idx[3], x);
        }

        /// <summary>
        /// Horizontal sum: returns a vector whose lowest lane holds the sum of the four lanes of
        /// <paramref name="vector"/>.
        /// </summary>
        [MethodImplAttribute(MethodImplOptions.AggressiveInlining)]
        private static Vector128<float> VectorSum(in Vector128<float> vector)
        {
            if (Sse3.IsSupported)
            {
                Vector128<float> tmp = Sse3.HorizontalAdd(vector, vector);
                return Sse3.HorizontalAdd(tmp, tmp);
            }
            else
            {
                // SSE3 is not supported.
                Vector128<float> tmp = Sse.Add(vector, Sse.MoveHighToLow(vector, vector));
                // The control byte shuffles the four 32-bit floats of tmp: ABCD -> BADC.
                return Sse.Add(tmp, Sse.Shuffle(tmp, tmp, 0xb1));
            }
        }

        /// <summary>Multiplies every element of <paramref name="dst"/> by <paramref name="scale"/>, in place.</summary>
        internal static unsafe void ScaleU(float scale, Span<float> dst)
        {
            Vector128<float> scaleVector = Sse.SetAllVector128(scale);

            fixed (float* pdst = dst)
            {
                float* pDstCurrent = pdst;
                float* pEnd = pdst + dst.Length;

                while (pDstCurrent + 4 <= pEnd)
                {
                    Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent);

                    dstVector = Sse.Multiply(scaleVector, dstVector);
                    Sse.Store(pDstCurrent, dstVector);

                    pDstCurrent += 4;
                }

                while (pDstCurrent < pEnd)
                {
                    Vector128<float> dstVector = Sse.LoadScalarVector128(pDstCurrent);

                    dstVector = Sse.MultiplyScalar(scaleVector, dstVector);
                    Sse.StoreScalar(pDstCurrent, dstVector);

                    pDstCurrent++;
                }
            }
        }

        /// <summary>
        /// Computes <c>dst[i] += scale * src[i]</c> for every element of <paramref name="dst"/>;
        /// <paramref name="src"/> must be at least as long as <paramref name="dst"/>.
        /// </summary>
        internal static unsafe void AddScaleU(float scale, Span<float> src, Span<float> dst)
        {
            Vector128<float> scaleVector = Sse.SetAllVector128(scale);

            fixed (float* psrc = src)
            fixed (float* pdst = dst)
            {
                float* pSrcCurrent = psrc;
                float* pDstCurrent = pdst;
                float* pEnd = pdst + dst.Length;

                while (pDstCurrent + 4 <= pEnd)
                {
                    Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent);
                    Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent);

                    srcVector = Sse.Multiply(srcVector, scaleVector);
                    dstVector = Sse.Add(dstVector, srcVector);
                    Sse.Store(pDstCurrent, dstVector);

                    pDstCurrent += 4;
                    pSrcCurrent += 4;
                }

                while (pDstCurrent < pEnd)
                {
                    Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent);
                    Vector128<float> dstVector = Sse.LoadScalarVector128(pDstCurrent);

                    srcVector = Sse.MultiplyScalar(srcVector, scaleVector);
                    dstVector = Sse.AddScalar(dstVector, srcVector);
                    Sse.StoreScalar(pDstCurrent, dstVector);

                    pDstCurrent++;
                    pSrcCurrent++;
                }
            }
        }

        /// <summary>
        /// Sparse update: computes <c>dst[idx[i]] += scale * src[i]</c> for every entry of
        /// <paramref name="idx"/>. The indices are not range-checked against <paramref name="dst"/>.
        /// </summary>
        internal static unsafe void AddScaleSU(float scale, Span<float> src, Span<int> idx, Span<float> dst)
        {
            Vector128<float> scaleVector = Sse.SetAllVector128(scale);

            fixed (float* psrc = src)
            fixed (int* pidx = idx)
            fixed (float* pdst = dst)
            {
                float* pSrcCurrent = psrc;
                int* pIdxCurrent = pidx;
                float* pDstCurrent = pdst;
                int* pEnd = pidx + idx.Length;

                while (pIdxCurrent + 4 <= pEnd)
                {
                    Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent);
                    Vector128<float> dstVector = Load4(pDstCurrent, pIdxCurrent);

                    srcVector = Sse.Multiply(srcVector, scaleVector);
                    dstVector = Sse.Add(dstVector, srcVector);
                    Store4(dstVector, pDstCurrent, pIdxCurrent);

                    pIdxCurrent += 4;
                    pSrcCurrent += 4;
                }

                while (pIdxCurrent < pEnd)
                {
                    pDstCurrent[*pIdxCurrent] += scale * (*pSrcCurrent);

                    pIdxCurrent++;
                    pSrcCurrent++;
                }
            }
        }

        /// <summary>
        /// Computes <c>dst[i] += src[i]</c> for every element of <paramref name="src"/>;
        /// <paramref name="dst"/> must be at least as long as <paramref name="src"/>.
        /// </summary>
        internal static unsafe void AddU(Span<float> src, Span<float> dst)
        {
            fixed (float* psrc = src)
            fixed (float* pdst = dst)
            {
                float* pSrcCurrent = psrc;
                float* pDstCurrent = pdst;
                float* pEnd = psrc + src.Length;

                while (pSrcCurrent + 4 <= pEnd)
                {
                    Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent);
                    Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent);

                    Vector128<float> result = Sse.Add(srcVector, dstVector);
                    Sse.Store(pDstCurrent, result);

                    pSrcCurrent += 4;
                    pDstCurrent += 4;
                }

                while (pSrcCurrent < pEnd)
                {
                    Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent);
                    Vector128<float> dstVector = Sse.LoadScalarVector128(pDstCurrent);

                    Vector128<float> result = Sse.AddScalar(srcVector, dstVector);
                    Sse.StoreScalar(pDstCurrent, result);

                    pSrcCurrent++;
                    pDstCurrent++;
                }
            }
        }

        /// <summary>
        /// Sparse add: computes <c>dst[idx[i]] += src[i]</c> for every entry of
        /// <paramref name="idx"/>. The indices are not range-checked against <paramref name="dst"/>.
        /// </summary>
        internal static unsafe void AddSU(Span<float> src, Span<int> idx, Span<float> dst)
        {
            fixed (float* psrc = src)
            fixed (int* pidx = idx)
            fixed (float* pdst = dst)
            {
                float* pSrcCurrent = psrc;
                int* pIdxCurrent = pidx;
                float* pDstCurrent = pdst;
                int* pEnd = pidx + idx.Length;

                while (pIdxCurrent + 4 <= pEnd)
                {
                    // Gather the four destination slots, add the next four source values, scatter back.
                    Vector128<float> dstVector = Load4(pDstCurrent, pIdxCurrent);
                    Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent);

                    dstVector = Sse.Add(dstVector, srcVector);
                    Store4(dstVector, pDstCurrent, pIdxCurrent);

                    pIdxCurrent += 4;
                    pSrcCurrent += 4;
                }

                while (pIdxCurrent < pEnd)
                {
                    pDstCurrent[*pIdxCurrent] += *pSrcCurrent;

                    pIdxCurrent++;
                    pSrcCurrent++;
                }
            }
        }

        /// <summary>
        /// Computes <c>dst[i] = src1[i] * src2[i]</c> for every element of <paramref name="dst"/>;
        /// both sources must be at least as long as <paramref name="dst"/>.
        /// </summary>
        internal static unsafe void MulElementWiseU(Span<float> src1, Span<float> src2, Span<float> dst)
        {
            // Pin the spans directly (not &src1[0]) so an empty span is a no-op instead of an
            // IndexOutOfRangeException, like every other kernel in this class.
            fixed (float* psrc1 = src1)
            fixed (float* psrc2 = src2)
            fixed (float* pdst = dst)
            {
                float* pSrc1Current = psrc1;
                float* pSrc2Current = psrc2;
                float* pDstCurrent = pdst;
                float* pEnd = pdst + dst.Length;

                while (pDstCurrent + 4 <= pEnd)
                {
                    Vector128<float> src1Vector = Sse.LoadVector128(pSrc1Current);
                    Vector128<float> src2Vector = Sse.LoadVector128(pSrc2Current);
                    src2Vector = Sse.Multiply(src1Vector, src2Vector);
                    Sse.Store(pDstCurrent, src2Vector);

                    pSrc1Current += 4;
                    pSrc2Current += 4;
                    pDstCurrent += 4;
                }

                while (pDstCurrent < pEnd)
                {
                    Vector128<float> src1Vector = Sse.LoadScalarVector128(pSrc1Current);
                    Vector128<float> src2Vector = Sse.LoadScalarVector128(pSrc2Current);
                    src2Vector = Sse.MultiplyScalar(src1Vector, src2Vector);
                    Sse.StoreScalar(pDstCurrent, src2Vector);

                    pSrc1Current++;
                    pSrc2Current++;
                    pDstCurrent++;
                }
            }
        }

        /// <summary>Returns the sum of squares of the elements of <paramref name="src"/>.</summary>
        internal static unsafe float SumSqU(Span<float> src)
        {
            Vector128<float> result = Sse.SetZeroVector128();

            fixed (float* psrc = src)
            {
                float* pSrcCurrent = psrc;
                float* pEnd = psrc + src.Length;

                while (pSrcCurrent + 4 <= pEnd)
                {
                    Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent);
                    result = Sse.Add(result, Sse.Multiply(srcVector, srcVector));

                    pSrcCurrent += 4;
                }

                // Collapse the four partial sums into the lowest lane before the scalar tail.
                result = VectorSum(in result);

                while (pSrcCurrent < pEnd)
                {
                    Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent);
                    result = Sse.AddScalar(result, Sse.MultiplyScalar(srcVector, srcVector));

                    pSrcCurrent++;
                }
            }

            return Sse.ConvertToSingle(result);
        }

        /// <summary>Returns the sum of absolute values of the elements of <paramref name="src"/>.</summary>
        internal static unsafe float SumAbsU(Span<float> src)
        {
            Vector128<float> result = Sse.SetZeroVector128();
            Vector128<float> mask;

            // 0x7FFFFFFF has every bit set except the sign bit, so AND-ing with it yields |x|.
            if (Sse2.IsSupported)
            {
                mask = Sse.StaticCast<int, float>(Sse2.SetAllVector128(0x7FFFFFFF));
            }
            else
            {
                mask = Sse.SetAllVector128(BitConverter.Int32BitsToSingle(0x7FFFFFFF));
            }

            fixed (float* psrc = src)
            {
                float* pSrcCurrent = psrc;
                float* pEnd = psrc + src.Length;

                while (pSrcCurrent + 4 <= pEnd)
                {
                    Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent);
                    result = Sse.Add(result, Sse.And(srcVector, mask));

                    pSrcCurrent += 4;
                }

                result = VectorSum(in result);

                while (pSrcCurrent < pEnd)
                {
                    // Scalar load and add: fewer than four floats remain here, so a full 128-bit
                    // load would read past the end of the pinned buffer.
                    Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent);
                    result = Sse.AddScalar(result, Sse.And(srcVector, mask));

                    pSrcCurrent++;
                }
            }

            return Sse.ConvertToSingle(result);
        }

        /// <summary>
        /// Returns the dot product of <paramref name="src"/> and <paramref name="dst"/> over the
        /// length of <paramref name="src"/>.
        /// </summary>
        internal static unsafe float DotU(Span<float> src, Span<float> dst)
        {
            Vector128<float> result = Sse.SetZeroVector128();

            fixed (float* psrc = src)
            fixed (float* pdst = dst)
            {
                float* pSrcCurrent = psrc;
                float* pDstCurrent = pdst;
                float* pEnd = psrc + src.Length;

                while (pSrcCurrent + 4 <= pEnd)
                {
                    Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent);
                    Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent);

                    result = Sse.Add(result, Sse.Multiply(srcVector, dstVector));

                    pSrcCurrent += 4;
                    pDstCurrent += 4;
                }

                result = VectorSum(in result);

                while (pSrcCurrent < pEnd)
                {
                    Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent);
                    Vector128<float> dstVector = Sse.LoadScalarVector128(pDstCurrent);

                    result = Sse.AddScalar(result, Sse.MultiplyScalar(srcVector, dstVector));

                    pSrcCurrent++;
                    pDstCurrent++;
                }
            }

            return Sse.ConvertToSingle(result);
        }

        /// <summary>
        /// Sparse dot product: returns the sum of <c>src[idx[i]] * dst[i]</c> over every entry of
        /// <paramref name="idx"/>. The indices are not range-checked against <paramref name="src"/>.
        /// </summary>
        internal static unsafe float DotSU(Span<float> src, Span<float> dst, Span<int> idx)
        {
            Vector128<float> result = Sse.SetZeroVector128();

            fixed (float* psrc = src)
            fixed (float* pdst = dst)
            fixed (int* pidx = idx)
            {
                float* pSrcCurrent = psrc;
                float* pDstCurrent = pdst;
                int* pIdxCurrent = pidx;
                int* pEnd = pidx + idx.Length;

                while (pIdxCurrent + 4 <= pEnd)
                {
                    Vector128<float> srcVector = Load4(pSrcCurrent, pIdxCurrent);
                    Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent);

                    result = Sse.Add(result, Sse.Multiply(srcVector, dstVector));

                    pIdxCurrent += 4;
                    pDstCurrent += 4;
                }

                result = VectorSum(in result);

                while (pIdxCurrent < pEnd)
                {
                    Vector128<float> srcVector = Load1(pSrcCurrent, pIdxCurrent);
                    Vector128<float> dstVector = Sse.LoadScalarVector128(pDstCurrent);

                    result = Sse.AddScalar(result, Sse.MultiplyScalar(srcVector, dstVector));

                    pIdxCurrent++;
                    pDstCurrent++;
                }
            }

            return Sse.ConvertToSingle(result);
        }

        /// <summary>
        /// Returns the squared Euclidean distance between <paramref name="src"/> and
        /// <paramref name="dst"/> over the length of <paramref name="src"/>.
        /// </summary>
        internal static unsafe float Dist2(Span<float> src, Span<float> dst)
        {
            Vector128<float> sqDistanceVector = Sse.SetZeroVector128();

            fixed (float* psrc = src)
            fixed (float* pdst = dst)
            {
                float* pSrcCurrent = psrc;
                float* pDstCurrent = pdst;
                float* pEnd = psrc + src.Length;

                while (pSrcCurrent + 4 <= pEnd)
                {
                    Vector128<float> distanceVector = Sse.Subtract(Sse.LoadVector128(pSrcCurrent),
                                                                    Sse.LoadVector128(pDstCurrent));
                    sqDistanceVector = Sse.Add(sqDistanceVector,
                                                Sse.Multiply(distanceVector, distanceVector));

                    pSrcCurrent += 4;
                    pDstCurrent += 4;
                }

                sqDistanceVector = VectorSum(in sqDistanceVector);

                // The tail (at most three elements) is accumulated in plain scalar code.
                float norm = Sse.ConvertToSingle(sqDistanceVector);
                while (pSrcCurrent < pEnd)
                {
                    float distance = (*pSrcCurrent) - (*pDstCurrent);
                    norm += distance * distance;

                    pSrcCurrent++;
                    pDstCurrent++;
                }

                return norm;
            }
        }
    }
}
a/test/Microsoft.ML.CpuMath.PerformanceTests/CpuMathNativeUtils.cs b/test/Microsoft.ML.CpuMath.PerformanceTests/CpuMathNativeUtils.cs new file mode 100644 index 0000000000..90f362de3e --- /dev/null +++ b/test/Microsoft.ML.CpuMath.PerformanceTests/CpuMathNativeUtils.cs @@ -0,0 +1,45 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Runtime.InteropServices; +using System.Security; + +namespace Microsoft.ML.CpuMath.PerformanceTests +{ + /* P/Invoke declarations for the exports of the native "CpuMathNative" library; the benchmarks in SsePerformanceTests call them as the native counterpart of each managed CpuMathUtils operation. In every signature c is an element count (the benchmarks pass the array length or the index-array length). The inline annotations mark which buffers are read-only inputs and which are updated in place. NOTE(review): each signature must match the corresponding native export - verify against the native sources. */ internal static class CpuMathNativeUtils + { + [DllImport("CpuMathNative", EntryPoint = "DotU"), SuppressUnmanagedCodeSecurity] + internal static extern unsafe float DotU(/*const*/ float* pa, /*const*/ float* pb, int c); + + [DllImport("CpuMathNative", EntryPoint = "DotSU"), SuppressUnmanagedCodeSecurity] + internal static extern unsafe float DotSU(/*const*/ float* pa, /*const*/ float* pb, /*const*/ int* pi, int c); + + [DllImport("CpuMathNative", EntryPoint = "SumSqU"), SuppressUnmanagedCodeSecurity] + internal static extern unsafe float SumSqU(/*const*/ float* ps, int c); + + [DllImport("CpuMathNative", EntryPoint = "AddU"), SuppressUnmanagedCodeSecurity] + internal static extern unsafe void AddU(/*_In_ const*/ float* ps, /*_Inout_*/ float* pd, int c); + + [DllImport("CpuMathNative", EntryPoint = "AddSU"), SuppressUnmanagedCodeSecurity] + internal static extern unsafe void AddSU(/*_In_ const*/ float* ps, /*_In_ const*/ int* pi, /*_Inout_*/ float* pd, int c); + + [DllImport("CpuMathNative", EntryPoint = "AddScaleU"), SuppressUnmanagedCodeSecurity] + internal static extern unsafe void AddScaleU(float a, /*_In_ const*/ float* ps, /*_Inout_*/ float* pd, int c); + + [DllImport("CpuMathNative", EntryPoint = "AddScaleSU"), SuppressUnmanagedCodeSecurity] + internal static extern unsafe void AddScaleSU(float a, /*_In_ const*/ float* ps, /*_In_ const*/ int* pi, 
/*_Inout_*/ float* pd, int c); + + [DllImport("CpuMathNative", EntryPoint = "ScaleU"), SuppressUnmanagedCodeSecurity] + internal static extern unsafe void ScaleU(float a, /*_Inout_*/ float* pd, int c); + + [DllImport("CpuMathNative", EntryPoint = "Dist2"), SuppressUnmanagedCodeSecurity] + internal static extern unsafe float Dist2(/*const*/ float* px, /*const*/ float* py, int c); + + [DllImport("CpuMathNative", EntryPoint = "SumAbsU"), SuppressUnmanagedCodeSecurity] + internal static extern unsafe float SumAbsU(/*const*/ float* ps, int c); + + [DllImport("CpuMathNative", EntryPoint = "MulElementWiseU"), SuppressUnmanagedCodeSecurity] + internal static extern unsafe void MulElementWiseU(/*_In_ const*/ float* ps1, /*_In_ const*/ float* ps2, /*_Inout_*/ float* pd, int c); + } +} diff --git a/test/Microsoft.ML.CpuMath.PerformanceTests/Microsoft.ML.CpuMath.PerformanceTests.csproj b/test/Microsoft.ML.CpuMath.PerformanceTests/Microsoft.ML.CpuMath.PerformanceTests.csproj new file mode 100644 index 0000000000..61d22bbfbb --- /dev/null +++ b/test/Microsoft.ML.CpuMath.PerformanceTests/Microsoft.ML.CpuMath.PerformanceTests.csproj @@ -0,0 +1,27 @@ + + + Exe + 7.2 + Microsoft.ML.CpuMath.PerformanceTests.Program + netcoreapp3.0 + true + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/test/Microsoft.ML.CpuMath.PerformanceTests/Program.cs b/test/Microsoft.ML.CpuMath.PerformanceTests/Program.cs new file mode 100644 index 0000000000..cd731e8cf3 --- /dev/null +++ b/test/Microsoft.ML.CpuMath.PerformanceTests/Program.cs @@ -0,0 +1,29 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using BenchmarkDotNet.Configs; +using BenchmarkDotNet.Jobs; +using BenchmarkDotNet.Running; +using BenchmarkDotNet.Toolchains.InProcess; + +namespace Microsoft.ML.CpuMath.PerformanceTests +{ + /* Entry point of the CpuMath benchmark runner. */ class Program + { + /* Runs the benchmarks found in this assembly. The command-line arguments are forwarded to BenchmarkSwitcher so the benchmarks to run can be selected from the command line. */ public static void Main(string[] args) + { + BenchmarkSwitcher + .FromAssembly(typeof(Program).Assembly) + .Run(args, CreateClrVsCoreConfig()); + } + + /* Builds the benchmark configuration: the default config plus a ShortRun job that uses the in-process toolchain. NOTE(review): despite the method name, no CLR-vs-Core comparison is configured here. */ private static IConfig CreateClrVsCoreConfig() + { + var config = DefaultConfig.Instance.With( + Job.ShortRun. + With(InProcessToolchain.Instance)); + return config; + } + } +} diff --git a/test/Microsoft.ML.CpuMath.PerformanceTests/SsePerformanceTests.cs b/test/Microsoft.ML.CpuMath.PerformanceTests/SsePerformanceTests.cs new file mode 100644 index 0000000000..92752a0018 --- /dev/null +++ b/test/Microsoft.ML.CpuMath.PerformanceTests/SsePerformanceTests.cs @@ -0,0 +1,238 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System; +using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Running; +using Microsoft.ML.Runtime.Internal.CpuMath; + +namespace Microsoft.ML.CpuMath.PerformanceTests +{ + /* BenchmarkDotNet benchmarks that time each managed CpuMathUtils operation next to its CpuMathNative counterpart on the same randomly generated arrays. */ public class SsePerformanceTests + { + private const int EXP_MAX = 127; + private const int EXP_MIN = 0; + + private const int IDXLEN = 1000003; + private const int LEN = 1000003; + private const int EXP_RANGE = EXP_MAX / 2; + private const int DEFAULT_SEED = 253421; + private const float DEFAULT_SCALE = 1.11f; + + private float[] src, dst, original, src1, src2; + private int[] idx; + private int seed = DEFAULT_SEED; + + /* Returns a float built from a uniform mantissa in [-1, 1) scaled by 2^k, with k drawn from [-expRange + 1, expRange]. */ private static float NextFloat(Random rand, int expRange) + { + double mantissa = (rand.NextDouble() * 2.0) - 1.0; + double exponent = Math.Pow(2.0, rand.Next(-expRange + 1, expRange + 1)); + return (float)(mantissa * exponent); + } + + /* Chooses the RNG seed: DEFAULT_SEED unless the CPUMATH_SEED environment variable holds an integer (used as-is) or the word "random" in any casing (a fresh seed is drawn); any other value falls back to DEFAULT_SEED. The chosen seed is printed so a run can be reproduced. */ private static int GetSeed() + { + int seed = DEFAULT_SEED; + /* Read the variable once and reuse the value for both the null check and the parse. */ string CPUMATH_SEED = Environment.GetEnvironmentVariable("CPUMATH_SEED"); + + if (CPUMATH_SEED != null) + { + /* TryParse overwrites seed on failure, so the fallback branch restores DEFAULT_SEED explicitly. */ if (!int.TryParse(CPUMATH_SEED, out seed)) + { + if(string.Equals(CPUMATH_SEED, "random", StringComparison.OrdinalIgnoreCase)) + { + seed = new Random().Next(); + } + else + { + seed = DEFAULT_SEED; + } + } + } + + Console.WriteLine("Random seed: " + seed + "; set environment variable CPUMATH_SEED to this value to reproduce results"); + + return seed; + } + + /* Allocates the input arrays and fills them from the seeded RNG; original keeps a copy of the initial dst, and idx receives IDXLEN random indices in [0, LEN). */ [GlobalSetup] + public void Setup() + { + src = new float[LEN]; + dst = new float[LEN]; + src1 = new float[LEN]; + src2 = new float[LEN]; + original = new float[LEN]; + idx = new int[IDXLEN]; + + seed = GetSeed(); + Random rand = new Random(seed); + + for (int i = 0; i < LEN; i++) + { + src[i] = NextFloat(rand, EXP_RANGE); + dst[i] = NextFloat(rand, EXP_RANGE); + original[i] = dst[i]; + src1[i] = NextFloat(rand, EXP_RANGE); + src2[i] = NextFloat(rand, EXP_RANGE); + } + + for (int i = 0; i < IDXLEN; i++) + { + idx[i] = rand.Next(0, LEN); + } + } + + [GlobalCleanup] + 
/* Restores dst from original. NOTE(review): as a GlobalCleanup this presumably runs only once the measurements of a benchmark are finished, so the benchmarks that write to dst in place keep accumulating into it between invocations - confirm that this is acceptable. */ public void GlobalCleanup() + { + original.CopyTo(dst, 0); + } + + [Benchmark] + public unsafe float NativeDotUPerf() + { + fixed (float* psrc = src) + fixed (float* pdst = dst) + { + return CpuMathNativeUtils.DotU(psrc, pdst, LEN); + } + } + + [Benchmark] + public float ManagedDotUPerf() => CpuMathUtils.DotProductDense(src, dst, LEN); + + [Benchmark] + public unsafe float NativeDotSUPerf() + { + fixed (float* psrc = src) + fixed (float* pdst = dst) + fixed (int* pidx = idx) + { + return CpuMathNativeUtils.DotSU(psrc, pdst, pidx, IDXLEN); + } + } + + [Benchmark] + public float ManagedDotSUPerf() => CpuMathUtils.DotProductSparse(src, dst, idx, IDXLEN); + + [Benchmark] + public unsafe float NativeSumSqUPerf() + { + fixed (float* psrc = src) + { + return CpuMathNativeUtils.SumSqU(psrc, LEN); + } + } + + [Benchmark] + public float ManagedSumSqUPerf() => CpuMathUtils.SumSq(src, LEN); + + [Benchmark] + public unsafe void NativeAddUPerf() + { + fixed (float* psrc = src) + fixed (float* pdst = dst) + { + CpuMathNativeUtils.AddU(psrc, pdst, LEN); + } + } + + [Benchmark] + public void ManagedAddUPerf() => CpuMathUtils.Add(src, dst, LEN); + + [Benchmark] + public unsafe void NativeAddSUPerf() + { + fixed (float* psrc = src) + fixed (float* pdst = dst) + fixed (int* pidx = idx) + { + CpuMathNativeUtils.AddSU(psrc, pidx, pdst, IDXLEN); + } + } + + [Benchmark] + public void ManagedAddSUPerf() => CpuMathUtils.Add(src, idx, dst, IDXLEN); + + [Benchmark] + public unsafe void NativeAddScaleUPerf() + { + fixed (float* psrc = src) + fixed (float* pdst = dst) + { + CpuMathNativeUtils.AddScaleU(DEFAULT_SCALE, psrc, pdst, LEN); + } + } + + [Benchmark] + public void ManagedAddScaleUPerf() => CpuMathUtils.AddScale(DEFAULT_SCALE, src, dst, LEN); + + [Benchmark] + public unsafe void NativeAddScaleSUPerf() + { + fixed (float* psrc = src) + fixed (float* pdst = dst) + fixed (int* pidx = idx) + { + CpuMathNativeUtils.AddScaleSU(DEFAULT_SCALE, psrc, pidx, pdst, IDXLEN); + } + } + + [Benchmark] + 
public void ManagedAddScaleSUPerf() => CpuMathUtils.AddScale(DEFAULT_SCALE, src, idx, dst, IDXLEN); + + [Benchmark] + public unsafe void NativeScaleUPerf() + { + fixed (float* pdst = dst) + { + CpuMathNativeUtils.ScaleU(DEFAULT_SCALE, pdst, LEN); + } + } + + [Benchmark] + public void ManagedScaleUPerf() => CpuMathUtils.Scale(DEFAULT_SCALE, dst, LEN); + + [Benchmark] + public unsafe float NativeDist2Perf() + { + fixed (float* psrc = src) + fixed (float* pdst = dst) + { + return CpuMathNativeUtils.Dist2(psrc, pdst, LEN); + } + } + + [Benchmark] + public float ManagedDist2Perf() => CpuMathUtils.L2DistSquared(src, dst, LEN); + + [Benchmark] + public unsafe float NativeSumAbsUPerf() + { + fixed (float* psrc = src) + { + return CpuMathNativeUtils.SumAbsU(psrc, LEN); + } + } + + [Benchmark] + public float ManagedSumAbsqUPerf() => CpuMathUtils.SumAbs(src, LEN); + + [Benchmark] + public unsafe void NativeMulElementWiseUPerf() + { + fixed (float* psrc1 = src1) + fixed (float* psrc2 = src2) + fixed (float* pdst = dst) + { + CpuMathNativeUtils.MulElementWiseU(psrc1, psrc2, pdst, LEN); + } + } + + [Benchmark] + public void ManagedMulElementWiseUPerf() => CpuMathUtils.MulElementWise(src1, src2, dst, LEN); + } +} diff --git a/test/Microsoft.ML.CpuMath.UnitTests.netcoreapp/Microsoft.ML.CpuMath.UnitTests.netcoreapp.csproj b/test/Microsoft.ML.CpuMath.UnitTests.netcoreapp/Microsoft.ML.CpuMath.UnitTests.netcoreapp.csproj new file mode 100644 index 0000000000..e611b15032 --- /dev/null +++ b/test/Microsoft.ML.CpuMath.UnitTests.netcoreapp/Microsoft.ML.CpuMath.UnitTests.netcoreapp.csproj @@ -0,0 +1,16 @@ + + + + netcoreapp3.0 + false + + + + + + + + + + + diff --git a/test/Microsoft.ML.CpuMath.UnitTests.netstandard/Microsoft.ML.CpuMath.UnitTests.netstandard.csproj b/test/Microsoft.ML.CpuMath.UnitTests.netstandard/Microsoft.ML.CpuMath.UnitTests.netstandard.csproj new file mode 100644 index 0000000000..9552f688a8 --- /dev/null +++ 
b/test/Microsoft.ML.CpuMath.UnitTests.netstandard/Microsoft.ML.CpuMath.UnitTests.netstandard.csproj @@ -0,0 +1,16 @@ + + + + netcoreapp2.0 + false + + + + + + + + + + + diff --git a/test/Microsoft.ML.CpuMath.UnitTests.netstandard/UnitTests.cs b/test/Microsoft.ML.CpuMath.UnitTests.netstandard/UnitTests.cs new file mode 100644 index 0000000000..6fc2596ef7 --- /dev/null +++ b/test/Microsoft.ML.CpuMath.UnitTests.netstandard/UnitTests.cs @@ -0,0 +1,246 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using Xunit; +using Microsoft.ML.Runtime.Internal.CpuMath; + +namespace Microsoft.ML.CpuMath.UnitTests +{ + /* xUnit tests for CpuMathUtils. Every theory runs twice: on an 8-element array (length a multiple of 4) and on a 7-element array (length not a multiple of 4), presumably to cover both the vectorized path and the scalar remainder path. */ public class CpuMathUtilsUnitTests + { + private readonly float[][] testArrays; + private readonly int[] testIndexArray; + private const float DEFAULT_SCALE = 1.7f; + private readonly FloatEqualityComparer comparer; + + public CpuMathUtilsUnitTests() + { + // Padded array whose length is a multiple of 4 + float[] testArray1 = new float[8] { 1.96f, -2.38f, -9.76f, 13.84f, -106.37f, -26.93f, 32.45f, 3.29f }; + // Unpadded array whose length is not a multiple of 4. 
+ float[] testArray2 = new float[7] { 1.96f, -2.38f, -9.76f, 13.84f, -106.37f, -26.93f, 32.45f }; + testArrays = new float[][] { testArray1, testArray2 }; + testIndexArray = new int[4] { 0, 2, 5, 6 }; + comparer = new FloatEqualityComparer(); + } + + [Theory] + [InlineData(0, 13306.0376f)] + [InlineData(1, 13291.9235f)] + public void DotUTest(int test, float expected) + { + float[] src = (float[]) testArrays[test].Clone(); + float[] dst = (float[]) src.Clone(); + + for (int i = 0; i < dst.Length; i++) + { + dst[i] += 1; + } + + var actual = CpuMathUtils.DotProductDense(src, dst, dst.Length); + Assert.Equal(expected, actual, 2); + } + + [Theory] + [InlineData(0, 736.7352f)] + [InlineData(1, 736.7352f)] + public void DotSUTest(int test, float expected) + { + float[] src = (float[])testArrays[test].Clone(); + float[] dst = (float[])src.Clone(); + int[] idx = testIndexArray; + + // Ensures src and dst are different arrays + for (int i = 0; i < dst.Length; i++) + { + dst[i] += 1; + } + + var actual = CpuMathUtils.DotProductSparse(src, dst, idx, idx.Length); + Assert.Equal(expected, actual, 4); + } + + [Theory] + [InlineData(0, 13399.9376f)] + [InlineData(1, 13389.1135f)] + public void SumSqUTest(int test, float expected) + { + float[] src = (float[])testArrays[test].Clone(); + var actual = CpuMathUtils.SumSq(src, src.Length); + Assert.Equal(expected, actual, 2); + } + + [Theory] + [InlineData(0)] + [InlineData(1)] + public void AddUTest(int test) + { + float[] src = (float[])testArrays[test].Clone(); + float[] dst = (float[])src.Clone(); + float[] expected = (float[])src.Clone(); + + // Ensures src and dst are different arrays + for (int i = 0; i < dst.Length; i++) + { + dst[i] += 1; + } + + for (int i = 0; i < expected.Length; i++) + { + expected[i] = 2 * expected[i] + 1; + } + + CpuMathUtils.Add(src, dst, dst.Length); + var actual = dst; + Assert.Equal(expected, actual, comparer); + } + + [Theory] + [InlineData(0)] + [InlineData(1)] + public void AddSUTest(int test) + 
{ + float[] src = (float[])testArrays[test].Clone(); + float[] dst = (float[])src.Clone(); + int[] idx = testIndexArray; + float[] expected = (float[])dst.Clone(); + + expected[0] = 3.92f; + expected[2] = -12.14f; + expected[5] = -36.69f; + expected[6] = 46.29f; + + CpuMathUtils.Add(src, idx, dst, idx.Length); + var actual = dst; + Assert.Equal(expected, actual, comparer); + } + + [Theory] + [InlineData(0)] + [InlineData(1)] + public void AddScaleUTest(int test) + { + float[] src = (float[])testArrays[test].Clone(); + float[] dst = (float[])src.Clone(); + float[] expected = (float[])dst.Clone(); + + for (int i = 0; i < expected.Length; i++) + { + expected[i] *= (1 + DEFAULT_SCALE); + } + + CpuMathUtils.AddScale(DEFAULT_SCALE, src, dst, dst.Length); + var actual = dst; + Assert.Equal(expected, actual, comparer); + } + + [Theory] + [InlineData(0)] + [InlineData(1)] + public void AddScaleSUTest(int test) + { + float[] src = (float[])testArrays[test].Clone(); + float[] dst = (float[])src.Clone(); + int[] idx = testIndexArray; + float[] expected = (float[])dst.Clone(); + + expected[0] = 5.292f; + expected[2] = -13.806f; + expected[5] = -43.522f; + expected[6] = 55.978f; + + CpuMathUtils.AddScale(DEFAULT_SCALE, src, idx, dst, idx.Length); + var actual = dst; + Assert.Equal(expected, actual, comparer); + } + + [Theory] + [InlineData(0)] + [InlineData(1)] + public void ScaleUTest(int test) + { + float[] dst = (float[])testArrays[test].Clone(); + float[] expectedOutput = (float[])dst.Clone(); + + for (int i = 0; i < expectedOutput.Length; i++) + { + expectedOutput[i] *= DEFAULT_SCALE; + } + + CpuMathUtils.Scale(DEFAULT_SCALE, dst, dst.Length); + var managedOutput = dst; + Assert.Equal(expectedOutput, managedOutput, comparer); + } + + [Theory] + [InlineData(0, 8.0f)] + [InlineData(1, 7.0f)] + public void Dist2Test(int test, float expected) + { + float[] src = (float[])testArrays[test].Clone(); + float[] dst = (float[])src.Clone(); + + // Ensures src and dst are different 
arrays + for (int i = 0; i < dst.Length; i++) + { + dst[i] += 1; + } + + var actual = CpuMathUtils.L2DistSquared(src, dst, dst.Length); + Assert.Equal(expected, actual, 0); + } + + [Theory] + [InlineData(0, 196.98f)] + [InlineData(1, 193.69f)] + public void SumAbsUTest(int test, float expected) + { + float[] src = (float[])testArrays[test].Clone(); + var actual = CpuMathUtils.SumAbs(src, src.Length); + Assert.Equal(expected, actual, 2); + } + + [Theory] + [InlineData(0)] + [InlineData(1)] + public void MulElementWiseUTest(int test) + { + float[] src1 = (float[])testArrays[test].Clone(); + float[] src2 = (float[])src1.Clone(); + float[] dst = (float[])src1.Clone(); + + // Ensures src1 and src2 are different arrays + for (int i = 0; i < src2.Length; i++) + { + src2[i] += 1; + } + + float[] expected = (float[])src1.Clone(); + + for (int i = 0; i < expected.Length; i++) + { + expected[i] *= (1 + expected[i]); + } + + CpuMathUtils.MulElementWise(src1, src2, dst, dst.Length); + var actual = dst; + Assert.Equal(expected, actual, comparer); + } + } + + /* Approximate float equality used when comparing result arrays: two values are equal when they differ by less than 1e-5 in absolute terms. NOTE(review): an absolute tolerance is tight for the larger magnitudes in the test data; a relative tolerance may be more robust - confirm. */ internal class FloatEqualityComparer : IEqualityComparer<float> + { + public bool Equals(float a, float b) + { + return Math.Abs(a - b) < 1e-5f; + } + + /* Hashing is deliberately unsupported: a tolerance-based Equals cannot be paired with a consistent hash code, and the assertions in this file are expected to need only Equals. */ public int GetHashCode(float a) + { + throw new NotImplementedException(); + } + } +}