Skip to content

Commit 08b397b

Browse files
author
Eugene Gusarov
committed
Revert "CpuMath Enhancement: Make bound checking of loops in hardware intrinsics more efficient"
This reverts commit 65e42bb.
1 parent 65e42bb commit 08b397b

File tree

2 files changed

+37
-37
lines changed

2 files changed

+37
-37
lines changed

src/Microsoft.ML.CpuMath/AvxIntrinsics.cs

Lines changed: 20 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -425,7 +425,7 @@ public static unsafe void AddScalarU(float scalar, Span<float> dst)
425425

426426
Vector256<float> scalarVector256 = Vector256.Create(scalar);
427427

428-
while (pDstCurrent <= pDstEnd - 8)
428+
while (pDstCurrent + 8 <= pDstEnd)
429429
{
430430
Vector256<float> dstVector = Avx.LoadVector256(pDstCurrent);
431431
dstVector = Avx.Add(dstVector, scalarVector256);
@@ -577,7 +577,7 @@ public static unsafe void ScaleSrcU(float scale, ReadOnlySpan<float> src, Span<f
577577

578578
Vector256<float> scaleVector256 = Vector256.Create(scale);
579579

580-
while (pDstCurrent <= pDstEnd - 8)
580+
while (pDstCurrent + 8 <= pDstEnd)
581581
{
582582
Vector256<float> srcVector = Avx.LoadVector256(pSrcCurrent);
583583
srcVector = Avx.Multiply(srcVector, scaleVector256);
@@ -623,7 +623,7 @@ public static unsafe void ScaleAddU(float a, float b, Span<float> dst)
623623
Vector256<float> a256 = Vector256.Create(a);
624624
Vector256<float> b256 = Vector256.Create(b);
625625

626-
while (pDstCurrent <= pDstEnd - 8)
626+
while (pDstCurrent + 8 <= pDstEnd)
627627
{
628628
Vector256<float> dstVector = Avx.LoadVector256(pDstCurrent);
629629
dstVector = Avx.Add(dstVector, b256);
@@ -671,7 +671,7 @@ public static unsafe void AddScaleU(float scale, ReadOnlySpan<float> src, Span<f
671671

672672
Vector256<float> scaleVector256 = Vector256.Create(scale);
673673

674-
while (pDstCurrent <= pEnd - 8)
674+
while (pDstCurrent + 8 <= pEnd)
675675
{
676676
Vector256<float> dstVector = Avx.LoadVector256(pDstCurrent);
677677

@@ -728,7 +728,7 @@ public static unsafe void AddScaleCopyU(float scale, ReadOnlySpan<float> src, Re
728728

729729
Vector256<float> scaleVector256 = Vector256.Create(scale);
730730

731-
while (pResCurrent <= pResEnd - 8)
731+
while (pResCurrent + 8 <= pResEnd)
732732
{
733733
Vector256<float> dstVector = Avx.LoadVector256(pDstCurrent);
734734
dstVector = MultiplyAdd(pSrcCurrent, scaleVector256, dstVector);
@@ -785,7 +785,7 @@ public static unsafe void AddScaleSU(float scale, ReadOnlySpan<float> src, ReadO
785785

786786
Vector256<float> scaleVector256 = Vector256.Create(scale);
787787

788-
while (pIdxCurrent <= pEnd - 8)
788+
while (pIdxCurrent + 8 <= pEnd)
789789
{
790790
Vector256<float> dstVector = Load8(pDstCurrent, pIdxCurrent);
791791
dstVector = MultiplyAdd(pSrcCurrent, scaleVector256, dstVector);
@@ -831,7 +831,7 @@ public static unsafe void AddU(ReadOnlySpan<float> src, Span<float> dst, int cou
831831
float* pDstCurrent = pdst;
832832
float* pEnd = psrc + count;
833833

834-
while (pSrcCurrent <= pEnd - 8)
834+
while (pSrcCurrent + 8 <= pEnd)
835835
{
836836
Vector256<float> srcVector = Avx.LoadVector256(pSrcCurrent);
837837
Vector256<float> dstVector = Avx.LoadVector256(pDstCurrent);
@@ -883,7 +883,7 @@ public static unsafe void AddSU(ReadOnlySpan<float> src, ReadOnlySpan<int> idx,
883883
float* pDstCurrent = pdst;
884884
int* pEnd = pidx + count;
885885

886-
while (pIdxCurrent <= pEnd - 8)
886+
while (pIdxCurrent + 8 <= pEnd)
887887
{
888888
Vector256<float> dstVector = Load8(pDstCurrent, pIdxCurrent);
889889
Vector256<float> srcVector = Avx.LoadVector256(pSrcCurrent);
@@ -931,7 +931,7 @@ public static unsafe void MulElementWiseU(ReadOnlySpan<float> src1, ReadOnlySpan
931931
float* pDstCurrent = pdst;
932932
float* pEnd = pdst + count;
933933

934-
while (pDstCurrent <= pEnd - 8)
934+
while (pDstCurrent + 8 <= pEnd)
935935
{
936936
Vector256<float> src1Vector = Avx.LoadVector256(pSrc1Current);
937937
Vector256<float> src2Vector = Avx.LoadVector256(pSrc2Current);
@@ -1066,7 +1066,7 @@ public static unsafe float SumSqU(ReadOnlySpan<float> src)
10661066

10671067
Vector256<float> result256 = Vector256<float>.Zero;
10681068

1069-
while (pSrcCurrent <= pSrcEnd - 8)
1069+
while (pSrcCurrent + 8 <= pSrcEnd)
10701070
{
10711071
Vector256<float> srcVector = Avx.LoadVector256(pSrcCurrent);
10721072
result256 = MultiplyAdd(srcVector, srcVector, result256);
@@ -1111,7 +1111,7 @@ public static unsafe float SumSqDiffU(float mean, ReadOnlySpan<float> src)
11111111
Vector256<float> result256 = Vector256<float>.Zero;
11121112
Vector256<float> meanVector256 = Vector256.Create(mean);
11131113

1114-
while (pSrcCurrent <= pSrcEnd - 8)
1114+
while (pSrcCurrent + 8 <= pSrcEnd)
11151115
{
11161116
Vector256<float> srcVector = Avx.LoadVector256(pSrcCurrent);
11171117
srcVector = Avx.Subtract(srcVector, meanVector256);
@@ -1158,7 +1158,7 @@ public static unsafe float SumAbsU(ReadOnlySpan<float> src)
11581158

11591159
Vector256<float> result256 = Vector256<float>.Zero;
11601160

1161-
while (pSrcCurrent <= pSrcEnd - 8)
1161+
while (pSrcCurrent + 8 <= pSrcEnd)
11621162
{
11631163
Vector256<float> srcVector = Avx.LoadVector256(pSrcCurrent);
11641164
result256 = Avx.Add(result256, Avx.And(srcVector, _absMask256));
@@ -1203,7 +1203,7 @@ public static unsafe float SumAbsDiffU(float mean, ReadOnlySpan<float> src)
12031203
Vector256<float> result256 = Vector256<float>.Zero;
12041204
Vector256<float> meanVector256 = Vector256.Create(mean);
12051205

1206-
while (pSrcCurrent <= pSrcEnd - 8)
1206+
while (pSrcCurrent + 8 <= pSrcEnd)
12071207
{
12081208
Vector256<float> srcVector = Avx.LoadVector256(pSrcCurrent);
12091209
srcVector = Avx.Subtract(srcVector, meanVector256);
@@ -1251,7 +1251,7 @@ public static unsafe float MaxAbsU(ReadOnlySpan<float> src)
12511251

12521252
Vector256<float> result256 = Vector256<float>.Zero;
12531253

1254-
while (pSrcCurrent <= pSrcEnd - 8)
1254+
while (pSrcCurrent + 8 <= pSrcEnd)
12551255
{
12561256
Vector256<float> srcVector = Avx.LoadVector256(pSrcCurrent);
12571257
result256 = Avx.Max(result256, Avx.And(srcVector, _absMask256));
@@ -1296,7 +1296,7 @@ public static unsafe float MaxAbsDiffU(float mean, ReadOnlySpan<float> src)
12961296
Vector256<float> result256 = Vector256<float>.Zero;
12971297
Vector256<float> meanVector256 = Vector256.Create(mean);
12981298

1299-
while (pSrcCurrent <= pSrcEnd - 8)
1299+
while (pSrcCurrent + 8 <= pSrcEnd)
13001300
{
13011301
Vector256<float> srcVector = Avx.LoadVector256(pSrcCurrent);
13021302
srcVector = Avx.Subtract(srcVector, meanVector256);
@@ -1348,7 +1348,7 @@ public static unsafe float DotU(ReadOnlySpan<float> src, ReadOnlySpan<float> dst
13481348

13491349
Vector256<float> result256 = Vector256<float>.Zero;
13501350

1351-
while (pSrcCurrent <= pSrcEnd - 8)
1351+
while (pSrcCurrent + 8 <= pSrcEnd)
13521352
{
13531353
Vector256<float> dstVector = Avx.LoadVector256(pDstCurrent);
13541354
result256 = MultiplyAdd(pSrcCurrent, dstVector, result256);
@@ -1405,7 +1405,7 @@ public static unsafe float DotSU(ReadOnlySpan<float> src, ReadOnlySpan<float> ds
14051405

14061406
Vector256<float> result256 = Vector256<float>.Zero;
14071407

1408-
while (pIdxCurrent <= pIdxEnd - 8)
1408+
while (pIdxCurrent + 8 <= pIdxEnd)
14091409
{
14101410
Vector256<float> srcVector = Load8(pSrcCurrent, pIdxCurrent);
14111411
result256 = MultiplyAdd(pDstCurrent, srcVector, result256);
@@ -1459,7 +1459,7 @@ public static unsafe float Dist2(ReadOnlySpan<float> src, ReadOnlySpan<float> ds
14591459

14601460
Vector256<float> sqDistanceVector256 = Vector256<float>.Zero;
14611461

1462-
while (pSrcCurrent <= pSrcEnd - 8)
1462+
while (pSrcCurrent + 8 <= pSrcEnd)
14631463
{
14641464
Vector256<float> distanceVector = Avx.Subtract(Avx.LoadVector256(pSrcCurrent),
14651465
Avx.LoadVector256(pDstCurrent));
@@ -1514,7 +1514,7 @@ public static unsafe void SdcaL1UpdateU(float primalUpdate, int count, ReadOnlyS
15141514
Vector256<float> xPrimal256 = Vector256.Create(primalUpdate);
15151515
Vector256<float> xThreshold256 = Vector256.Create(threshold);
15161516

1517-
while (pSrcCurrent <= pSrcEnd - 8)
1517+
while (pSrcCurrent + 8 <= pSrcEnd)
15181518
{
15191519
Vector256<float> xDst1 = Avx.LoadVector256(pDst1Current);
15201520
xDst1 = MultiplyAdd(pSrcCurrent, xPrimal256, xDst1);
@@ -1574,7 +1574,7 @@ public static unsafe void SdcaL1UpdateSU(float primalUpdate, int count, ReadOnly
15741574
Vector256<float> xPrimal256 = Vector256.Create(primalUpdate);
15751575
Vector256<float> xThreshold = Vector256.Create(threshold);
15761576

1577-
while (pIdxCurrent <= pIdxEnd - 8)
1577+
while (pIdxCurrent + 8 <= pIdxEnd)
15781578
{
15791579
Vector256<float> xDst1 = Load8(pdst1, pIdxCurrent);
15801580
xDst1 = MultiplyAdd(pSrcCurrent, xPrimal256, xDst1);

src/Microsoft.ML.CpuMath/SseIntrinsics.cs

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -565,7 +565,7 @@ public static unsafe void AddScaleU(float scale, ReadOnlySpan<float> src, Span<f
565565

566566
Vector128<float> scaleVector = Vector128.Create(scale);
567567

568-
while (pDstCurrent <= pEnd - 4)
568+
while (pDstCurrent + 4 <= pEnd)
569569
{
570570
Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent);
571571
Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent);
@@ -609,7 +609,7 @@ public static unsafe void AddScaleCopyU(float scale, ReadOnlySpan<float> src, Re
609609

610610
Vector128<float> scaleVector = Vector128.Create(scale);
611611

612-
while (pResCurrent <= pResEnd - 4)
612+
while (pResCurrent + 4 <= pResEnd)
613613
{
614614
Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent);
615615
Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent);
@@ -653,7 +653,7 @@ public static unsafe void AddScaleSU(float scale, ReadOnlySpan<float> src, ReadO
653653

654654
Vector128<float> scaleVector = Vector128.Create(scale);
655655

656-
while (pIdxCurrent <= pEnd - 4)
656+
while (pIdxCurrent + 4 <= pEnd)
657657
{
658658
Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent);
659659
Vector128<float> dstVector = Load4(pDstCurrent, pIdxCurrent);
@@ -687,7 +687,7 @@ public static unsafe void AddU(ReadOnlySpan<float> src, Span<float> dst, int cou
687687
float* pDstCurrent = pdst;
688688
float* pEnd = psrc + count;
689689

690-
while (pSrcCurrent <= pEnd - 4)
690+
while (pSrcCurrent + 4 <= pEnd)
691691
{
692692
Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent);
693693
Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent);
@@ -727,7 +727,7 @@ public static unsafe void AddSU(ReadOnlySpan<float> src, ReadOnlySpan<int> idx,
727727
float* pDstCurrent = pdst;
728728
int* pEnd = pidx + count;
729729

730-
while (pIdxCurrent <= pEnd - 4)
730+
while (pIdxCurrent + 4 <= pEnd)
731731
{
732732
Vector128<float> dstVector = Load4(pDstCurrent, pIdxCurrent);
733733
Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent);
@@ -763,7 +763,7 @@ public static unsafe void MulElementWiseU(ReadOnlySpan<float> src1, ReadOnlySpan
763763
float* pDstCurrent = pdst;
764764
float* pEnd = pdst + count;
765765

766-
while (pDstCurrent <= pEnd - 4)
766+
while (pDstCurrent + 4 <= pEnd)
767767
{
768768
Vector128<float> src1Vector = Sse.LoadVector128(pSrc1Current);
769769
Vector128<float> src2Vector = Sse.LoadVector128(pSrc2Current);
@@ -883,7 +883,7 @@ public static unsafe float SumSqU(ReadOnlySpan<float> src)
883883

884884
Vector128<float> result = Vector128<float>.Zero;
885885

886-
while (pSrcCurrent <= pSrcEnd - 4)
886+
while (pSrcCurrent + 4 <= pSrcEnd)
887887
{
888888
Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent);
889889
result = Sse.Add(result, Sse.Multiply(srcVector, srcVector));
@@ -915,7 +915,7 @@ public static unsafe float SumSqDiffU(float mean, ReadOnlySpan<float> src)
915915
Vector128<float> result = Vector128<float>.Zero;
916916
Vector128<float> meanVector = Vector128.Create(mean);
917917

918-
while (pSrcCurrent <= pSrcEnd - 4)
918+
while (pSrcCurrent + 4 <= pSrcEnd)
919919
{
920920
Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent);
921921
srcVector = Sse.Subtract(srcVector, meanVector);
@@ -948,7 +948,7 @@ public static unsafe float SumAbsU(ReadOnlySpan<float> src)
948948

949949
Vector128<float> result = Vector128<float>.Zero;
950950

951-
while (pSrcCurrent <= pSrcEnd - 4)
951+
while (pSrcCurrent + 4 <= pSrcEnd)
952952
{
953953
Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent);
954954
result = Sse.Add(result, Sse.And(srcVector, AbsMask128));
@@ -980,7 +980,7 @@ public static unsafe float SumAbsDiffU(float mean, ReadOnlySpan<float> src)
980980
Vector128<float> result = Vector128<float>.Zero;
981981
Vector128<float> meanVector = Vector128.Create(mean);
982982

983-
while (pSrcCurrent <= pSrcEnd - 4)
983+
while (pSrcCurrent + 4 <= pSrcEnd)
984984
{
985985
Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent);
986986
srcVector = Sse.Subtract(srcVector, meanVector);
@@ -1013,7 +1013,7 @@ public static unsafe float MaxAbsU(ReadOnlySpan<float> src)
10131013

10141014
Vector128<float> result = Vector128<float>.Zero;
10151015

1016-
while (pSrcCurrent <= pSrcEnd - 4)
1016+
while (pSrcCurrent + 4 <= pSrcEnd)
10171017
{
10181018
Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent);
10191019
result = Sse.Max(result, Sse.And(srcVector, AbsMask128));
@@ -1045,7 +1045,7 @@ public static unsafe float MaxAbsDiffU(float mean, ReadOnlySpan<float> src)
10451045
Vector128<float> result = Vector128<float>.Zero;
10461046
Vector128<float> meanVector = Vector128.Create(mean);
10471047

1048-
while (pSrcCurrent <= pSrcEnd - 4)
1048+
while (pSrcCurrent + 4 <= pSrcEnd)
10491049
{
10501050
Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent);
10511051
srcVector = Sse.Subtract(srcVector, meanVector);
@@ -1082,7 +1082,7 @@ public static unsafe float DotU(ReadOnlySpan<float> src, ReadOnlySpan<float> dst
10821082

10831083
Vector128<float> result = Vector128<float>.Zero;
10841084

1085-
while (pSrcCurrent <= pSrcEnd - 4)
1085+
while (pSrcCurrent + 4 <= pSrcEnd)
10861086
{
10871087
Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent);
10881088
Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent);
@@ -1126,7 +1126,7 @@ public static unsafe float DotSU(ReadOnlySpan<float> src, ReadOnlySpan<float> ds
11261126

11271127
Vector128<float> result = Vector128<float>.Zero;
11281128

1129-
while (pIdxCurrent <= pIdxEnd - 4)
1129+
while (pIdxCurrent + 4 <= pIdxEnd)
11301130
{
11311131
Vector128<float> srcVector = Load4(pSrcCurrent, pIdxCurrent);
11321132
Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent);
@@ -1167,7 +1167,7 @@ public static unsafe float Dist2(ReadOnlySpan<float> src, ReadOnlySpan<float> ds
11671167

11681168
Vector128<float> sqDistanceVector = Vector128<float>.Zero;
11691169

1170-
while (pSrcCurrent <= pSrcEnd - 4)
1170+
while (pSrcCurrent + 4 <= pSrcEnd)
11711171
{
11721172
Vector128<float> distanceVector = Sse.Subtract(Sse.LoadVector128(pSrcCurrent),
11731173
Sse.LoadVector128(pDstCurrent));
@@ -1210,7 +1210,7 @@ public static unsafe void SdcaL1UpdateU(float primalUpdate, int count, ReadOnlyS
12101210
Vector128<float> signMask = Vector128.Create(-0.0f); // 0x8000 0000
12111211
Vector128<float> xThreshold = Vector128.Create(threshold);
12121212

1213-
while (pSrcCurrent <= pSrcEnd - 4)
1213+
while (pSrcCurrent + 4 <= pSrcEnd)
12141214
{
12151215
Vector128<float> xSrc = Sse.LoadVector128(pSrcCurrent);
12161216

@@ -1255,7 +1255,7 @@ public static unsafe void SdcaL1UpdateSU(float primalUpdate, int count, ReadOnly
12551255
Vector128<float> signMask = Vector128.Create(-0.0f); // 0x8000 0000
12561256
Vector128<float> xThreshold = Vector128.Create(threshold);
12571257

1258-
while (pIdxCurrent <= pIdxEnd - 4)
1258+
while (pIdxCurrent + 4 <= pIdxEnd)
12591259
{
12601260
Vector128<float> xSrc = Sse.LoadVector128(pSrcCurrent);
12611261

0 commit comments

Comments
 (0)