Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit c8b6cb3

Browse files
committed
In decode: avoided a stack spill and hoisted the zero vector outside the loops
Cf. #34529 (comment)
1 parent 31c4741 commit c8b6cb3

File tree

1 file changed

+20
-6
lines changed

1 file changed

+20
-6
lines changed

src/System.Memory/src/System/Buffers/Text/Base64Decoder.cs

Lines changed: 20 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -350,11 +350,12 @@ private static unsafe void Avx2Decode(ref byte* srcBytes, ref byte* destBytes, b
350350
Vector256<sbyte> lutHi = ReadVector<Vector256<sbyte>>(s_avxDecodeLutHi);
351351
Vector256<sbyte> lutLo = ReadVector<Vector256<sbyte>>(s_avxDecodeLutLo);
352352
Vector256<sbyte> lutShift = ReadVector<Vector256<sbyte>>(s_avxDecodeLutShift);
353-
Vector256<sbyte> mask2F = s_avxDecodeMask2F;
353+
Vector256<sbyte> mask2F = ReadVector<Vector256<sbyte>>(s_avxDecodeMask2F);
354354
Vector256<sbyte> shuffleConstant0 = Vector256.Create(0x01400140).AsSByte();
355355
Vector256<short> shuffleConstant1 = Vector256.Create(0x00011000).AsInt16();
356356
Vector256<sbyte> shuffleVec = ReadVector<Vector256<sbyte>>(s_avxDecodeShuffleVec);
357357
Vector256<int> permuteVec = ReadVector<Vector256<sbyte>>(s_avxDecodePermuteVec).AsInt32();
358+
Vector256<sbyte> zero = Vector256<sbyte>.Zero;
358359

359360
byte* src = srcBytes;
360361
byte* dest = destBytes;
@@ -369,7 +370,6 @@ private static unsafe void Avx2Decode(ref byte* srcBytes, ref byte* destBytes, b
369370
Vector256<sbyte> loNibbles = Avx2.And(str, mask2F);
370371
Vector256<sbyte> hi = Avx2.Shuffle(lutHi, hiNibbles);
371372
Vector256<sbyte> lo = Avx2.Shuffle(lutLo, loNibbles);
372-
Vector256<sbyte> zero = Vector256<sbyte>.Zero;
373373

374374
// https://github.com/dotnet/coreclr/issues/21247
375375
if (Avx2.MoveMask(Avx2.CompareGreaterThan(Avx2.And(lo, hi), zero)) != 0)
@@ -403,10 +403,11 @@ private static unsafe void Ssse3Decode(ref byte* srcBytes, ref byte* destBytes,
403403
Vector128<sbyte> lutHi = ReadVector<Vector128<sbyte>>(s_sseDecodeLutHi);
404404
Vector128<sbyte> lutLo = ReadVector<Vector128<sbyte>>(s_sseDecodeLutLo);
405405
Vector128<sbyte> lutShift = ReadVector<Vector128<sbyte>>(s_sseDecodeLutShift);
406-
Vector128<sbyte> mask2F = s_sseDecodeMask2F;
406+
Vector128<sbyte> mask2F = ReadVector<Vector128<sbyte>>(s_sseDecodeMask2F);
407407
Vector128<sbyte> shuffleConstant0 = Vector128.Create(0x01400140).AsSByte();
408408
Vector128<short> shuffleConstant1 = Vector128.Create(0x00011000).AsInt16();
409409
Vector128<sbyte> shuffleVec = ReadVector<Vector128<sbyte>>(s_sseDecodeShuffleVec);
410+
Vector128<sbyte> zero = Vector128<sbyte>.Zero;
410411

411412
byte* src = srcBytes;
412413
byte* dest = destBytes;
@@ -421,7 +422,6 @@ private static unsafe void Ssse3Decode(ref byte* srcBytes, ref byte* destBytes,
421422
Vector128<sbyte> loNibbles = Sse2.And(str, mask2F);
422423
Vector128<sbyte> hi = Ssse3.Shuffle(lutHi, hiNibbles);
423424
Vector128<sbyte> lo = Ssse3.Shuffle(lutLo, loNibbles);
424-
Vector128<sbyte> zero = Vector128<sbyte>.Zero;
425425

426426
if (Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.And(lo, hi), zero)) != 0)
427427
break;
@@ -526,7 +526,12 @@ private static unsafe void WriteThreeLowOrderBytes(byte* destination, int value)
526526
0, 0, 0, 0
527527
};
528528

529-
private static readonly Vector128<sbyte> s_sseDecodeMask2F = Sse3.IsSupported ? Vector128.Create((sbyte)0x2F) : default; // ASCII: /
529+
private static ReadOnlySpan<sbyte> s_sseDecodeMask2F => new sbyte[] { // ASCII: /
530+
0x2F, 0x2F, 0x2F, 0x2F,
531+
0x2F, 0x2F, 0x2F, 0x2F,
532+
0x2F, 0x2F, 0x2F, 0x2F,
533+
0x2F, 0x2F, 0x2F, 0x2F
534+
};
530535

531536
private static ReadOnlySpan<sbyte> s_avxDecodeShuffleVec => new sbyte[] {
532537
2, 1, 0, 6,
@@ -550,7 +555,16 @@ private static unsafe void WriteThreeLowOrderBytes(byte* destination, int value)
550555
-1, -1, -1, -1
551556
};
552557

553-
private static readonly Vector256<sbyte> s_avxDecodeMask2F = Avx2.IsSupported ? Vector256.Create((sbyte)0x2F) : default; // ASCII: /
558+
private static ReadOnlySpan<sbyte> s_avxDecodeMask2F => new sbyte[] { // ASCII: /
559+
0x2F, 0x2F, 0x2F, 0x2F,
560+
0x2F, 0x2F, 0x2F, 0x2F,
561+
0x2F, 0x2F, 0x2F, 0x2F,
562+
0x2F, 0x2F, 0x2F, 0x2F,
563+
0x2F, 0x2F, 0x2F, 0x2F,
564+
0x2F, 0x2F, 0x2F, 0x2F,
565+
0x2F, 0x2F, 0x2F, 0x2F,
566+
0x2F, 0x2F, 0x2F, 0x2F
567+
};
554568

555569
private static ReadOnlySpan<sbyte> s_avxDecodeLutLo => new sbyte[] {
556570
0x15, 0x11, 0x11, 0x11,

0 commit comments

Comments
 (0)