@@ -59,31 +59,34 @@ public static unsafe OperationStatus EncodeToUtf8(ReadOnlySpan<byte> bytes, Span
5959
6060 byte * src = srcBytes ;
6161 byte * dest = destBytes ;
62- byte * maxSrc = srcBytes + ( uint ) maxSrcLength ;
62+ byte * srcEnd = srcBytes + ( uint ) srcLength ;
63+ byte * srcMax = srcBytes + ( uint ) maxSrcLength ;
6364
6465 if ( maxSrcLength >= 16 )
6566 {
66- if ( Avx2 . IsSupported && maxSrcLength >= 32 )
67+ byte * end = srcMax - 32 ;
68+ if ( Avx2 . IsSupported && ( end >= src ) )
6769 {
68- Avx2Encode ( ref src , ref dest , maxSrcLength , destLength , srcBytes , destBytes ) ;
70+ Avx2Encode ( ref src , ref dest , end , maxSrcLength , destLength , srcBytes , destBytes ) ;
6971
70- if ( srcBytes + ( uint ) srcLength == src )
72+ if ( src == srcEnd )
7173 goto DoneExit ;
7274 }
7375
74- if ( Ssse3 . IsSupported && ( maxSrc - src >= 16 ) )
76+ end = srcMax - 16 ;
77+ if ( Ssse3 . IsSupported && ( end >= src ) )
7578 {
76- Ssse3Encode ( ref src , ref dest , maxSrcLength , destLength , srcBytes , destBytes ) ;
79+ Ssse3Encode ( ref src , ref dest , end , maxSrcLength , destLength , srcBytes , destBytes ) ;
7780
78- if ( srcBytes + ( uint ) srcLength == src )
81+ if ( src == srcEnd )
7982 goto DoneExit ;
8083 }
8184 }
8285
8386 uint result = 0 ;
8487
85- maxSrc -= 2 ;
86- while ( src < maxSrc )
88+ srcMax -= 2 ;
89+ while ( src < srcMax )
8790 {
8891 result = Encode ( src , encodingMap ) ;
8992 Unsafe . WriteUnaligned ( dest , result ) ;
@@ -97,15 +100,15 @@ public static unsafe OperationStatus EncodeToUtf8(ReadOnlySpan<byte> bytes, Span
97100 if ( ! isFinalBlock )
98101 goto NeedMoreData ;
99102
100- maxSrc = srcBytes + ( uint ) srcLength ;
101- if ( src + 1 == maxSrc )
103+ srcMax = srcBytes + ( uint ) srcLength ;
104+ if ( src + 1 == srcMax )
102105 {
103106 result = EncodeAndPadTwo ( src , encodingMap ) ;
104107 Unsafe . WriteUnaligned ( dest , result ) ;
105108 src += 1 ;
106109 dest += 4 ;
107110 }
108- else if ( src + 2 == maxSrc )
111+ else if ( src + 2 == srcMax )
109112 {
110113 result = EncodeAndPadOne ( src , encodingMap ) ;
111114 Unsafe . WriteUnaligned ( dest , result ) ;
@@ -211,12 +214,8 @@ public static unsafe OperationStatus EncodeToUtf8InPlace(Span<byte> buffer, int
211214 }
212215
213216 [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
214- private static unsafe void Avx2Encode ( ref byte * srcBytes , ref byte * destBytes , int sourceLength , int destLength , byte * srcStart , byte * destStart )
217+ private static unsafe void Avx2Encode ( ref byte * srcBytes , ref byte * destBytes , byte * srcEnd , int sourceLength , int destLength , byte * srcStart , byte * destStart )
215218 {
216- byte * src = srcBytes ;
217- byte * dest = destBytes ;
218- byte * srcEnd = srcStart + sourceLength - 28 ; // 28 = 32 - 4
219-
220219 // The JIT won't hoist these "constants", so help it
221220 Vector256 < sbyte > shuffleVec = s_avxEncodeShuffleVec ;
222221 Vector256 < sbyte > shuffleConstant0 = Vector256 . Create ( 0x0fc0fc00 ) . AsSByte ( ) ;
@@ -227,13 +226,19 @@ private static unsafe void Avx2Encode(ref byte* srcBytes, ref byte* destBytes, i
227226 Vector256 < sbyte > translationContant1 = Vector256 . Create ( ( sbyte ) 25 ) ;
228227 Vector256 < sbyte > lut = s_avxEncodeLut ;
229228
229+ byte * src = srcBytes ;
230+ byte * dest = destBytes ;
231+
230232 // first load is done at c-0 not to get a segfault
231233 AssertRead < Vector256 < sbyte > > ( src , srcStart , sourceLength ) ;
232234 Vector256 < sbyte > str = Avx . LoadVector256 ( src ) . AsSByte ( ) ;
233235
234236 // shift by 4 bytes, as required by Reshuffle
235237 str = Avx2 . PermuteVar8x32 ( str . AsInt32 ( ) , s_avxEncodePermuteVec ) . AsSByte ( ) ;
236238
239+ // Next loads are done at src-4, as required by Reshuffle, so shift it once
240+ src -= 4 ;
241+
237242 while ( true )
238243 {
239244 // Reshuffle
@@ -259,22 +264,18 @@ private static unsafe void Avx2Encode(ref byte* srcBytes, ref byte* destBytes, i
259264 if ( src > srcEnd )
260265 break ;
261266
262- // Load at c -4, as required by enc_reshuffle
263- AssertRead < Vector256 < sbyte > > ( src - 4 , srcStart , sourceLength ) ;
264- str = Avx . LoadVector256 ( src - 4 ) . AsSByte ( ) ;
267+ // Load at src -4, as required by Reshuffle (already shifted by -4)
268+ AssertRead < Vector256 < sbyte > > ( src , srcStart , sourceLength ) ;
269+ str = Avx . LoadVector256 ( src ) . AsSByte ( ) ;
265270 }
266271
267- srcBytes = src ;
272+ srcBytes = src + 4 ;
268273 destBytes = dest ;
269274 }
270275
271276 [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
272- private static unsafe void Ssse3Encode ( ref byte * srcBytes , ref byte * destBytes , int sourceLength , int destLength , byte * srcStart , byte * destStart )
277+ private static unsafe void Ssse3Encode ( ref byte * srcBytes , ref byte * destBytes , byte * srcEnd , int sourceLength , int destLength , byte * srcStart , byte * destStart )
273278 {
274- byte * src = srcBytes ;
275- byte * dest = destBytes ;
276- byte * srcEnd = srcStart + sourceLength - 16 ;
277-
278279 // The JIT won't hoist these "constants", so help it
279280 Vector128 < sbyte > shuffleVec = s_sseEncodeShuffleVec ;
280281 Vector128 < sbyte > shuffleConstant0 = Vector128 . Create ( 0x0fc0fc00 ) . AsSByte ( ) ;
@@ -285,6 +286,9 @@ private static unsafe void Ssse3Encode(ref byte* srcBytes, ref byte* destBytes,
285286 Vector128 < sbyte > translationContant1 = Vector128 . Create ( ( sbyte ) 25 ) ;
286287 Vector128 < sbyte > lut = s_sseEncodeLut ;
287288
289+ byte * src = srcBytes ;
290+ byte * dest = destBytes ;
291+
288292 //while (remaining >= 16)
289293 while ( src <= srcEnd )
290294 {
0 commit comments