11using System ;
22using System . Collections . Generic ;
33using System . Diagnostics . CodeAnalysis ;
4+ using System . Runtime . CompilerServices ;
5+
46
57#if NET6_0_OR_GREATER
68using System . Runtime . Intrinsics ;
9+ using System . Runtime . Intrinsics . Arm ;
710using System . Runtime . Intrinsics . X86 ;
811#endif
912
@@ -61,6 +64,12 @@ public int EstimateFrequency(T value)
6164 {
6265 return EstimateFrequencyAvx ( value ) ;
6366 }
67+ #if NET6_0_OR_GREATER
68+ else if ( isa . IsArm64Supported )
69+ {
70+ return EstimateFrequencyArm ( value ) ;
71+ }
72+ #endif
6473 else
6574 {
6675 return EstimateFrequencyStd ( value ) ;
@@ -84,11 +93,16 @@ public void Increment(T value)
8493 {
8594 IncrementAvx ( value ) ;
8695 }
96+ #if NET6_0_OR_GREATER
97+ else if ( isa . IsArm64Supported )
98+ {
99+ IncrementArm ( value ) ;
100+ }
101+ #endif
87102 else
88103 {
89104 IncrementStd ( value ) ;
90105 }
91- #endif
92106 }
93107
94108 /// <summary>
@@ -314,5 +328,94 @@ private unsafe void IncrementAvx(T value)
314328 }
315329 }
316330#endif
331+
332+ #if NET6_0_OR_GREATER
/// <summary>
/// Arm64 (AdvSimd) path that increments the four 4-bit counters selected by the
/// hash of <paramref name="value"/>, one counter in each of four table slots of
/// the chosen block. Counters that already hold 15 are left unchanged. When at
/// least one counter was incremented, <c>size</c> is advanced and <c>Reset()</c>
/// is invoked once it equals <c>sampleSize</c>.
/// </summary>
/// <param name="value">The value whose counters are incremented.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private unsafe void IncrementArm(T value)
{
    int spreadHash = Spread(comparer.GetHashCode(value));
    int rehashed = Rehash(spreadHash);
    int blockBase = (spreadHash & blockMask) << 3;

    // Lane i receives the rehashed value shifted by the i-th count below
    // (negative counts make ShiftArithmetic shift toward the low bits).
    Vector128<int> laneShifts = Vector128.Create(0, -8, -16, -24);
    Vector128<int> laneHash = AdvSimd.ShiftArithmetic(Vector128.Create(rehashed), laneShifts);

    // Bits 1..4 of each lane choose a counter (0..15) inside a 64-bit slot;
    // bit 0 chooses between two adjacent slots of the pair 0/2/4/6 in the block.
    Vector128<int> counterIndex = AdvSimd.And(AdvSimd.ShiftRightLogical(laneHash, 1), Vector128.Create(0xf));
    Vector128<int> slotParity = AdvSimd.And(laneHash, Vector128.Create(1));
    Vector128<int> slots = AdvSimd.Add(AdvSimd.Add(Vector128.Create(blockBase), slotParity), Vector128.Create(0, 2, 4, 6));

    int slot0 = AdvSimd.Extract(slots, 0);
    int slot1 = AdvSimd.Extract(slots, 1);
    int slot2 = AdvSimd.Extract(slots, 2);
    int slot3 = AdvSimd.Extract(slots, 3);

    // Counter index * 4 = bit position of the counter within its 64-bit slot.
    Vector128<int> bitPosition = AdvSimd.ShiftLeftLogicalSaturate(counterIndex, 2);

    // Place each 32-bit bit position in the low half of a 64-bit lane so it can
    // be used as a per-lane shift count for 64-bit elements.
    Vector128<int> spreadA = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, bitPosition, 0);
    spreadA = AdvSimd.Arm64.InsertSelectedScalar(spreadA, 2, bitPosition, 1);
    Vector128<int> spreadB = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, bitPosition, 2);
    spreadB = AdvSimd.Arm64.InsertSelectedScalar(spreadB, 2, bitPosition, 3);

    Vector128<long> shiftA = spreadA.AsInt64();
    Vector128<long> shiftB = spreadB.AsInt64();

    fixed (long* tablePtr = table)
    {
        Vector128<long> rowsA = Vector128.Create(AdvSimd.LoadVector64(tablePtr + slot0), AdvSimd.LoadVector64(tablePtr + slot1));
        Vector128<long> rowsB = Vector128.Create(AdvSimd.LoadVector64(tablePtr + slot2), AdvSimd.LoadVector64(tablePtr + slot3));

        // 0xf moved onto the selected counter of each slot.
        Vector128<long> nibble = Vector128.Create(0xfL);
        Vector128<long> maskA = AdvSimd.ShiftArithmetic(nibble, shiftA);
        Vector128<long> maskB = AdvSimd.ShiftArithmetic(nibble, shiftB);

        // All ones in lanes whose selected counter is not yet saturated at 15.
        Vector128<long> roomA = AdvSimd.Not(AdvSimd.Arm64.CompareEqual(AdvSimd.And(rowsA, maskA), maskA));
        Vector128<long> roomB = AdvSimd.Not(AdvSimd.Arm64.CompareEqual(AdvSimd.And(rowsB, maskB), maskB));

        // A single 1 at the counter's position, or 0 where the counter is full.
        Vector128<long> unit = Vector128.Create(1L);
        Vector128<long> deltaA = AdvSimd.And(roomA, AdvSimd.ShiftArithmetic(unit, shiftA));
        Vector128<long> deltaB = AdvSimd.And(roomB, AdvSimd.ShiftArithmetic(unit, shiftB));

        tablePtr[slot0] += AdvSimd.Extract(deltaA, 0);
        tablePtr[slot1] += AdvSimd.Extract(deltaA, 1);
        tablePtr[slot2] += AdvSimd.Extract(deltaB, 0);
        tablePtr[slot3] += AdvSimd.Extract(deltaB, 1);

        // Horizontal reduction: the result is non-zero when any delta was non-zero.
        var peakA = AdvSimd.Arm64.MaxAcross(deltaA.AsInt32());
        var peakB = AdvSimd.Arm64.MaxAcross(deltaB.AsInt32());
        var peak = AdvSimd.Arm64.MaxAcross(AdvSimd.Arm64.InsertSelectedScalar(peakA, 1, peakB, 0).AsInt16());

        if (peak.ToScalar() != 0)
        {
            if (++size == sampleSize)
            {
                Reset();
            }
        }
    }
}
383+
/// <summary>
/// Arm64 (AdvSimd) path that estimates the frequency of <paramref name="value"/>
/// by reading the four 4-bit counters selected by its hash and taking the
/// smallest of them.
/// </summary>
/// <param name="value">The value to estimate.</param>
/// <returns>The minimum of the four selected counters (each in the range 0..15).</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private unsafe int EstimateFrequencyArm(T value)
{
    int spreadHash = Spread(comparer.GetHashCode(value));
    int rehashed = Rehash(spreadHash);
    int blockBase = (spreadHash & blockMask) << 3;

    // Lane i receives the rehashed value shifted by the i-th count below
    // (negative counts make ShiftArithmetic shift toward the low bits).
    Vector128<int> laneShifts = Vector128.Create(0, -8, -16, -24);
    Vector128<int> laneHash = AdvSimd.ShiftArithmetic(Vector128.Create(rehashed), laneShifts);

    // Bits 1..4 of each lane choose a counter (0..15) inside a 64-bit slot;
    // bit 0 chooses between two adjacent slots of the pair 0/2/4/6 in the block.
    Vector128<int> counterIndex = AdvSimd.And(AdvSimd.ShiftRightLogical(laneHash, 1), Vector128.Create(0xf));
    Vector128<int> slotParity = AdvSimd.And(laneHash, Vector128.Create(1));
    Vector128<int> slots = AdvSimd.Add(AdvSimd.Add(Vector128.Create(blockBase), slotParity), Vector128.Create(0, 2, 4, 6));

    // Counter index * 4 = bit position of the counter within its 64-bit slot.
    Vector128<int> bitPosition = AdvSimd.ShiftLeftLogicalSaturate(counterIndex, 2);

    // Spread the bit positions into the low halves of 64-bit lanes and negate
    // them, so the shifts below move the selected counter down to bits 0..3.
    Vector128<int> spreadA = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, bitPosition, 0);
    spreadA = AdvSimd.Arm64.InsertSelectedScalar(spreadA, 2, bitPosition, 1);
    Vector128<int> spreadB = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, bitPosition, 2);
    spreadB = AdvSimd.Arm64.InsertSelectedScalar(spreadB, 2, bitPosition, 3);

    Vector128<long> downShiftA = AdvSimd.Negate(spreadA).AsInt64();
    Vector128<long> downShiftB = AdvSimd.Negate(spreadB).AsInt64();

    // Byte selectors: indexes 0-3 and 8-11 pick the low four bytes of each
    // 64-bit lane; 0xFF entries select nothing from the table.
    Vector128<byte> pickIntoLowHalf = Vector128.Create(0x0B0A090803020100, 0xFFFFFFFFFFFFFFFF).AsByte();
    Vector128<byte> pickIntoHighHalf = Vector128.Create(0xFFFFFFFFFFFFFFFF, 0x0B0A090803020100).AsByte();

    fixed (long* tablePtr = table)
    {
        long* slot0 = tablePtr + AdvSimd.Extract(slots, 0);
        long* slot1 = tablePtr + AdvSimd.Extract(slots, 1);
        long* slot2 = tablePtr + AdvSimd.Extract(slots, 2);
        long* slot3 = tablePtr + AdvSimd.Extract(slots, 3);

        Vector128<long> rowsA = Vector128.Create(AdvSimd.LoadVector64(slot0), AdvSimd.LoadVector64(slot1));
        Vector128<long> rowsB = Vector128.Create(AdvSimd.LoadVector64(slot2), AdvSimd.LoadVector64(slot3));

        // Isolate the selected 4-bit counter of each slot.
        Vector128<long> nibble = Vector128.Create(0xfL);
        Vector128<long> countsA = AdvSimd.And(AdvSimd.ShiftArithmetic(rowsA, downShiftA), nibble);
        Vector128<long> countsB = AdvSimd.And(AdvSimd.ShiftArithmetic(rowsB, downShiftB), nibble);

        // Pack the four counters into the four 32-bit lanes of one vector:
        // the pair from countsA fills the low half, the pair from countsB the high half.
        Vector128<byte> packed = AdvSimd.Arm64.VectorTableLookup(countsA.AsByte(), pickIntoLowHalf);
        packed = AdvSimd.Arm64.VectorTableLookupExtension(packed, countsB.AsByte(), pickIntoHighHalf);

        var smallest = AdvSimd.Arm64.MinAcross(packed.AsInt32());

        return smallest.ToScalar();
    }
}
419+ #endif
317420 }
318421}
0 commit comments