11using System ;
22using System . Collections . Generic ;
33using System . Diagnostics . CodeAnalysis ;
4+ using System . Runtime . CompilerServices ;
5+
46
57#if NET6_0_OR_GREATER
68using System . Runtime . Intrinsics ;
9+ using System . Runtime . Intrinsics . Arm ;
710using System . Runtime . Intrinsics . X86 ;
811#endif
912
@@ -61,6 +64,12 @@ public int EstimateFrequency(T value)
6164 {
6265 return EstimateFrequencyAvx ( value ) ;
6366 }
67+ #if NET6_0_OR_GREATER
68+ else if ( isa . IsArm64Supported )
69+ {
70+ return EstimateFrequencyArm ( value ) ;
71+ }
72+ #endif
6473 else
6574 {
6675 return EstimateFrequencyStd ( value ) ;
@@ -84,6 +93,12 @@ public void Increment(T value)
8493 {
8594 IncrementAvx ( value ) ;
8695 }
96+ #if NET6_0_OR_GREATER
97+ else if ( isa . IsArm64Supported )
98+ {
99+ IncrementArm ( value ) ;
100+ }
101+ #endif
87102 else
88103 {
89104 IncrementStd ( value ) ;
@@ -314,5 +329,94 @@ private unsafe void IncrementAvx(T value)
314329 }
315330 }
316331#endif
332+
333+ #if NET6_0_OR_GREATER
/// <summary>
/// Arm64 AdvSimd path that increments the four 4-bit counters selected by the
/// hash of <paramref name="value"/>, skipping any counter already at 15.
/// </summary>
/// <param name="value">The item whose counters are incremented.</param>
/// <remarks>
/// When at least one counter was actually incremented, <c>size</c> is advanced and
/// <c>Reset()</c> is invoked once it reaches <c>sampleSize</c>.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private unsafe void IncrementArm(T value)
{
    int spreadHash = Spread(comparer.GetHashCode(value));
    int rehashed = Rehash(spreadHash);
    int blockBase = (spreadHash & blockMask) << 3;

    // Lane i receives the rehashed value shifted by 0, -8, -16 and -24 respectively.
    Vector128<int> laneShifts = Vector128.Create(0, -8, -16, -24);
    Vector128<int> laneHashes = AdvSimd.ShiftArithmetic(Vector128.Create(rehashed), laneShifts);

    // Bits 1..4 of every lane pick one of the 16 counters packed in a long.
    Vector128<int> counterIndex = AdvSimd.And(AdvSimd.ShiftRightLogical(laneHashes, 1), Vector128.Create(0xf));

    // Bit 0 of every lane picks between the two longs of the pair at offset 0, 2, 4 or 6 in the block.
    Vector128<int> lowBit = AdvSimd.And(laneHashes, Vector128.Create(1));
    Vector128<int> pairStart = Vector128.Create(0, 2, 4, 6);
    Vector128<int> slots = AdvSimd.Add(AdvSimd.Add(Vector128.Create(blockBase), lowBit), pairStart);

    fixed (long* tablePtr = table)
    {
        int slot0 = AdvSimd.Extract(slots, 0);
        int slot1 = AdvSimd.Extract(slots, 1);
        int slot2 = AdvSimd.Extract(slots, 2);
        int slot3 = AdvSimd.Extract(slots, 3);

        // Gather the four table words into two 2 x long vectors.
        Vector128<long> wordsLo = Vector128.Create(
            AdvSimd.LoadVector64(tablePtr + slot0),
            AdvSimd.LoadVector64(tablePtr + slot1));
        Vector128<long> wordsHi = Vector128.Create(
            AdvSimd.LoadVector64(tablePtr + slot2),
            AdvSimd.LoadVector64(tablePtr + slot3));

        // Counter index * 4 = bit offset of the counter inside its long.
        Vector128<int> bitOffset = AdvSimd.ShiftLeftLogicalSaturate(counterIndex, 2);

        // Spread the int offsets into int lanes 0 and 2 so that, viewed as longs,
        // each 64-bit lane carries the offset for one table word.
        Vector128<int> spreadLo = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, bitOffset, 0);
        spreadLo = AdvSimd.Arm64.InsertSelectedScalar(spreadLo, 2, bitOffset, 1);
        Vector128<int> spreadHi = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, bitOffset, 2);
        spreadHi = AdvSimd.Arm64.InsertSelectedScalar(spreadHi, 2, bitOffset, 3);

        Vector128<long> shiftLo = spreadLo.AsInt64();
        Vector128<long> shiftHi = spreadHi.AsInt64();

        // 0xf moved onto the selected counter of each word.
        Vector128<long> nibble = Vector128.Create(0xfL);
        Vector128<long> maskLo = AdvSimd.ShiftArithmetic(nibble, shiftLo);
        Vector128<long> maskHi = AdvSimd.ShiftArithmetic(nibble, shiftHi);

        // All-ones in each lane whose counter is not yet 15, zero otherwise.
        Vector128<long> fullLo = AdvSimd.Arm64.CompareEqual(AdvSimd.And(wordsLo, maskLo), maskLo);
        Vector128<long> fullHi = AdvSimd.Arm64.CompareEqual(AdvSimd.And(wordsHi, maskHi), maskHi);
        Vector128<long> roomLo = AdvSimd.Not(fullLo);
        Vector128<long> roomHi = AdvSimd.Not(fullHi);

        // 1 moved onto the selected counter, zeroed for counters that are full.
        Vector128<long> unit = Vector128.Create(1L);
        Vector128<long> deltaLo = AdvSimd.And(roomLo, AdvSimd.ShiftArithmetic(unit, shiftLo));
        Vector128<long> deltaHi = AdvSimd.And(roomHi, AdvSimd.ShiftArithmetic(unit, shiftHi));

        tablePtr[slot0] += AdvSimd.Extract(deltaLo, 0);
        tablePtr[slot1] += AdvSimd.Extract(deltaLo, 1);
        tablePtr[slot2] += AdvSimd.Extract(deltaHi, 0);
        tablePtr[slot3] += AdvSimd.Extract(deltaHi, 1);

        // Reduce both delta vectors to one scalar that is non-zero when any counter changed.
        Vector64<int> peakLo = AdvSimd.Arm64.MaxAcross(deltaLo.AsInt32());
        Vector64<int> peakHi = AdvSimd.Arm64.MaxAcross(deltaHi.AsInt32());
        Vector64<int> peaks = AdvSimd.Arm64.InsertSelectedScalar(peakLo, 1, peakHi, 0);
        Vector64<short> anyDelta = AdvSimd.Arm64.MaxAcross(peaks.AsInt16());

        if (anyDelta.ToScalar() != 0)
        {
            if (++size == sampleSize)
            {
                Reset();
            }
        }
    }
}
384+
/// <summary>
/// Arm64 AdvSimd path that reads the four 4-bit counters selected by the hash of
/// <paramref name="value"/> and returns the smallest of them.
/// </summary>
/// <param name="value">The item whose counters are read.</param>
/// <returns>The minimum of the four selected counter values.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private unsafe int EstimateFrequencyArm(T value)
{
    int spreadHash = Spread(comparer.GetHashCode(value));
    int rehashed = Rehash(spreadHash);
    int blockBase = (spreadHash & blockMask) << 3;

    // Lane i receives the rehashed value shifted by 0, -8, -16 and -24 respectively.
    Vector128<int> laneShifts = Vector128.Create(0, -8, -16, -24);
    Vector128<int> laneHashes = AdvSimd.ShiftArithmetic(Vector128.Create(rehashed), laneShifts);

    // Bits 1..4 of every lane pick one of the 16 counters packed in a long.
    Vector128<int> counterIndex = AdvSimd.And(AdvSimd.ShiftRightLogical(laneHashes, 1), Vector128.Create(0xf));

    // Bit 0 of every lane picks between the two longs of the pair at offset 0, 2, 4 or 6 in the block.
    Vector128<int> lowBit = AdvSimd.And(laneHashes, Vector128.Create(1));
    Vector128<int> pairStart = Vector128.Create(0, 2, 4, 6);
    Vector128<int> slots = AdvSimd.Add(AdvSimd.Add(Vector128.Create(blockBase), lowBit), pairStart);

    fixed (long* tablePtr = table)
    {
        // Gather the four table words into two 2 x long vectors.
        Vector128<long> wordsLo = Vector128.Create(
            AdvSimd.LoadVector64(tablePtr + AdvSimd.Extract(slots, 0)),
            AdvSimd.LoadVector64(tablePtr + AdvSimd.Extract(slots, 1)));
        Vector128<long> wordsHi = Vector128.Create(
            AdvSimd.LoadVector64(tablePtr + AdvSimd.Extract(slots, 2)),
            AdvSimd.LoadVector64(tablePtr + AdvSimd.Extract(slots, 3)));

        // Counter index * 4 = bit offset of the counter inside its long.
        Vector128<int> bitOffset = AdvSimd.ShiftLeftLogicalSaturate(counterIndex, 2);

        // Spread the int offsets into int lanes 0 and 2, then negate them so the
        // shifts below move the selected counter down to the low bits.
        Vector128<int> spreadLo = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, bitOffset, 0);
        spreadLo = AdvSimd.Arm64.InsertSelectedScalar(spreadLo, 2, bitOffset, 1);
        Vector128<int> spreadHi = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, bitOffset, 2);
        spreadHi = AdvSimd.Arm64.InsertSelectedScalar(spreadHi, 2, bitOffset, 3);

        Vector128<long> downLo = AdvSimd.Negate(spreadLo).AsInt64();
        Vector128<long> downHi = AdvSimd.Negate(spreadHi).AsInt64();

        // Isolate the selected 4-bit counter of each word.
        Vector128<long> nibble = Vector128.Create(0xfL);
        Vector128<long> countsLo = AdvSimd.And(AdvSimd.ShiftArithmetic(wordsLo, downLo), nibble);
        Vector128<long> countsHi = AdvSimd.And(AdvSimd.ShiftArithmetic(wordsHi, downHi), nibble);

        // Pack the low 4 bytes of each long into one vector of four ints:
        // bytes 0-3 and 8-11 of countsLo fill the lower half, then the same
        // bytes of countsHi fill the upper half (0xFF indexes select no table byte).
        Vector128<byte> pickLower = Vector128.Create(0x0B0A090803020100, 0xFFFFFFFFFFFFFFFF).AsByte();
        Vector128<byte> pickUpper = Vector128.Create(0xFFFFFFFFFFFFFFFF, 0x0B0A090803020100).AsByte();

        Vector128<byte> packed = AdvSimd.Arm64.VectorTableLookup(countsLo.AsByte(), pickLower);
        packed = AdvSimd.Arm64.VectorTableLookupExtension(packed, countsHi.AsByte(), pickUpper);

        Vector64<int> smallest = AdvSimd.Arm64.MinAcross(packed.AsInt32());

        return smallest.ToScalar();
    }
}
420+ #endif
317421 }
318422}
0 commit comments