Skip to content

Commit c972f72

Browse files
committed
cleanup
1 parent 3a5a72c commit c972f72

File tree

6 files changed

+134
-504
lines changed

6 files changed

+134
-504
lines changed

BitFaster.Caching.Benchmarks/BitFaster.Caching.Benchmarks.csproj

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
<IsWindows Condition="'$([System.Runtime.InteropServices.RuntimeInformation]::IsOSPlatform($([System.Runtime.InteropServices.OSPlatform]::Windows)))' == 'true'">true</IsWindows>
1010
<IsLinux Condition="'$([System.Runtime.InteropServices.RuntimeInformation]::IsOSPlatform($([System.Runtime.InteropServices.OSPlatform]::Linux)))' == 'true'">true</IsLinux>
1111
<IsMacOS Condition="'$([System.Runtime.InteropServices.RuntimeInformation]::IsOSPlatform($([System.Runtime.InteropServices.OSPlatform]::OSX)))' == 'true'">true</IsMacOS>
12+
<IsArm64 Condition="$([System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture) == Arm64">true</IsArm64>
13+
<IsX64 Condition="$([System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture) == X64">true</IsX64>
1214
</PropertyGroup>
1315

1416
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
@@ -41,5 +43,11 @@
4143
<PropertyGroup Condition="'$(IsMacOS)'=='true'">
4244
<DefineConstants>MacOS</DefineConstants>
4345
</PropertyGroup>
46+
<PropertyGroup Condition="'$(IsArm64)'=='true'">
47+
<!-- Append to the existing value so constants defined earlier (e.g. MacOS) are not overwritten. -->
<DefineConstants>$(DefineConstants);Arm64</DefineConstants>
48+
</PropertyGroup>
49+
<PropertyGroup Condition="'$(IsX64)'=='true'">
50+
<!-- Append to the existing value so constants defined earlier (e.g. MacOS) are not overwritten. -->
<DefineConstants>$(DefineConstants);X64</DefineConstants>
51+
</PropertyGroup>
4452

4553
</Project>

BitFaster.Caching.Benchmarks/Lfu/CmSketchNoPin.cs

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
using System;
22
using System.Collections.Generic;
33
using System.Diagnostics.CodeAnalysis;
4+
using System.Runtime.CompilerServices;
5+
46

57
#if NET6_0_OR_GREATER
68
using System.Runtime.Intrinsics;
9+
using System.Runtime.Intrinsics.Arm;
710
using System.Runtime.Intrinsics.X86;
811
#endif
912

@@ -61,6 +64,12 @@ public int EstimateFrequency(T value)
6164
{
6265
return EstimateFrequencyAvx(value);
6366
}
67+
#if NET6_0_OR_GREATER
68+
else if (isa.IsArm64Supported)
69+
{
70+
return EstimateFrequencyArm(value);
71+
}
72+
#endif
6473
else
6574
{
6675
return EstimateFrequencyStd(value);
@@ -84,11 +93,16 @@ public void Increment(T value)
8493
{
8594
IncrementAvx(value);
8695
}
96+
#if NET6_0_OR_GREATER
97+
else if (isa.IsArm64Supported)
98+
{
99+
IncrementArm(value);
100+
}
101+
#endif
87102
else
88103
{
89104
IncrementStd(value);
90105
}
91-
#endif
92106
}
93107

94108
/// <summary>
@@ -314,5 +328,94 @@ private unsafe void IncrementAvx(T value)
314328
}
315329
}
316330
#endif
331+
332+
#if NET6_0_OR_GREATER
333+
/// <summary>
/// Increments the four counters selected for <paramref name="value"/> using ARM AdvSimd
/// intrinsics. When at least one counter was incremented and the incremented size equals
/// sampleSize, Reset() is called.
/// </summary>
/// <param name="value">The value whose hash selects the block and the counters to increment.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
334+
private unsafe void IncrementArm(T value)
335+
{
336+
int blockHash = Spread(comparer.GetHashCode(value));
337+
int counterHash = Rehash(blockHash);
338+
// Base offset into table: the masked block hash multiplied by 8.
int block = (blockHash & blockMask) << 3;
339+
340+
// Negative shift counts shift right, so the lanes hold counterHash >> 0, 8, 16 and 24.
Vector128<int> h = AdvSimd.ShiftArithmetic(Vector128.Create(counterHash), Vector128.Create(0, -8, -16, -24));
341+
// Bits 1..4 of each lane: the index (0..15) of the 4-bit counter inside a 64-bit slot.
Vector128<int> index = AdvSimd.And(AdvSimd.ShiftRightLogical(h, 1), Vector128.Create(0xf));
342+
// Table slot per lane: block + lowest hash bit (0 or 1) + a fixed lane offset of 0, 2, 4 or 6.
Vector128<int> blockOffset = AdvSimd.Add(AdvSimd.Add(Vector128.Create(block), AdvSimd.And(h, Vector128.Create(1))), Vector128.Create(0, 2, 4, 6));
343+
344+
fixed (long* tablePtr = table)
345+
{
346+
int t0 = AdvSimd.Extract(blockOffset, 0);
347+
int t1 = AdvSimd.Extract(blockOffset, 1);
348+
int t2 = AdvSimd.Extract(blockOffset, 2);
349+
int t3 = AdvSimd.Extract(blockOffset, 3);
350+
351+
// Load the four selected 64-bit slots as two 2 x long vectors.
Vector128<long> tableVectorA = Vector128.Create(AdvSimd.LoadVector64(tablePtr + t0), AdvSimd.LoadVector64(tablePtr + t1));
352+
Vector128<long> tableVectorB = Vector128.Create(AdvSimd.LoadVector64(tablePtr + t2), AdvSimd.LoadVector64(tablePtr + t3));
353+
354+
// Multiply each counter index by 4 to get the counter's bit offset within its slot.
index = AdvSimd.ShiftLeftLogicalSaturate(index, 2);
355+
356+
// Place two bit offsets in int lanes 0 and 2 (lanes 1 and 3 stay zero) so that,
// reinterpreted as long, each 64-bit lane holds one offset.
Vector128<int> longOffA = AdvSimd.Arm64.InsertSelectedScalar(AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, index, 0), 2, index, 1);
357+
Vector128<int> longOffB = AdvSimd.Arm64.InsertSelectedScalar(AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, index, 2), 2, index, 3);
358+
359+
// 0xf shifted left to the counter position: a mask covering the 4-bit counter.
Vector128<long> fifteen = Vector128.Create(0xfL);
360+
Vector128<long> maskA = AdvSimd.ShiftArithmetic(fifteen, longOffA.AsInt64());
361+
Vector128<long> maskB = AdvSimd.ShiftArithmetic(fifteen, longOffB.AsInt64());
362+
363+
// All ones in lanes whose counter is not yet 15; all zeros where it already equals 15.
Vector128<long> maskedA = AdvSimd.Not(AdvSimd.Arm64.CompareEqual(AdvSimd.And(tableVectorA, maskA), maskA));
364+
Vector128<long> maskedB = AdvSimd.Not(AdvSimd.Arm64.CompareEqual(AdvSimd.And(tableVectorB, maskB), maskB));
365+
366+
// The value 1 shifted to the counter position, zeroed for counters that are already 15.
var one = Vector128.Create(1L);
367+
Vector128<long> incA = AdvSimd.And(maskedA, AdvSimd.ShiftArithmetic(one, longOffA.AsInt64()));
368+
Vector128<long> incB = AdvSimd.And(maskedB, AdvSimd.ShiftArithmetic(one, longOffB.AsInt64()));
369+
370+
tablePtr[t0] += AdvSimd.Extract(incA, 0);
371+
tablePtr[t1] += AdvSimd.Extract(incA, 1);
372+
tablePtr[t2] += AdvSimd.Extract(incB, 0);
373+
tablePtr[t3] += AdvSimd.Extract(incB, 1);
374+
375+
// Reduce both increment vectors to one scalar that is non-zero when any increment is non-zero.
var max = AdvSimd.Arm64.MaxAcross(AdvSimd.Arm64.InsertSelectedScalar(AdvSimd.Arm64.MaxAcross(incA.AsInt32()), 1, AdvSimd.Arm64.MaxAcross(incB.AsInt32()), 0).AsInt16());
376+
377+
// size is only advanced when something was incremented; Reset() runs once it reaches sampleSize.
if (max.ToScalar() != 0 && (++size == sampleSize))
378+
{
379+
Reset();
380+
}
381+
}
382+
}
383+
384+
/// <summary>
/// Estimates the frequency of <paramref name="value"/> using ARM AdvSimd intrinsics by reading
/// the four counters selected by the value's hash and returning the smallest of them.
/// </summary>
/// <param name="value">The value whose hash selects the block and the counters to read.</param>
/// <returns>The minimum of the four 4-bit counters read from the table.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
385+
private unsafe int EstimateFrequencyArm(T value)
386+
{
387+
int blockHash = Spread(comparer.GetHashCode(value));
388+
int counterHash = Rehash(blockHash);
389+
// Base offset into table: the masked block hash multiplied by 8.
int block = (blockHash & blockMask) << 3;
390+
391+
// Negative shift counts shift right, so the lanes hold counterHash >> 0, 8, 16 and 24.
Vector128<int> h = AdvSimd.ShiftArithmetic(Vector128.Create(counterHash), Vector128.Create(0, -8, -16, -24));
392+
// Bits 1..4 of each lane: the index (0..15) of the 4-bit counter inside a 64-bit slot.
Vector128<int> index = AdvSimd.And(AdvSimd.ShiftRightLogical(h, 1), Vector128.Create(0xf));
393+
// Table slot per lane: block + lowest hash bit (0 or 1) + a fixed lane offset of 0, 2, 4 or 6.
Vector128<int> blockOffset = AdvSimd.Add(AdvSimd.Add(Vector128.Create(block), AdvSimd.And(h, Vector128.Create(1))), Vector128.Create(0, 2, 4, 6));
394+
395+
fixed (long* tablePtr = table)
396+
{
397+
// Load the four selected 64-bit slots as two 2 x long vectors.
Vector128<long> tableVectorA = Vector128.Create(AdvSimd.LoadVector64(tablePtr + AdvSimd.Extract(blockOffset, 0)), AdvSimd.LoadVector64(tablePtr + AdvSimd.Extract(blockOffset, 1)));
398+
Vector128<long> tableVectorB = Vector128.Create(AdvSimd.LoadVector64(tablePtr + AdvSimd.Extract(blockOffset, 2)), AdvSimd.LoadVector64(tablePtr + AdvSimd.Extract(blockOffset, 3)));
399+
400+
// Multiply each counter index by 4 to get the counter's bit offset within its slot.
index = AdvSimd.ShiftLeftLogicalSaturate(index, 2);
401+
402+
// Spread the offsets into int lanes 0 and 2 (one per 64-bit lane) and negate them so the
// ShiftArithmetic calls below shift right instead of left.
Vector128<int> indexA = AdvSimd.Negate(AdvSimd.Arm64.InsertSelectedScalar(AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, index, 0), 2, index, 1));
403+
Vector128<int> indexB = AdvSimd.Negate(AdvSimd.Arm64.InsertSelectedScalar(AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, index, 2), 2, index, 3));
404+
405+
// Shift each counter down to bit 0 and mask it to 4 bits.
var fifteen = Vector128.Create(0xfL);
406+
Vector128<long> a = AdvSimd.And(AdvSimd.ShiftArithmetic(tableVectorA, indexA.AsInt64()), fifteen);
407+
Vector128<long> b = AdvSimd.And(AdvSimd.ShiftArithmetic(tableVectorB, indexB.AsInt64()), fifteen);
408+
409+
// Before: < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F >
410+
// After: < 0, 1, 2, 3, 8, 9, A, B, 4, 5, 6, 7, C, D, E, F >
411+
// Pack the low 4 bytes of each 64-bit lane of a into the lower half of min; the 0xFF
// indices are out of range and produce zero bytes in the upper half.
var min = AdvSimd.Arm64.VectorTableLookup(a.AsByte(), Vector128.Create(0x0B0A090803020100, 0xFFFFFFFFFFFFFFFF).AsByte());
412+
// Fill the upper half from b; out-of-range indices leave the lower half of min unchanged.
min = AdvSimd.Arm64.VectorTableLookupExtension(min, b.AsByte(), Vector128.Create(0xFFFFFFFFFFFFFFFF, 0x0B0A090803020100).AsByte());
413+
414+
// min now holds the four counters as 32-bit lanes; take the smallest.
var min32 = AdvSimd.Arm64.MinAcross(min.AsInt32());
415+
416+
return min32.ToScalar();
417+
}
418+
}
419+
#endif
317420
}
318421
}

BitFaster.Caching.Benchmarks/Lfu/SketchFrequency.cs

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ public class SketchFrequency
2424
private CmSketchCore<int, DisableHardwareIntrinsics> blockStd;
2525
private CmSketchNoPin<int, DetectIsa> blockAvxNoPin;
2626
private CmSketchCore<int, DetectIsa> blockVector;
27-
private CmSketchCore512<int, DetectIsa> blockVector512;
2827

2928
[Params(32_768, 524_288, 8_388_608, 134_217_728)]
3029
public int Size { get; set; }
@@ -38,7 +37,6 @@ public void Setup()
3837
blockStd = new CmSketchCore<int, DisableHardwareIntrinsics>(Size, EqualityComparer<int>.Default);
3938
blockAvxNoPin = new CmSketchNoPin<int, DetectIsa>(Size, EqualityComparer<int>.Default);
4039
blockVector = new CmSketchCore<int, DetectIsa>(Size, EqualityComparer<int>.Default);
41-
blockVector512 = new CmSketchCore512<int, DetectIsa>(Size, EqualityComparer<int>.Default);
4240
}
4341

4442
[Benchmark(Baseline = true, OperationsPerInvoke = iterations)]
@@ -50,7 +48,7 @@ public int FrequencyFlat()
5048

5149
return count;
5250
}
53-
51+
#if X64
5452
[Benchmark(OperationsPerInvoke = iterations)]
5553
public int FrequencyFlatAvx()
5654
{
@@ -60,7 +58,7 @@ public int FrequencyFlatAvx()
6058

6159
return count;
6260
}
63-
61+
#endif
6462
[Benchmark(OperationsPerInvoke = iterations)]
6563
public int FrequencyBlock()
6664
{
@@ -72,7 +70,11 @@ public int FrequencyBlock()
7270
}
7371

7472
[Benchmark(OperationsPerInvoke = iterations)]
73+
#if Arm64
74+
public int FrequencyBlockNeonNotPinned()
75+
#else
7576
public int FrequencyBlockAvxNotPinned()
77+
#endif
7678
{
7779
int count = 0;
7880
for (int i = 0; i < iterations; i++)
@@ -82,23 +84,18 @@ public int FrequencyBlockAvxNotPinned()
8284
}
8385

8486
// Benchmark that calls blockVector.EstimateFrequency twice per iteration (for i and i + 1).
// The method is named FrequencyBlockNeonPinned when the Arm64 constant is defined and
// FrequencyBlockAvxPinned otherwise; the body is the same in both cases.
// Returns the number of iterations where the estimate for i exceeded the estimate for i + 1.
[Benchmark(OperationsPerInvoke = iterations)]
87+
88+
#if Arm64
89+
public int FrequencyBlockNeonPinned()
90+
#else
8591
public int FrequencyBlockAvxPinned()
92+
#endif
8693
{
8794
int count = 0;
8895
for (int i = 0; i < iterations; i++)
8996
count += blockVector.EstimateFrequency(i) > blockVector.EstimateFrequency(i + 1) ? 1 : 0;
9097

9198
return count;
9299
}
93-
94-
[Benchmark(OperationsPerInvoke = iterations)]
95-
public int FrequencyBlockAvxPinned512()
96-
{
97-
int count = 0;
98-
for (int i = 0; i < iterations; i++)
99-
count += blockVector512.EstimateFrequency(i) > blockVector.EstimateFrequency(i + 1) ? 1 : 0;
100-
101-
return count;
102-
}
103100
}
104101
}

BitFaster.Caching.Benchmarks/Lfu/SketchIncrement.cs

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ public class SketchIncrement
2323
private CmSketchCore<int, DisableHardwareIntrinsics> blockStd;
2424
private CmSketchNoPin<int, DetectIsa> blockAvxNoPin;
2525
private CmSketchCore<int, DetectIsa> blockAvx;
26-
private CmSketchCore512<int, DetectIsa> blockVector512;
2726

2827

2928
[Params(32_768, 524_288, 8_388_608, 134_217_728)]
@@ -38,7 +37,6 @@ public void Setup()
3837
blockStd = new CmSketchCore<int, DisableHardwareIntrinsics>(Size, EqualityComparer<int>.Default);
3938
blockAvxNoPin = new CmSketchNoPin<int, DetectIsa>(Size, EqualityComparer<int>.Default);
4039
blockAvx = new CmSketchCore<int, DetectIsa>(Size, EqualityComparer<int>.Default);
41-
blockVector512 = new CmSketchCore512<int, DetectIsa>(Size, EqualityComparer<int>.Default);
4240
}
4341

4442
[Benchmark(Baseline = true, OperationsPerInvoke = iterations)]
@@ -49,7 +47,7 @@ public void IncFlat()
4947
flatStd.Increment(i);
5048
}
5149
}
52-
50+
#if X64
5351
[Benchmark(OperationsPerInvoke = iterations)]
5452
public void IncFlatAvx()
5553
{
@@ -58,7 +56,7 @@ public void IncFlatAvx()
5856
flatAvx.Increment(i);
5957
}
6058
}
61-
59+
#endif
6260
[Benchmark(OperationsPerInvoke = iterations)]
6361
public void IncBlock()
6462
{
@@ -69,7 +67,11 @@ public void IncBlock()
6967
}
7068

7169
[Benchmark(OperationsPerInvoke = iterations)]
72-
public void IncBlockAvxNotPinned()
70+
#if Arm64
71+
public void IncBlockNeonNotPinned()
72+
#else
73+
// Must be void: the body only calls Increment in a loop and never returns a value.
public void IncBlockAvxNotPinned()
74+
#endif
7375
{
7476
for (int i = 0; i < iterations; i++)
7577
{
@@ -78,21 +80,16 @@ public void IncBlockAvxNotPinned()
7880
}
7981

8082
// Benchmark that calls blockAvx.Increment(i) once for each i in [0, iterations).
// The method is named IncBlockNeonPinned when the Arm64 constant is defined and
// IncBlockAvxPinned otherwise; the body is the same in both cases.
[Benchmark(OperationsPerInvoke = iterations)]
83+
#if Arm64
84+
public void IncBlockNeonPinned()
85+
#else
8186
public void IncBlockAvxPinned()
87+
#endif
8288
{
8389
for (int i = 0; i < iterations; i++)
8490
{
8591
blockAvx.Increment(i);
8692
}
8793
}
88-
89-
[Benchmark(OperationsPerInvoke = iterations)]
90-
public void IncBlockAvxPinned512()
91-
{
92-
for (int i = 0; i < iterations; i++)
93-
{
94-
blockVector512.Increment(i);
95-
}
96-
}
9794
}
9895
}

BitFaster.Caching/Lfu/CmSketchCore.cs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,6 @@ private unsafe void IncrementArm(T value)
392392
Vector128<int> blockOffset = AdvSimd.Add(AdvSimd.Add(Vector128.Create(block), AdvSimd.And(h, Vector128.Create(1))), Vector128.Create(0, 2, 4, 6));
393393

394394
long* tablePtr = tableAddr;
395-
//fixed (long* tablePtr = table)
396395
{
397396
int t0 = AdvSimd.Extract(blockOffset, 0);
398397
int t1 = AdvSimd.Extract(blockOffset, 1);
@@ -444,7 +443,6 @@ private unsafe int EstimateFrequencyArm(T value)
444443
Vector128<int> blockOffset = AdvSimd.Add(AdvSimd.Add(Vector128.Create(block), AdvSimd.And(h, Vector128.Create(1))), Vector128.Create(0, 2, 4, 6));
445444

446445
long* tablePtr = tableAddr;
447-
//fixed (long* tablePtr = table)
448446
{
449447
Vector128<long> tableVectorA = Vector128.Create(AdvSimd.LoadVector64(tablePtr + AdvSimd.Extract(blockOffset, 0)), AdvSimd.LoadVector64(tablePtr + AdvSimd.Extract(blockOffset, 1)));
450448
Vector128<long> tableVectorB = Vector128.Create(AdvSimd.LoadVector64(tablePtr + AdvSimd.Extract(blockOffset, 2)), AdvSimd.LoadVector64(tablePtr + AdvSimd.Extract(blockOffset, 3)));

0 commit comments

Comments
 (0)