|
8 | 8 | #if !NETSTANDARD2_0 |
9 | 9 | using System.Runtime.Intrinsics; |
10 | 10 | using System.Runtime.Intrinsics.X86; |
| 11 | + |
11 | 12 | #endif |
12 | 13 |
|
13 | 14 | #if NET6_0_OR_GREATER |
@@ -169,41 +170,61 @@ private void EnsureCapacity(long maximumSize) |
169 | 170 |
|
/// <summary>
/// Estimates the recorded frequency of <paramref name="value"/> by reading four
/// 4-bit counters from the value's block of the table and returning the smallest.
/// </summary>
/// <param name="value">The value whose frequency is estimated.</param>
/// <returns>The minimum of the four counters read; each counter is masked to 0..15.</returns>
private unsafe int EstimateFrequencyStd(T value)
{
    int blockHash = Spread(comparer.GetHashCode(value));
    int counterHash = Rehash(blockHash);

    // A block covers 8 consecutive table entries (offsets 0..7 from blockStart).
    int blockStart = (blockHash & blockMask) << 3;

    // Deliberately unrolled instead of looping over a scratch buffer; the unrolled
    // form is intended to improve throughput. Each successive byte of counterHash
    // drives one counter lookup.
    int lane0 = counterHash;
    int lane1 = counterHash >>> 8;
    int lane2 = counterHash >>> 16;
    int lane3 = counterHash >>> 24;

    // Bit 0 of a lane chooses between the two entries of that lane's pair.
    int entry0 = blockStart + (lane0 & 1);
    int entry1 = blockStart + (lane1 & 1) + 2;
    int entry2 = blockStart + (lane2 & 1) + 4;
    int entry3 = blockStart + (lane3 & 1) + 6;

    // Bits 1..4 of a lane choose one of 16 counters; counters are 4 bits wide,
    // so the bit position inside the entry is the counter number times 4.
    int shift0 = ((lane0 >>> 1) & 15) << 2;
    int shift1 = ((lane1 >>> 1) & 15) << 2;
    int shift2 = ((lane2 >>> 1) & 15) << 2;
    int shift3 = ((lane3 >>> 1) & 15) << 2;

    // Logical (unsigned) shift so the sign bit of the entry never smears into the counter.
    int freq0 = (int)((table[entry0] >>> shift0) & 0xfL);
    int freq1 = (int)((table[entry1] >>> shift1) & 0xfL);
    int freq2 = (int)((table[entry2] >>> shift2) & 0xfL);
    int freq3 = (int)((table[entry3] >>> shift3) & 0xfL);

    int lowerPair = Math.Min(freq0, freq1);
    int upperPair = Math.Min(freq2, freq3);
    return Math.Min(lowerPair, upperPair);
}
186 | 200 |
|
187 | 201 | private unsafe void IncrementStd(T value) |
188 | 202 | { |
189 | | - var index = stackalloc int[8]; |
190 | 203 | int blockHash = Spread(comparer.GetHashCode(value)); |
191 | 204 | int counterHash = Rehash(blockHash); |
192 | 205 | int block = (blockHash & blockMask) << 3; |
193 | 206 |
|
194 | | - for (int i = 0; i < 4; i++) |
195 | | - { |
196 | | - int h = (int)((uint)counterHash >> (i << 3)); |
197 | | - index[i] = (h >> 1) & 15; |
198 | | - int offset = h & 1; |
199 | | - index[i + 4] = block + offset + (i << 1); |
200 | | - } |
| 207 | + // Loop unrolling improves throughput |
| 208 | + int h0 = counterHash; |
| 209 | + int h1 = counterHash >>> 8; |
| 210 | + int h2 = counterHash >>> 16; |
| 211 | + int h3 = counterHash >>> 24; |
| 212 | + |
| 213 | + int index0 = (h0 >>> 1) & 15; |
| 214 | + int index1 = (h1 >>> 1) & 15; |
| 215 | + int index2 = (h2 >>> 1) & 15; |
| 216 | + int index3 = (h3 >>> 1) & 15; |
| 217 | + |
| 218 | + int slot0 = block + (h0 & 1); |
| 219 | + int slot1 = block + (h1 & 1) + 2; |
| 220 | + int slot2 = block + (h2 & 1) + 4; |
| 221 | + int slot3 = block + (h3 & 1) + 6; |
201 | 222 |
|
202 | 223 | bool added = |
203 | | - IncrementAt(index[4], index[0]) |
204 | | - | IncrementAt(index[5], index[1]) |
205 | | - | IncrementAt(index[6], index[2]) |
206 | | - | IncrementAt(index[7], index[3]); |
| 224 | + IncrementAt(slot0, index0) |
| 225 | + | IncrementAt(slot1, index1) |
| 226 | + | IncrementAt(slot2, index2) |
| 227 | + | IncrementAt(slot3, index3); |
207 | 228 |
|
208 | 229 | if (added && (++size == sampleSize)) |
209 | 230 | { |
|
0 commit comments