Skip to content

Commit 6a7ca5f

Browse files
committed
xxhash 64-bit
1 parent d7ad482 commit 6a7ca5f

File tree

12 files changed

+6166
-5183
lines changed

12 files changed

+6166
-5183
lines changed

std/assembly/map.ts

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ export class Map<K,V> {
5858

5959
// buckets referencing their respective first entry, usize[bucketsMask + 1]
6060
private buckets: ArrayBuffer = new ArrayBuffer(INITIAL_CAPACITY * <i32>BUCKET_SIZE);
61-
private bucketsMask: u32 = INITIAL_CAPACITY - 1;
61+
private bucketsMask: u64 = INITIAL_CAPACITY - 1;
6262

6363
// entries in insertion order, MapEntry<K,V>[entriesCapacity]
6464
private entries: ArrayBuffer = new ArrayBuffer(INITIAL_CAPACITY * <i32>ENTRY_SIZE<K,V>());
@@ -83,7 +83,7 @@ export class Map<K,V> {
8383
this.entriesCount = 0;
8484
}
8585

86-
private find(key: K, hashCode: u32): MapEntry<K,V> | null {
86+
private find(key: K, hashCode: u64): MapEntry<K,V> | null {
8787
var entry = load<MapEntry<K,V>>( // unmanaged!
8888
changetype<usize>(this.buckets) + <usize>(hashCode & this.bucketsMask) * BUCKET_SIZE
8989
);
@@ -123,11 +123,11 @@ export class Map<K,V> {
123123
} else {
124124
// check if rehashing is necessary
125125
if (this.entriesOffset == this.entriesCapacity) {
126-
this.rehash(
126+
this.rehash(u32(
127127
this.entriesCount < this.entriesCapacity * FREE_FACTOR_N / FREE_FACTOR_D
128128
? this.bucketsMask // just rehash if 1/4+ entries are empty
129129
: (this.bucketsMask << 1) | 1 // grow capacity to next 2^N
130-
);
130+
));
131131
}
132132
// append new entry
133133
let entries = this.entries;
@@ -156,7 +156,7 @@ export class Map<K,V> {
156156
entry.taggedNext |= EMPTY;
157157
--this.entriesCount;
158158
// check if rehashing is appropriate
159-
var halfBucketsMask = this.bucketsMask >> 1;
159+
var halfBucketsMask = <u32>(this.bucketsMask >> 1);
160160
if (
161161
halfBucketsMask + 1 >= max<u32>(INITIAL_CAPACITY, this.entriesCount) &&
162162
this.entriesCount < this.entriesCapacity * FREE_FACTOR_N / FREE_FACTOR_D
@@ -181,8 +181,8 @@ export class Map<K,V> {
181181
let oldEntryKey = oldEntry.key;
182182
newEntry.key = oldEntryKey;
183183
newEntry.value = oldEntry.value;
184-
let newBucketIndex = HASH<K>(oldEntryKey) & newBucketsMask;
185-
let newBucketPtrBase = changetype<usize>(newBuckets) + <usize>newBucketIndex * BUCKET_SIZE;
184+
let newBucketIndex = <usize>(HASH<K>(oldEntryKey) & newBucketsMask);
185+
let newBucketPtrBase = changetype<usize>(newBuckets) + newBucketIndex * BUCKET_SIZE;
186186
newEntry.taggedNext = load<usize>(newBucketPtrBase);
187187
store<usize>(newBucketPtrBase, newPtr);
188188
newPtr += ENTRY_SIZE<K,V>();

std/assembly/set.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ export class Set<T> {
5555

5656
// buckets referencing their respective first entry, usize[bucketsMask + 1]
5757
private buckets: ArrayBuffer = new ArrayBuffer(INITIAL_CAPACITY * <i32>BUCKET_SIZE);
58-
private bucketsMask: u32 = INITIAL_CAPACITY - 1;
58+
private bucketsMask: u64 = INITIAL_CAPACITY - 1;
5959

6060
// entries in insertion order, SetEntry<T>[entriesCapacity]
6161
private entries: ArrayBuffer = new ArrayBuffer(INITIAL_CAPACITY * <i32>ENTRY_SIZE<T>());
@@ -80,7 +80,7 @@ export class Set<T> {
8080
this.entriesCount = 0;
8181
}
8282

83-
private find(key: T, hashCode: u32): SetEntry<T> | null {
83+
private find(key: T, hashCode: u64): SetEntry<T> | null {
8484
var entry = load<SetEntry<T>>( // unmanaged!
8585
changetype<usize>(this.buckets) + <usize>(hashCode & this.bucketsMask) * BUCKET_SIZE
8686
);
@@ -103,11 +103,11 @@ export class Set<T> {
103103
if (!entry) {
104104
// check if rehashing is necessary
105105
if (this.entriesOffset == this.entriesCapacity) {
106-
this.rehash(
106+
this.rehash(u32(
107107
this.entriesCount < this.entriesCapacity * FREE_FACTOR_N / FREE_FACTOR_D
108108
? this.bucketsMask // just rehash if 1/4+ entries are empty
109109
: (this.bucketsMask << 1) | 1 // grow capacity to next 2^N
110-
);
110+
));
111111
}
112112
// append new entry
113113
entry = changetype<SetEntry<T>>(changetype<usize>(this.entries) + <usize>(this.entriesOffset++) * ENTRY_SIZE<T>());
@@ -136,7 +136,7 @@ export class Set<T> {
136136
entry.taggedNext |= EMPTY;
137137
--this.entriesCount;
138138
// check if rehashing is appropriate
139-
var halfBucketsMask = this.bucketsMask >> 1;
139+
var halfBucketsMask = <u32>(this.bucketsMask >> 1);
140140
if (
141141
halfBucketsMask + 1 >= max<u32>(INITIAL_CAPACITY, this.entriesCount) &&
142142
this.entriesCount < this.entriesCapacity * FREE_FACTOR_N / FREE_FACTOR_D

std/assembly/util/hash.ts

Lines changed: 106 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// @ts-ignore: decorator
22
@inline
3-
export function HASH<T>(key: T): u32 {
3+
export function HASH<T>(key: T): u64 {
44
if (isString<T>()) {
55
return hashStr(changetype<string>(key));
66
} else if (isReference<T>()) {
@@ -10,63 +10,128 @@ export function HASH<T>(key: T): u32 {
1010
if (sizeof<T>() == 4) return hash32(reinterpret<u32>(f32(key)));
1111
if (sizeof<T>() == 8) return hash64(reinterpret<u64>(f64(key)));
1212
} else {
13-
if (sizeof<T>() == 1) return hash8 (u32(key));
14-
if (sizeof<T>() == 2) return hash16(u32(key));
15-
if (sizeof<T>() == 4) return hash32(u32(key));
13+
if (sizeof<T>() <= 4) return hash32(u32(key));
1614
if (sizeof<T>() == 8) return hash64(u64(key));
1715
}
1816
return unreachable();
1917
}
2018

21-
// FNV-1a 32-bit as a starting point, see: http://isthe.com/chongo/tech/comp/fnv/
19+
// XXHash 64-bit as a starting point, see: https://cyan4973.github.io/xxHash
2220

21+
// primes
2322
// @ts-ignore: decorator
24-
@inline const FNV_OFFSET: u32 = 2166136261;
25-
23+
@inline const XXH64_P1: u64 = 11400714785074694791;
24+
// @ts-ignore: decorator
25+
@inline const XXH64_P2: u64 = 14029467366897019727;
26+
// @ts-ignore: decorator
27+
@inline const XXH64_P3: u64 = 1609587929392839161;
2628
// @ts-ignore: decorator
27-
@inline const FNV_PRIME: u32 = 16777619;
29+
@inline const XXH64_P4: u64 = 9650029242287828579;
30+
// @ts-ignore: decorator
31+
@inline const XXH64_P5: u64 = 2870177450012600261;
32+
// @ts-ignore: decorator
33+
@inline const XXH64_SEED: u64 = 0;
2834

29-
function hash8(key: u32): u32 {
30-
return (FNV_OFFSET ^ key) * FNV_PRIME;
35+
function hash32(key: u32): u64 {
36+
var h: u64 = XXH64_SEED + XXH64_P5 + 4;
37+
h ^= u64(key) * XXH64_P1;
38+
h = rotl(h, 23) * XXH64_P2 + XXH64_P3;
39+
h ^= h >> 33;
40+
h *= XXH64_P2;
41+
h ^= h >> 29;
42+
h *= XXH64_P3;
43+
h ^= h >> 32;
44+
return h;
3145
}
3246

33-
function hash16(key: u32): u32 {
34-
var v = FNV_OFFSET;
35-
v = (v ^ ( key & 0xff)) * FNV_PRIME;
36-
v = (v ^ ( key >> 8 )) * FNV_PRIME;
37-
return v;
47+
function hash64(key: u64): u64 {
48+
var h: u64 = XXH64_SEED + XXH64_P5 + 8;
49+
h ^= rotl(key * XXH64_P2, 31) * XXH64_P1;
50+
h = rotl(h, 27) * XXH64_P1 + XXH64_P4;
51+
h ^= h >> 33;
52+
h *= XXH64_P2;
53+
h ^= h >> 29;
54+
h *= XXH64_P3;
55+
h ^= h >> 32;
56+
return h;
3857
}
3958

40-
function hash32(key: u32): u32 {
41-
var v = FNV_OFFSET;
42-
v = (v ^ ( key & 0xff)) * FNV_PRIME;
43-
v = (v ^ ((key >> 8) & 0xff)) * FNV_PRIME;
44-
v = (v ^ ((key >> 16) & 0xff)) * FNV_PRIME;
45-
v = (v ^ ( key >> 24 )) * FNV_PRIME;
46-
return v;
59+
// @ts-ignore: decorator
60+
@inline
61+
function mix1(h: u64, key: u64): u64 {
62+
return rotl(h + key * XXH64_P2, 31) * XXH64_P1;
4763
}
4864

49-
function hash64(key: u64): u32 {
50-
var l = <u32> key;
51-
var h = <u32>(key >>> 32);
52-
var v = FNV_OFFSET;
53-
v = (v ^ ( l & 0xff)) * FNV_PRIME;
54-
v = (v ^ ((l >> 8) & 0xff)) * FNV_PRIME;
55-
v = (v ^ ((l >> 16) & 0xff)) * FNV_PRIME;
56-
v = (v ^ ( l >> 24 )) * FNV_PRIME;
57-
v = (v ^ ( h & 0xff)) * FNV_PRIME;
58-
v = (v ^ ((h >> 8) & 0xff)) * FNV_PRIME;
59-
v = (v ^ ((h >> 16) & 0xff)) * FNV_PRIME;
60-
v = (v ^ ( h >> 24 )) * FNV_PRIME;
61-
return v;
65+
// @ts-ignore: decorator
66+
@inline
67+
function mix2(h: u64, s: u64): u64 {
68+
return (h ^ (rotl(s, 31) * XXH64_P1)) * XXH64_P1 + XXH64_P4;
6269
}
6370

64-
function hashStr(key: string): u32 {
65-
var v = FNV_OFFSET;
66-
if (key !== null) {
67-
for (let i: usize = 0, k: usize = key.length << 1; i < k; ++i) {
68-
v = (v ^ <u32>load<u8>(changetype<usize>(key) + i)) * FNV_PRIME;
71+
function hashStr(key: string): u64 {
72+
if (key === null) {
73+
return XXH64_SEED;
74+
}
75+
var len = key.length << 1;
76+
var h: u64 = XXH64_SEED + XXH64_P5 + u64(len);
77+
78+
if (len >= 32) {
79+
let s1 = XXH64_SEED + XXH64_P1 + XXH64_P2;
80+
let s2 = XXH64_SEED + XXH64_P2;
81+
let s3 = XXH64_SEED;
82+
let s4 = XXH64_SEED - XXH64_P1;
83+
84+
let i = 0;
85+
len -= 32;
86+
87+
while (i <= len) {
88+
s1 = mix1(s1, load<u64>(changetype<usize>(key) + i ));
89+
s2 = mix1(s2, load<u64>(changetype<usize>(key) + i, 8));
90+
s3 = mix1(s3, load<u64>(changetype<usize>(key) + i, 16));
91+
s4 = mix1(s4, load<u64>(changetype<usize>(key) + i, 24));
92+
i += 32;
6993
}
94+
h = rotl(s1, 1) + rotl(s2, 7) + rotl(s3, 12) + rotl(s4, 18);
95+
96+
s1 *= XXH64_P2;
97+
s2 *= XXH64_P2;
98+
s3 *= XXH64_P2;
99+
s4 *= XXH64_P2;
100+
101+
h = mix2(h, s1);
102+
h = mix2(h, s2);
103+
h = mix2(h, s3);
104+
h = mix2(h, s4);
105+
106+
h += u64(len);
107+
len -= i;
108+
}
109+
110+
var i = 0;
111+
len -= 8;
112+
113+
while (i <= len) {
114+
h ^= rotl(load<u64>(changetype<usize>(key) + i) * XXH64_P2, 31) * XXH64_P1;
115+
h = rotl(h, 27) * XXH64_P1 + XXH64_P4;
116+
i += 8;
117+
}
118+
119+
if (i + 4 <= len) {
120+
h ^= <u64>load<u32>(changetype<usize>(key) + i) * XXH64_P1;
121+
h = rotl(h, 23) * XXH64_P2 + XXH64_P3;
122+
i += 4;
70123
}
71-
return v;
124+
125+
while (i < len) {
126+
h += <u64>load<u8>(changetype<usize>(key) + i) * XXH64_P5;
127+
h = rotl(h, 11) * XXH64_P1;
128+
i++;
129+
}
130+
131+
h ^= h >> 33;
132+
h *= XXH64_P2;
133+
h ^= h >> 29;
134+
h *= XXH64_P3;
135+
h ^= h >> 32;
136+
return h;
72137
}

0 commit comments

Comments
 (0)