Skip to content

Commit 6043982

Browse files
committed
rustdoc-search: yet another stringdex optimization attempt
This one uses a different tactic. It shouldn't significantly increase the amount of downloaded index data, but it still reduces the amount of disk usage. It works by changing the suffix-only node representation to omit some data that's needed for checking a match. Since those nodes make up the bulk of the tree, this reduces the amount of data stored, but it also requires validating each match by fetching the name itself (the names list is pretty small, though, and when I tried it with wordnet "indexing" it was about the same).
1 parent 5ab6924 commit 6043982

File tree

5 files changed

+647
-164
lines changed

5 files changed

+647
-164
lines changed

Cargo.lock

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5225,9 +5225,9 @@ dependencies = [
52255225

52265226
[[package]]
52275227
name = "stringdex"
5228-
version = "0.0.1-alpha4"
5228+
version = "0.0.1-alpha7"
52295229
source = "registry+https://github.com/rust-lang/crates.io-index"
5230-
checksum = "2841fd43df5b1ff1b042e167068a1fe9b163dc93041eae56ab2296859013a9a0"
5230+
checksum = "3e1f1b3992e2e56edf27f7769eb17bd1955e15d2de7c50a4fa5b0d71740368d2"
52315231
dependencies = [
52325232
"stacker",
52335233
]

src/librustdoc/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ rustdoc-json-types = { path = "../rustdoc-json-types" }
2121
serde = { version = "1.0", features = ["derive"] }
2222
serde_json = "1.0"
2323
smallvec = "1.8.1"
24-
stringdex = { version = "0.0.1-alpha4" }
24+
stringdex = { version = "0.0.1-alpha7" }
2525
tempfile = "3"
2626
threadpool = "1.8.1"
2727
tracing = "0.1"

src/librustdoc/html/static/js/search.js

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1211,7 +1211,7 @@ class DocSearch {
12111211
* will never fulfill.
12121212
*/
12131213
async buildIndex() {
1214-
const nn = this.database.getIndex("normalizedName");
1214+
const nn = this.database.getData("normalizedName");
12151215
if (!nn) {
12161216
return;
12171217
}
@@ -3706,7 +3706,7 @@ class DocSearch {
37063706
* @returns {AsyncGenerator<rustdoc.ResultObject>}
37073707
*/
37083708
async function*(currentCrate) {
3709-
const index = this.database.getIndex("normalizedName");
3709+
const index = this.database.getData("normalizedName");
37103710
if (!index) {
37113711
return;
37123712
}
@@ -3835,8 +3835,7 @@ class DocSearch {
38353835
};
38363836
if (elem.normalizedPathLast === "") {
38373837
// faster full-table scan for this specific case.
3838-
const nameData = this.database.getData("name");
3839-
const l = nameData ? nameData.length : 0;
3838+
const l = index.length;
38403839
for (let id = 0; id < l; ++id) {
38413840
if (!idDuplicates.has(id)) {
38423841
idDuplicates.add(id);
@@ -3938,7 +3937,7 @@ class DocSearch {
39383937
* @returns {AsyncGenerator<rustdoc.ResultObject>}
39393938
*/
39403939
async function*(inputs, output, typeInfo, currentCrate) {
3941-
const index = this.database.getIndex("normalizedName");
3940+
const index = this.database.getData("normalizedName");
39423941
if (!index) {
39433942
return;
39443943
}

src/librustdoc/html/static/js/stringdex.d.ts

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,8 @@ declare namespace stringdex {
55
* The client interface to Stringdex.
66
*/
77
interface Database {
8-
getIndex(colname: string): SearchTree|undefined;
98
getData(colname: string): DataColumn|undefined;
109
}
11-
/**
12-
* A search index file.
13-
*/
14-
interface SearchTree {
15-
trie(): Trie;
16-
search(name: Uint8Array|string): Promise<Trie?>;
17-
searchLev(name: Uint8Array|string): AsyncGenerator<Trie>;
18-
}
1910
/**
2011
* A compressed node in the search tree.
2112
*
@@ -29,9 +20,7 @@ declare namespace stringdex {
2920
matches(): RoaringBitmap;
3021
substringMatches(): AsyncGenerator<RoaringBitmap>;
3122
prefixMatches(): AsyncGenerator<RoaringBitmap>;
32-
keys(): Uint8Array;
3323
keysExcludeSuffixOnly(): Uint8Array;
34-
children(): [number, Promise<Trie>][];
3524
childrenExcludeSuffixOnly(): [number, Promise<Trie>][];
3625
child(id: number): Promise<Trie>?;
3726
}
@@ -41,6 +30,8 @@ declare namespace stringdex {
4130
interface DataColumn {
4231
isEmpty(id: number): boolean;
4332
at(id: number): Promise<Uint8Array|undefined>;
33+
search(name: Uint8Array|string): Promise<Trie?>;
34+
searchLev(name: Uint8Array|string): AsyncGenerator<Trie>;
4435
length: number,
4536
}
4637
/**

0 commit comments

Comments
 (0)