Skip to content

Commit 5f3fa8f

Browse files
authored
DOCSP-430: Replace genindex.py with genindex.js, and add lunr (#2)
* Replace genindex.py with genindex.js, and add lunr
1 parent 7892d3d commit 5f3fa8f

File tree

6 files changed

+217
-470
lines changed

6 files changed

+217
-470
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
/build
2+
node_modules/

Makefile

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
1-
PYTHON=python
21
GIT_BRANCH=`git rev-parse --abbrev-ref HEAD`
32

43
.PHONY: build serve help
54

65
help: ## Show this help message
76
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
87

9-
build: ## Build the documentation under build/<git branch>
8+
build: | tools/node_modules ## Build the documentation under build/<git branch>
109
hugo -d build/$(GIT_BRANCH)
11-
$(PYTHON) -B tools/genindex.py --out build/$(GIT_BRANCH)/tags.json --config config.toml content/tutorials/
10+
$(NODE) tools/genindex.js content/tutorials build/$(GIT_BRANCH)/ --config config.toml
1211

1312
serve: ## Host the documentation on port 1313
1413
hugo serve
14+
15+
tools/node_modules: tools/package.json
16+
cd tools && npm update

tools/genindex.js

Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
#!/usr/bin/env node
2+
'use strict'
3+
4+
const __doc__ = `
5+
Usage:
6+
genindex.js <source> <outputPrefix> --config=<path>
7+
`
8+
9+
const assert = require('assert')
10+
const child_process = require('child_process')
11+
const fs = require('fs')
12+
const pathModule = require('path')
13+
const process = require('process')
14+
15+
const docopt = require('docopt')
16+
const sax = require('sax')
17+
const toml = require('toml')
18+
const lunr = require('lunr')
19+
20+
// Matches a leading "+++"-delimited headmatter block; group 1 is the text
// between the two delimiter lines. The body is matched lazily so the block
// ends at the FIRST closing "+++" line; a greedy match would run on to the
// last "+++" line anywhere in the file and swallow document content.
const PAT_HEADMATTER = /^\+\+\+\n([^]+?)\n\+\+\+/
21+
22+
// Recursively step through an object and replace any numbers with a number
// representable in a short ASCII string (about 10 characters).
//
// The object is modified in place and nothing is returned. Anything that is
// not an Object instance (primitives, null, undefined) is ignored.
function truncateNumbers(r) {
    if (!(r instanceof Object)) { return }
    for (const child of Object.keys(r)) {
        const value = r[child]
        if (typeof value === 'number') {
            r[child] = shortenNumber(value)
        } else {
            truncateNumbers(value)
        }
    }
}

// Return `value` with excess fractional digits dropped so that its decimal
// text is roughly 10 characters long, without ever changing its magnitude.
function shortenNumber(value) {
    const text = value.toString()

    // Exponent notation (e.g. "1.2345678901234e-7"): only the mantissa may be
    // shortened. Blindly slicing the whole string would cut off the exponent
    // and yield a completely different number.
    const expAt = text.indexOf('e')
    if (expAt !== -1) {
        const exponent = text.slice(expAt)
        const mantissa = text.slice(0, expAt).slice(0, 10 - exponent.length)
        return Number(mantissa + exponent)
    }

    // Plain integers (and Infinity/NaN) have no fractional digits to drop;
    // slicing them would remove significant digits.
    const dotAt = text.indexOf('.')
    if (dotAt === -1) { return Number(text) }

    // Keep the whole integer part even when it alone exceeds 10 characters.
    return Number(text.slice(0, Math.max(10, dotAt)))
}
33+
34+
// Module-wide state for the full-text search index that is built up while
// documents are processed.
//
//   docId  - numeric id handed to the next indexed document
//   slugs  - slugs of the indexed documents, in the order they were pushed
//   idx    - the lunr index; documents are keyed by "id" and searched on
//            title, tags, minorTitles and body with the boosts given below
//   toJSON - returns the serialized lunr index with the slug list attached
const searchIndex = {
    docId: 0,
    slugs: [],
    idx: lunr(function configureIndex() {
        this.ref('id')
        this.field('title', { boost: 20 })
        this.field('tags', { boost: 15 })
        this.field('minorTitles', { boost: 5 })
        this.field('body')
    }),
    toJSON() {
        const index = searchIndex.idx

        // Shorten the numbers held in the token store before compressing it.
        // NOTE(review): tokenStore.compress() is not defined in this file;
        // presumably it comes from the lunr build pinned in package.json.
        truncateNumbers(index.tokenStore.tokens)
        index.tokenStore.compress()

        const serialized = index.toJSON()
        serialized.slugs = searchIndex.slugs
        return serialized
    }
}
52+
53+
// Generator yielding the path of every regular file found beneath `root`.
//
// Directories are explored iteratively using a stack of pending directories
// rather than recursion. Paths are formed by joining the directory and entry
// name with "/". Entries that are neither files nor directories are skipped.
function* walk(root) {
    const pending = [root]
    const visited = new Set()

    while (pending.length) {
        const dir = pending.pop()
        visited.add(dir)

        const entries = fs.readdirSync(dir)
        for (const entry of entries) {
            const fullPath = dir + '/' + entry
            const info = fs.statSync(fullPath)

            if (info.isFile()) {
                yield fullPath
                continue
            }

            if (!info.isDirectory()) { continue }
            if (visited.has(fullPath)) { continue }
            pending.push(fullPath)
        }
    }
}
71+
72+
// Build the search document for one page.
//
// The page body is piped through the external "mmark -xml" command and the
// resulting XML is scanned with a strict sax parser:
//   - text inside a <name> element at section depth 1 is appended to the title
//   - text inside a <name> element at deeper section depths is collected in
//     minorTitles
//   - all other text is collected into the body
//
// Parameters:
//   path       - path of the source file, used only in error messages
//   headmatter - parsed headmatter; its title, tags and slug are read
//   xml        - the page text that follows the headmatter (fed to mmark)
//
// Returns the document object { id, title, tags, minorTitles, body }.
// Side effects: consumes searchIndex.docId and appends headmatter.slug to
// searchIndex.slugs.
// Throws an Error if mmark does not exit with status 0 or if the XML fails
// to parse.
function parseXML(path, headmatter, xml) {
    const spawnOutput = child_process.spawnSync('mmark', ['-xml'], {encoding: 'utf-8', input: xml})
    if (spawnOutput.status !== 0) {
        throw new Error('Command "mmark" failed')
    }

    // Wrap the output in a single root element before handing it to sax.
    const text = `<root>${spawnOutput.stdout}</root>`
    const parser = sax.parser(true, {
        trim: true,
        normalize: true
    })

    const doc = {
        id: searchIndex.docId,
        title: headmatter.title,
        tags: headmatter.tags,
        minorTitles: [],
        body: []
    }
    searchIndex.docId += 1
    searchIndex.slugs.push(headmatter.slug)

    let sectionDepth = 0
    let inName = false
    let hadParseError = false

    // The handler argument must not share the flag's name: previously both
    // were called "error", so the assignment only touched the argument and
    // the failure check after parsing could never fire.
    parser.onerror = function (err) {
        console.error('Error parsing ' + path)
        console.error(err)
        hadParseError = true
    }

    parser.ontext = function (chunk) {
        if (inName) {
            assert.ok(sectionDepth >= 1)
            if (sectionDepth === 1) {
                doc.title += ' ' + chunk
            } else {
                doc.minorTitles.push(chunk)
            }

            return
        }

        doc.body.push(chunk)
    }

    parser.onopentag = function (node) {
        if (node.name === 'section') {
            sectionDepth += 1
        } else if (node.name === 'name') {
            inName = true
        }
    }

    parser.onclosetag = function (name) {
        if (name === 'section') {
            sectionDepth -= 1
        } else if (name === 'name') {
            assert.equal(inName, true)
            inName = false
        }
    }

    parser.write(text).close()
    if (hadParseError) { throw new Error('Parse error') }

    doc.title = doc.title.trim()
    doc.body = doc.body.join(' ').trim()

    return doc
}
144+
145+
// Index a single source file.
//
// Reads the file as UTF-8, parses its "+++" headmatter as TOML, falls back
// to the file's base name when no slug is given, and adds the search
// document produced by parseXML to the search index.
//
// Returns [slug, title, tags] taken from the headmatter.
// Throws an Error when the file does not start with a headmatter block;
// errors from the TOML parser and from parseXML propagate to the caller.
function processFile(path) {
    const contents = fs.readFileSync(path, { encoding: 'utf-8' })

    const found = contents.match(PAT_HEADMATTER)
    if (!found) {
        throw new Error('Couldn\'t find headmatter')
    }

    const headmatter = toml.parse(found[1])
    if (!headmatter.slug) {
        headmatter.slug = pathModule.parse(path).base
    }

    // Everything after the matched headmatter block is the page body.
    const body = contents.slice(found[0].length)
    searchIndex.idx.add(parseXML(path, headmatter, body))

    return [headmatter.slug, headmatter.title, headmatter.tags]
}
163+
164+
// Entry point: index every file under <source> and write
// "<outputPrefix>tags.json" and "<outputPrefix>search.json".
//
// The tag manifest is the "tags" table of the TOML file given by --config
// (an empty manifest when the table is absent). Each file's tags are checked
// against it. Failures are reported on stderr and processing continues so
// that all problems are listed in one run; if any occurred, the process
// exits with status 1 before anything is written.
function main() {
    const args = docopt.docopt(__doc__)
    const data = []
    const rawConfig = fs.readFileSync(args['--config'], { encoding: 'utf-8' })
    const tagManifest = toml.parse(rawConfig).tags || {}
    let error = false

    for (const path of walk(args['<source>'])) {
        let headmatter
        try {
            headmatter = processFile(path)
        } catch(err) {
            console.error(`Error processing ${path}: ${err}`)
            error = true
            continue
        }

        // A page without a "tags" entry has nothing to validate. Iterating
        // over undefined here would throw outside the try/catch above and
        // abort the whole run with an uncaught TypeError.
        for (const tag of headmatter[2] || []) {
            if (tagManifest[tag] === undefined) {
                console.error(`Unknown tag "${tag}" in ${path}`)
                error = true
            }
        }

        data.push(headmatter)
    }

    if (error) {
        process.exit(1)
    }

    // <outputPrefix> is used as a plain string prefix, so it needs a
    // trailing separator when it names a directory.
    fs.writeFileSync(args['<outputPrefix>'] + 'tags.json', JSON.stringify({
        tags: tagManifest,
        pages: data
    }))

    const searchIndexJSON = searchIndex.toJSON()
    fs.writeFileSync(args['<outputPrefix>'] + 'search.json', JSON.stringify(searchIndexJSON))
}
202+
203+
main()

tools/genindex.py

Lines changed: 0 additions & 70 deletions
This file was deleted.

tools/package.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"dependencies" : {
3+
"docopt": "0.6.2",
4+
"toml" : "2.3.2",
5+
"sax": "1.2.2",
6+
"lunr": "i80and/lunr.js#19a85e62ae8103a48ce5a1fad507f758cf6016d9"
7+
}
8+
}

0 commit comments

Comments
 (0)