|
| 1 | +#!/usr/bin/env node |
| 2 | +'use strict' |
| 3 | + |
| 4 | +const __doc__ = ` |
| 5 | +Usage: |
| 6 | + genindex.js <source> <outputPrefix> --config=<path> |
| 7 | +` |
| 8 | + |
| 9 | +const assert = require('assert') |
| 10 | +const child_process = require('child_process') |
| 11 | +const fs = require('fs') |
| 12 | +const pathModule = require('path') |
| 13 | +const process = require('process') |
| 14 | + |
| 15 | +const docopt = require('docopt') |
| 16 | +const sax = require('sax') |
| 17 | +const toml = require('toml') |
| 18 | +const lunr = require('lunr') |
| 19 | + |
// Matches a TOML front-matter block delimited by `+++` lines at the very start
// of a file. Group 1 is the raw TOML between the delimiters; match[0] spans
// both delimiters, so its length is the offset at which the document body
// begins. The capture is lazy so that it stops at the FIRST closing `+++`
// rather than at the last `+++` line anywhere in the document body.
const PAT_HEADMATTER = /^\+\+\+\n([^]+?)\n\+\+\+/
| 21 | + |
// Shorten one number so that its decimal string form fits in roughly ten
// characters, without ever changing its order of magnitude.
//
// value - the number to shorten.
//
// Returns the shortened number (digits are truncated, not rounded).
function truncateNumber(value) {
  const MAX_LENGTH = 10
  const text = String(value)
  if (text.length <= MAX_LENGTH) { return value }

  // Exponent notation ("1.2345678901234e-7"): the exponent suffix must
  // survive, so only the mantissa is shortened.
  const exponentAt = text.indexOf('e')
  if (exponentAt !== -1) {
    const exponent = text.slice(exponentAt)
    const mantissa = text.slice(0, MAX_LENGTH - exponent.length)
    return Number(mantissa + exponent)
  }

  // A long integer cannot lose digits without changing its value.
  const pointAt = text.indexOf('.')
  if (pointAt === -1) { return value }

  // Only cut at or after the decimal point so the integer part stays whole.
  return Number(text.slice(0, Math.max(pointAt, MAX_LENGTH)))
}

// Recursively step through an object and replace any numbers with a number
// representable in a short ASCII string.
//
// r - the value to process. Objects and arrays are modified in place; any
//     other value (including null) is ignored.
//
// Returns nothing.
function truncateNumbers(r) {
  if (!(r instanceof Object)) { return }
  for (const child of Object.keys(r)) {
    const value = r[child]
    if (typeof value === 'number') {
      r[child] = truncateNumber(value)
    } else {
      truncateNumbers(value)
    }
  }
}
| 33 | + |
// Module-wide search state.
//
// idx    - the lunr index; documents are keyed by `id` and searched on
//          title, tags, minorTitles (all boosted) and body.
// docId  - id that will be given to the next indexed document.
// slugs  - slug of every indexed document, in the order they were added.
// toJSON - returns the serialised index with `slugs` attached.
const searchIndex = {
  idx: lunr(function configureIndex() {
    this.ref('id')

    const boostedFields = [['title', 20], ['tags', 15], ['minorTitles', 5]]
    for (const [name, boost] of boostedFields) {
      this.field(name, { boost })
    }
    this.field('body')
  }),

  docId: 0,

  slugs: [],

  toJSON() {
    const index = searchIndex.idx

    // Shrink the token store before serialising to keep the output small.
    truncateNumbers(index.tokenStore.tokens)
    index.tokenStore.compress()

    const serialized = index.toJSON()
    serialized.slugs = searchIndex.slugs
    return serialized
  }
}
| 52 | + |
// Lazily yield the path of every regular file found beneath `root`.
//
// root - directory to start walking from.
//
// Yields strings of the form `<directory>/<filename>`, built by plain
// concatenation so each path keeps exactly the prefix the caller passed in.
//
// Directories are remembered by their resolved real path, so a directory that
// is reached a second time through a symlink (including a symlink cycle) is
// listed only once instead of being walked again.
//
// Errors thrown by the underlying fs calls (for example an unreadable
// directory or a dangling symlink) propagate to the generator's consumer.
function* walk(root) {
  const pending = [root]
  const visited = new Set()
  while (pending.length) {
    const currentRoot = pending.pop()

    // Concatenated path strings differ on every trip round a symlink loop,
    // so the visited check has to compare resolved paths.
    const resolved = fs.realpathSync(currentRoot)
    if (visited.has(resolved)) { continue }
    visited.add(resolved)

    for (const filename of fs.readdirSync(currentRoot)) {
      const path = currentRoot + '/' + filename
      const stat = fs.statSync(path)
      if (stat.isFile()) {
        yield path
      } else if (stat.isDirectory()) {
        pending.push(path)
      }
    }
  }
}
| 71 | + |
// Render one document body to XML with the external `mmark` command and
// collect the text that should be searchable.
//
// path       - path of the source file; only used in error output.
// headmatter - parsed front matter; its `title`, `tags` and `slug` are read.
// xml        - document text that follows the front matter; sent to mmark's
//              standard input.
//
// Returns a document object { id, title, tags, minorTitles, body } suitable
// for adding to searchIndex.idx.
//
// Side effects: takes the next id from searchIndex.docId (incrementing it) and
// appends headmatter.slug to searchIndex.slugs, so ids index into slugs.
//
// Throws an Error if mmark does not exit with status 0 or if the XML cannot be
// parsed. Throws an AssertionError if a <name> element appears outside of any
// <section>.
function parseXML(path, headmatter, xml) {
  const spawnOutput = child_process.spawnSync('mmark', ['-xml'], {encoding: 'utf-8', input: xml})
  // `status` is null when the command could not be started or was killed by a
  // signal; that also counts as failure here.
  if (spawnOutput.status !== 0) {
    throw new Error('Command "mmark" failed')
  }

  // Wrap the output in a single element so the parser always sees exactly one
  // document root.
  const text = `<root>${spawnOutput.stdout}</root>`
  const parser = sax.parser(true, {
    trim: true,
    normalize: true
  })

  const doc = {
    id: searchIndex.docId,
    // Section names get appended to the title below; start from an empty
    // string when the front matter has no title so the text "undefined" never
    // ends up in the index.
    title: headmatter.title == null ? '' : String(headmatter.title),
    tags: headmatter.tags,
    minorTitles: [],
    body: []
  }
  searchIndex.docId += 1
  searchIndex.slugs.push(headmatter.slug)

  let sectionDepth = 0
  let inName = false
  let parseFailed = false

  // The callback parameter must not reuse the flag's name, otherwise the
  // assignment would only change the parameter and the failure would be lost.
  parser.onerror = function (err) {
    console.error('Error parsing ' + path)
    console.error(err)
    parseFailed = true
  }

  parser.ontext = function (chunk) {
    if (inName) {
      assert.ok(sectionDepth >= 1)
      if (sectionDepth === 1) {
        // Names of top-level sections count as part of the document title.
        doc.title += ' ' + chunk
      } else {
        doc.minorTitles.push(chunk)
      }

      return
    }

    doc.body.push(chunk)
  }

  parser.onopentag = function (node) {
    if (node.name === 'section') {
      sectionDepth += 1
    } else if (node.name === 'name') {
      inName = true
    }
  }

  parser.onclosetag = function (name) {
    if (name === 'section') {
      sectionDepth -= 1
    } else if (name === 'name') {
      assert.strictEqual(inName, true)
      inName = false
    }
  }

  parser.write(text).close()
  if (parseFailed) { throw new Error('Parse error') }

  doc.title = doc.title.trim()
  doc.body = doc.body.join(' ').trim()

  return doc
}
| 144 | + |
// Read one source file, add it to the search index and describe it for the
// page listing.
//
// path - path of the file to process.
//
// Returns the array [slug, title, tags] taken from the file's front matter.
// When the front matter has no slug, the file's base name is used instead.
//
// Throws an Error when the file does not start with a front-matter block;
// errors from reading, TOML parsing or parseXML propagate unchanged.
function processFile(path) {
  const contents = fs.readFileSync(path, { encoding: 'utf-8' })

  const found = PAT_HEADMATTER.exec(contents)
  if (found === null) {
    throw new Error('Couldn\'t find headmatter')
  }

  // found[0] is the whole delimited block, found[1] the TOML inside it.
  const [delimited, rawToml] = found
  const meta = toml.parse(rawToml)
  if (!meta.slug) {
    meta.slug = pathModule.parse(path).base
  }

  // Everything after the front-matter block is the document body.
  const body = contents.slice(delimited.length)
  searchIndex.idx.add(parseXML(path, meta, body))

  const { slug, title, tags } = meta
  return [slug, title, tags]
}
| 163 | + |
// Command-line entry point.
//
// Parses the arguments described in __doc__, indexes every file found under
// <source>, and checks each page's tags against the `tags` table of the
// --config TOML file. On success it writes two files whose names are the
// <outputPrefix> string with `tags.json` and `search.json` appended.
//
// Every per-file failure and every unknown tag is reported on stderr; if any
// occurred, the process exits with status 1 and nothing is written.
function main() {
  const args = docopt.docopt(__doc__)
  const data = []
  const tagManifest = toml.parse(fs.readFileSync(args['--config'], { encoding: 'utf-8' })).tags || {}
  let error = false

  for (const path of walk(args['<source>'])) {
    // page is the [slug, title, tags] array returned by processFile.
    let page
    try {
      page = processFile(path)
    } catch (err) {
      console.error(`Error processing ${path}: ${err}`)
      error = true
      continue
    }

    // Front matter without a `tags` key leaves this slot undefined; treat it
    // as "no tags" instead of crashing on a non-iterable value.
    const tags = page[2] || []
    for (const tag of tags) {
      // Own-property check so that names inherited from Object.prototype
      // (e.g. "constructor") are not mistaken for declared tags.
      if (!Object.prototype.hasOwnProperty.call(tagManifest, tag)) {
        console.error(`Unknown tag "${tag}" in ${path}`)
        error = true
      }
    }

    data.push(page)
  }

  if (error) {
    process.exit(1)
  }

  fs.writeFileSync(args['<outputPrefix>'] + 'tags.json', JSON.stringify({
    tags: tagManifest,
    pages: data
  }))

  const searchIndexJSON = searchIndex.toJSON()
  fs.writeFileSync(args['<outputPrefix>'] + 'search.json', JSON.stringify(searchIndexJSON))
}
| 202 | + |
// Run the indexer immediately when this script is executed.
main()
0 commit comments