Skip to content

Commit 6d2b1a6

Browse files
committed
🤔 Rethink HTML processing
- switches to a different mini-parser and executes different actions based on different tag fragments
1 parent cea64bc commit 6d2b1a6

File tree

5 files changed

+120
-44
lines changed

5 files changed

+120
-44
lines changed

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
"doc-chomp": "1.1.0",
3333
"front-matter": "^2.1.0",
3434
"highlight.js": "^9.6.0",
35+
"html-void-elements": "^1.0.1",
3536
"htmltojsx": "https://github.com/reactjs/react-magic#e63dabeefb87d4ce1e74ac2ddb73b0164c344219",
3637
"jsesc": "^2.2.0",
3738
"loader-utils": "^0.2.16",

src/html-to-jsx.js

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,6 @@
11
import HTMLtoJSX from 'htmltojsx';
22

3-
/**
 * Expands every self-closing tag (`<name ... />`) in an HTML string into an
 * explicit open/close pair (`<name ... ></name>`).
 *
 * The string is scanned for tag fragments (`<name`, `/>`, `</` and `>`) while a
 * stack of the currently open tag names is maintained, so a `/>` can be paired
 * with the name of the element it terminates.
 *
 * @param {string} html - Markup to rewrite.
 * @returns {string} The markup with self-closing tags expanded.
 */
const prepareHtml = (html) => {
  // Names of the elements opened so far and not yet closed
  const openTagNames = [];

  return html.replace(
    // Tag names start with a letter and may contain digits, dots and hyphens
    // (`h1`, `Foo.Bar`, `my-element`); a letters-and-dots-only pattern would
    // truncate `h1` to `h` and produce a mismatched `</h>`.
    /(<([a-z][a-z0-9.\-]*)|\/>|<\/|>)/gi,
    (match, fragment, tagName) => {
      // A captured name means the fragment is `<name`: remember what was opened
      if (tagName) {
        openTagNames.push(tagName);
        return match;
      }

      switch (fragment) {
        case '/>':
          // A `/>` with nothing open (e.g. inside plain text) is left alone
          // instead of being turned into `></undefined>`.
          return openTagNames.length > 0
            ? `></${openTagNames.pop()}>`
            : match;
        case '</':
          openTagNames.pop();
          return match;
        default:
          return match;
      }
    }
  );
};
32-
333
export default (html, indent) => {
34-
html = prepareHtml(html);
35-
364
const jsxConverter = new HTMLtoJSX({ createClass: false });
375

386
let jsx = jsxConverter.convert(html);

src/index.js

Lines changed: 62 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
1+
import anyBase from 'any-base';
12
import frontMatter from 'front-matter';
23
import { getLoaderConfig } from 'loader-utils';
4+
import hash from 'sha.js';
35
import HighlightJS from 'highlight.js';
46
import MarkdownIt from 'markdown-it';
57
import { DOM as ReactDOM } from 'react';
68

9+
import VOID_ELEMENTS from 'html-void-elements';
10+
711
import formatImport from './formatters/import';
812
import formatModule from './formatters/module';
913
import formatStatic from './formatters/static';
1014
import htmlToJsx from './html-to-jsx';
15+
import processHtml from './process-html';
1116
import StringReplacementCache from './string-replacement-cache';
1217

1318
const DEFAULT_CONFIGURATION = {
@@ -16,6 +21,14 @@ const DEFAULT_CONFIGURATION = {
1621
markdownItPlugins: []
1722
};
1823

24+
// Builds an identifier made only of the letters a-z from `content`: the
// SHA-256 hex digest of the content is re-encoded from the hexadecimal alphabet
// into the 26-letter lowercase alphabet.
const lowercaseHash = (content) => {
  const hexToLowercaseLetters = anyBase(anyBase.HEX, 'abcdefghijklmnopqrstuvwxyz');
  const hexDigest = hash('sha256').update(content, 'utf-8').digest('hex');

  return hexToLowercaseLetters(hexDigest);
};
31+
1932
module.exports = function(source) {
2033
// This loader is deterministic and will return the same thing for the same inputs!
2134
this.cacheable && this.cacheable();
@@ -113,7 +126,34 @@ module.exports = function(source) {
113126
);
114127
}
115128

116-
const html = renderer.render(markdownSansAssignments);
129+
const tagCache = {};
130+
131+
const html = processHtml(
132+
renderer.render(markdownSansAssignments),
133+
(match, tagFragment, tag) => {
134+
switch (tagFragment) {
135+
case '<':
136+
// tag names which won't survive browser serialization, or those with
137+
// special characters, need to be cached
138+
if (tag.tagName.indexOf('.') !== -1 || tag.tagName.toLowerCase() !== tag.tagName) {
139+
const nameHash = lowercaseHash(tag.tagName);
140+
tagCache[nameHash] = tag.tagName;
141+
tag.tagName = nameHash;
142+
}
143+
return `<${tag.tagName}`;
144+
case '/>':
145+
return (
146+
VOID_ELEMENTS.indexOf(tagCache[tag.tagName] || tag.tagName) === -1
147+
? `></${tag.tagName}>`
148+
: match
149+
);
150+
case '</':
151+
return `</${tag.tagName}`;
152+
}
153+
154+
return match;
155+
}
156+
);
117157

118158
let jsx = htmlToJsx(
119159
html || '<!-- no input given -->',
@@ -123,17 +163,28 @@ module.exports = function(source) {
123163
// Unload caches so we've got our values back!
124164
jsx = stylePropertyCache.unload(assignmentExpressionCache.unload(jsx));
125165

126-
// Pass through `elementProps` to tags React knows about (the others are already under our control)
127-
if (config.passElementProps) {
128-
jsx = jsx.replace(
129-
/<([^\/][^\s>]*)([^/>\s]*)/g,
130-
(match, tagName) => {
131-
return (tagName in ReactDOM)
132-
? `${match} {...elementProps['${tagName}']}`
133-
: match;
166+
jsx = processHtml(
167+
jsx,
168+
(match, tagFragment, { tagName, state }) => {
169+
const correctedTagName = tagCache[tagName] || tagName;
170+
171+
switch (tagFragment) {
172+
case '<':
173+
// Pass through `elementProps` to tags React knows about (the others are already under our control)
174+
if (config.passElementProps && correctedTagName in ReactDOM) {
175+
return `<${correctedTagName} {...elementProps['${correctedTagName}']}`;
176+
}
177+
return `<${correctedTagName}`;
178+
case '/>':
179+
case '>':
180+
return match;
181+
case '</':
182+
return `</${correctedTagName}`;
134183
}
135-
);
136-
}
184+
185+
return match;
186+
}
187+
).replace(/\s\s\{/, ' {'); // Remove double spaces before spread statements;
137188

138189
return formatModule(
139190
config,

src/process-html.js

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// Returns the index of the innermost open element whose name, as written in
// the markup, equals `name`, or -1 when there is none.
const findOpenTagIndex = (tree, name) => {
  for (let index = tree.length - 1; index >= 0; index -= 1) {
    if (tree[index].sourceName === name) {
      return index;
    }
  }

  return -1;
};

/**
 * Scans a markup string for tag fragments and lets `callback` rewrite each one.
 *
 * The fragments recognised are `<` (start of an opening tag), `</` (start of a
 * closing tag), `/>` (end of a self-closing tag) and `>` (end of any other
 * tag). A stack of open elements is kept while scanning; each entry is an
 * object `{ tagName, sourceName, state }` where `state` is `'open'` (inside the
 * opening tag), `'content'` (after its `>`) or `'closing'` (inside the closing
 * tag). `sourceName` always holds the name as written in the markup, so
 * callbacks are free to overwrite `tagName`.
 *
 * @param {string} html - Markup to process.
 * @param {Function} callback - Called as `callback(match, tagFragment, tag, tree)`
 *   for every fragment; its return value replaces `match` in the output.
 *   `match` is the matched text (including the tag name for `<`/`</`),
 *   `tagFragment` is one of the four fragments above, `tag` is the stack entry
 *   the fragment belongs to and `tree` is the stack itself.
 * @returns {string} The rewritten markup, or `html` untouched when `callback`
 *   is not a function.
 */
const processHtml = (html, callback) => {
  if (typeof callback !== 'function') {
    return html;
  }

  const tree = [];

  return html.replace(
    /(<\/?([a-z][a-z0-9\.\-]*)|\/>|>)/gi,
    (match, tagFragment, tagName) => {
      // A captured name means the match is `<name` or `</name`; drop the name
      // so only the delimiter (`<` or `</`) is left.
      const fragment = tagName ? tagFragment.slice(0, -tagName.length) : tagFragment;

      let currentTag = tree[tree.length - 1] || {};
      let shouldPopTree = false;

      switch (fragment) {
        case '<':
          currentTag = { tagName, sourceName: tagName, state: 'open' };
          tree.push(currentTag);
          break;

        case '/>':
          shouldPopTree = true;
          break;

        case '>':
          if (currentTag.state === 'open') {
            currentTag.state = 'content';
          } else if (currentTag.state === 'closing') {
            shouldPopTree = true;
          }
          break;

        case '</': {
          const openIndex = findOpenTagIndex(tree, tagName);

          if (openIndex !== -1) {
            // Elements left open above the one being closed (e.g. `<br>`
            // written without `/`) are implicitly closed, so the callback
            // receives the element this closing tag really belongs to.
            tree.length = openIndex + 1;
            currentTag = tree[openIndex];
          } else if (tree.length === 0) {
            // Stray closing tag: still hand the callback a usable tag name
            currentTag = { tagName, sourceName: tagName };
          }
          // Otherwise (no element of that name is open) the innermost open
          // element is treated as the one being closed.

          currentTag.state = 'closing';
          break;
        }
      }

      const returnValue = callback(match, fragment, currentTag, tree);

      // Popped only after the callback so it can still see the element on the stack
      if (shouldPopTree) {
        tree.pop();
      }

      return returnValue;
    }
  );
};

export default processHtml;

yarn.lock

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1830,7 +1830,7 @@ faye-websocket@~0.11.0:
18301830

18311831
fb-watchman@^1.8.0, fb-watchman@^1.9.0:
18321832
version "1.9.0"
1833-
resolved "https://registry.yarnpkg.com/fb-watchman/-/fb-watchman-1.9.0.tgz#6f268f1f347a6b3c875d1e89da7e1ed79adfc0ec"
1833+
resolved "http://registry.npmjs.org/fb-watchman/-/fb-watchman-1.9.0.tgz#6f268f1f347a6b3c875d1e89da7e1ed79adfc0ec"
18341834
dependencies:
18351835
bser "^1.0.2"
18361836

@@ -2177,6 +2177,10 @@ html-encoding-sniffer@^1.0.1:
21772177
dependencies:
21782178
whatwg-encoding "^1.0.1"
21792179

2180+
html-void-elements@^1.0.1:
2181+
version "1.0.1"
2182+
resolved "https://registry.yarnpkg.com/html-void-elements/-/html-void-elements-1.0.1.tgz#f929bea267a19e3535950502ca12c159f1b559af"
2183+
21802184
"htmlparser2@>= 3.7.3 < 4.0.0":
21812185
version "3.9.2"
21822186
resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-3.9.2.tgz#1bdf87acca0f3f9e53fa4fcceb0f4b4cbb00b338"

0 commit comments

Comments
 (0)