Skip to content

Commit b05d838

Browse files
committed
feat(engine-js): improve js engine handling for markdown
1 parent b3d493b commit b05d838

File tree

7 files changed

+49
-28
lines changed

7 files changed

+49
-28
lines changed

docs/references/engine-js-compat.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
| | Count |
1212
| :-------------- | --------------------------------: |
1313
| Total Languages | 213 |
14-
| Fully Supported | [171](#fully-supported-languages) |
15-
| Mismatched | [24](#mismatched-languages) |
14+
| Fully Supported | [172](#fully-supported-languages) |
15+
| Mismatched | [23](#mismatched-languages) |
1616
| Unsupported | [18](#unsupported-languages) |
1717

1818
## Fully Supported Languages
@@ -115,6 +115,7 @@ Languages that works with the JavaScript RegExp engine, and will produce the sam
115115
| make | ✅ OK | 51 | - | |
116116
| marko | ✅ OK | 926 | - | |
117117
| matlab | ✅ OK | 88 | - | |
118+
| mdc | ✅ OK | 784 | - | |
118119
| mojo | ✅ OK | 213 | - | |
119120
| move | ✅ OK | 120 | - | |
120121
| narrat | ✅ OK | 34 | - | |
@@ -209,8 +210,7 @@ Languages that does not throw with the JavaScript RegExp engine, but will produc
209210
| glsl | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=glsl) | 186 | - | 306 |
210211
| haml | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=haml) | 1612 | - | 48 |
211212
| kusto | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=kusto) | 60 | - | 40 |
212-
| markdown | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=markdown) | 118 | - | 648 |
213-
| mdc | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=mdc) | 784 | - | 407 |
213+
| markdown | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=markdown) | 118 | - | 78 |
214214
| mermaid | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=mermaid) | 129 | - | 38 |
215215
| nginx | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=nginx) | 378 | - | 4 |
216216
| objective-cpp | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=objective-cpp) | 309 | - | 172 |

packages/engine-javascript/scripts/generate.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ type Replacement = ReplacementRecursiveBackReference | ReplacementStatic
2020
const replacements: Replacement[] = [
2121
{
2222
// Recursive subroutine references are not supported by the JavaScript regex engine.
23-
// We expand a few levels of recursion to literals to simulate the behavior (incomplete)
23+
// We expand a few levels of recursion into literals to simulate the behavior (the simulation is incomplete)
2424
type: 'recursive-back-reference',
2525
regex: '(?<square>[^\\[\\]\\\\]|\\\\.|\\[\\g<square>*+\\])',
2626
groupName: 'square',
@@ -30,7 +30,7 @@ const replacements: Replacement[] = [
3030
type: 'recursive-back-reference',
3131
regex: '(?<url>(?>[^\\s()]+)|\\(\\g<url>*\\))',
3232
groupName: 'url',
33-
fallback: '[^\\s\\(\\)]',
33+
fallback: '(?>[^\\s()]+)',
3434
},
3535
]
3636

packages/engine-javascript/scripts/utils.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ export function expandRecursiveBackReference(
1515

1616
out = out
1717
.replace(refMarker, fallback)
18-
.replace(groupMaker, '(?:')
1918

2019
return out
2120
}

packages/engine-javascript/src/index.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,21 @@ export class JavaScriptScanner implements PatternScanner {
6666

6767
this.contiguousAnchorSimulation = Array.from({ length: patterns.length }, () => false)
6868
this.regexps = patterns.map((p, idx) => {
69+
/**
70+
* vscode-textmate replaces anchors with \uFFFF, and we are still not sure how to handle that correctly
71+
*
72+
* @see https://github.com/shikijs/vscode-textmate/blob/8d2e84a3aad21afd6b08fd53c7acd421c7f5aa44/src/rule.ts#L687-L702
73+
*
74+
* This is a temporary workaround for the markdown grammar
75+
*/
76+
if (simulation)
77+
p = p.replaceAll('(^|\\\uFFFF)', '(^|\\G)')
78+
79+
// Detect contiguous anchors for simulation
6980
if (simulation && (p.startsWith('(^|\\G)') || p.startsWith('(\\G|^)')))
7081
this.contiguousAnchorSimulation[idx] = true
82+
83+
// Cache
7184
const cached = cache?.get(p)
7285
if (cached) {
7386
if (cached instanceof RegExp) {

packages/engine-javascript/src/replacements.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
export const replacements = [
44
[
55
'(?<square>[^\\[\\]\\\\]|\\\\.|\\[\\g<square>*+\\])',
6-
'(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\])*+\\])*+\\])*+\\])',
6+
'(?<square>[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\])*+\\])*+\\])*+\\])',
77
],
88
[
99
'(?<url>(?>[^\\s()]+)|\\(\\g<url>*\\))',
10-
'(?:(?>[^\\s()]+)|\\((?:(?>[^\\s()]+)|\\((?:(?>[^\\s()]+)|\\([^\\s\\(\\)]*\\))*\\))*\\))',
10+
'(?<url>(?>[^\\s()]+)|\\((?:(?>[^\\s()]+)|\\((?:(?>[^\\s()]+)|\\((?>[^\\s()]+)*\\))*\\))*\\))',
1111
],
1212
] as [string, string][]

packages/engine-javascript/test/scripts.test.ts

Lines changed: 0 additions & 19 deletions
This file was deleted.
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import { describe, expect, it } from 'vitest'
2+
import { expandRecursiveBackReference } from '../scripts/utils'
3+
4+
describe('expandRecursiveBackReference', () => {
5+
it('case 1', () => {
6+
const name = 'square'
7+
const regex = '(?<square>[^\\[\\]\\\\]|\\\\.|\\[\\g<square>*\\])'
8+
const fallback = '(?:[^\\[\\]\\\\])'
9+
10+
expect(expandRecursiveBackReference(regex, name, fallback, 0))
11+
.toMatchInlineSnapshot(`"(?<square>[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\])*\\])"`)
12+
13+
expect(expandRecursiveBackReference(regex, name, fallback, 1))
14+
.toMatchInlineSnapshot(`"(?<square>[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\])*\\])*\\])"`)
15+
16+
expect(expandRecursiveBackReference(regex, name, fallback, 2))
17+
.toMatchInlineSnapshot(`"(?<square>[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\])*\\])*\\])*\\])"`)
18+
})
19+
20+
it('case 2', () => {
21+
const name = 'url'
22+
const regex = '(?<url>(?>[^\\s()]+)|\\(\\g<url>*\\))'
23+
const fallback = '(?>[^\\s()]+)'
24+
25+
expect(expandRecursiveBackReference(regex, name, fallback, 0))
26+
.toMatchInlineSnapshot(`"(?<url>(?>[^\\s()]+)|\\((?>[^\\s()]+)*\\))"`)
27+
})
28+
})

0 commit comments

Comments
 (0)