diff --git a/.editorconfig b/.editorconfig index 65365be..bd89331 100644 --- a/.editorconfig +++ b/.editorconfig @@ -9,6 +9,8 @@ root = true indent_style = space indent_size = 2 +max_line_length = 100 + end_of_line = lf charset = utf-8 trim_trailing_whitespace = true diff --git a/README.md b/README.md index 6934c41..6b21fcd 100644 --- a/README.md +++ b/README.md @@ -19,11 +19,15 @@ const hexDigit = characterClass( characterRange('0', '9') ); +// prettier-ignore const hexColor = buildRegex( startOfString, optionally('#'), capture( - choiceOf(repeat({ count: 6 }, hexDigit), repeat({ count: 3 }, hexDigit)) + choiceOf( + repeat({ count: 6 }, hexDigit), + repeat({ count: 3 }, hexDigit) + ) ), endOfString ); @@ -44,6 +48,10 @@ import { buildRegex, capture, oneOrMore } from 'ts-regex-builder'; const regex = buildRegex(['Hello ', capture(oneOrMore(word))]); ``` +## Examples + +See [Examples document](./docs/Examples.md). + ## Contributing See the [contributing guide](CONTRIBUTING.md) to learn how to contribute to the repository and the development workflow. diff --git a/docs/Examples.md b/docs/Examples.md new file mode 100644 index 0000000..32f1c32 --- /dev/null +++ b/docs/Examples.md @@ -0,0 +1,41 @@ +# Regex Examples + +## IPv4 address validation + +```ts +// Match integers from 0-255 +const octet = choiceOf( + [digit], + [characterRange('1', '9'), digit], + ['1', repeat({ count: 2 }, digit)], + ['2', characterRange('0', '4'), digit], + ['25', characterRange('0', '5')] +); + +// Match +const regex = buildRegex([ + startOfString, + capture(octet), + '.', + capture(octet), + '.', + capture(octet), + '.', + capture(octet), + endOfString, +]); +``` + +This code generates the following regex pattern: + +```ts +const regex = + /^(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])$/; +``` + +This pattern uses repetition of the `capture(octet)` elements to generate capture groups for each of the IPv4 octets: + +```ts +// Matched groups ['192.168.0.1', '192', '168', '0', '1',] +const match = regex.exec('192.168.0.1'); +``` diff --git a/src/__tests__/builder.test.ts b/src/__tests__/builder.test.ts index 6aa5f59..2488a42 100644 --- a/src/__tests__/builder.test.ts +++ b/src/__tests__/builder.test.ts @@ -19,7 +19,5 @@ test('`regexBuilder` flags', () => { expect(buildRegex({ sticky: true }, 'a').flags).toBe('y'); expect(buildRegex({ sticky: false }, 'a').flags).toBe(''); - expect( - buildRegex({ global: true, ignoreCase: true, multiline: false }, 'a').flags - ).toBe('gi'); + expect(buildRegex({ global: true, ignoreCase: true, multiline: false }, 'a').flags).toBe('gi'); }); diff --git a/src/__tests__/examples.ts b/src/__tests__/examples.ts new file mode 100644 index 0000000..02df4c0 --- /dev/null +++ b/src/__tests__/examples.ts @@ -0,0 +1,49 @@ +import { + buildRegex, + capture, + characterRange, + choiceOf, + digit, + endOfString, + repeat, + startOfString, +} from '../index'; + +test('example: IPv4 address validator', () => { + const octet = choiceOf( + [digit], + [characterRange('1', '9'), digit], + ['1', repeat({ count: 2 }, digit)], + ['2', characterRange('0', '4'), digit], + ['25', characterRange('0', '5')] + ); + + const regex = buildRegex([ + startOfString, + capture(octet), + '.', + capture(octet), + '.', + capture(octet), + '.', + capture(octet), + endOfString, + ]); + + expect(regex).toMatchGroups('0.0.0.0', ['0.0.0.0', '0', '0', '0', '0']); + expect(regex).toMatchGroups('192.168.0.1', ['192.168.0.1', '192', '168', '0', '1']); + expect(regex).toMatchGroups('1.99.100.249', ['1.99.100.249', '1', '99', '100', '249']); + expect(regex).toMatchGroups('255.255.255.255', ['255.255.255.255', '255', '255', '255', '255']); + expect(regex).toMatchGroups('123.45.67.89', ['123.45.67.89', '123', '45', '67', '89']); + + expect(regex.test('0.0.0.')).toBe(false); + expect(regex.test('0.0.0.0.')).toBe(false); + expect(regex.test('0.-1.0.0')).toBe(false); + expect(regex.test('0.1000.0.0')).toBe(false); + expect(regex.test('0.0.300.0')).toBe(false); + expect(regex.test('255.255.255.256')).toBe(false); + + expect(regex.source).toEqual( + '^(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])$' + ); +}); diff --git a/src/builders.ts b/src/builders.ts index 6446091..3a811ef 100644 --- a/src/builders.ts +++ b/src/builders.ts @@ -35,19 +35,13 @@ export function buildRegex(elements: RegexNode | RegexNode[]): RegExp; * @param flags RegExp flags object * @returns RegExp object */ -export function buildRegex( - flags: RegexFlags, - elements: RegexNode | RegexNode[] -): RegExp; +export function buildRegex(flags: RegexFlags, elements: RegexNode | RegexNode[]): RegExp; export function buildRegex(first: any, second?: any): RegExp { return _buildRegex(...optionalFirstArg(first, second)); } -export function _buildRegex( - flags: RegexFlags, - elements: RegexNode | RegexNode[] -): RegExp { +export function _buildRegex(flags: RegexFlags, elements: RegexNode | RegexNode[]): RegExp { const pattern = encodeSequence(asNodeArray(elements)).pattern; const flagsString = encodeFlags(flags ?? {}); return new RegExp(pattern, flagsString); diff --git a/src/components/__tests__/capture.test.tsx b/src/components/__tests__/capture.test.tsx index a969eee..2777819 100644 --- a/src/components/__tests__/capture.test.tsx +++ b/src/components/__tests__/capture.test.tsx @@ -11,9 +11,5 @@ test('`capture` base cases', () => { test('`capture` captures group', () => { expect(capture('b')).toMatchGroups('ab', ['b', 'b']); expect(['a', capture('b')]).toMatchGroups('ab', ['ab', 'b']); - expect(['a', capture('b'), capture('c')]).toMatchGroups('abc', [ - 'abc', - 'b', - 'c', - ]); + expect(['a', capture('b'), capture('c')]).toMatchGroups('abc', ['abc', 'b', 'c']); }); diff --git a/src/components/__tests__/character-class.test.ts b/src/components/__tests__/character-class.test.ts index ff14592..c35f360 100644 --- a/src/components/__tests__/character-class.test.ts +++ b/src/components/__tests__/character-class.test.ts @@ -37,21 +37,17 @@ test('`whitespace` character class', () => { test('`characterClass` base cases', () => { expect(characterClass(characterRange('a', 'z'))).toHavePattern('[a-z]'); - expect( - characterClass(characterRange('a', 'z'), characterRange('A', 'Z')) - ).toHavePattern('[a-zA-Z]'); - expect(characterClass(characterRange('a', 'z'), anyOf('05'))).toHavePattern( - '[a-z05]' + expect(characterClass(characterRange('a', 'z'), characterRange('A', 'Z'))).toHavePattern( + '[a-zA-Z]' + ); + expect(characterClass(characterRange('a', 'z'), anyOf('05'))).toHavePattern('[a-z05]'); + expect(characterClass(characterRange('a', 'z'), whitespace, anyOf('05'))).toHavePattern( + '[a-z\\s05]' ); - expect( - characterClass(characterRange('a', 'z'), whitespace, anyOf('05')) - ).toHavePattern('[a-z\\s05]'); }); test('`characterClass` throws on inverted arguments', () => { - expect(() => - characterClass(inverted(whitespace)) - ).toThrowErrorMatchingInlineSnapshot( + expect(() => characterClass(inverted(whitespace))).toThrowErrorMatchingInlineSnapshot( `"\`characterClass\` should receive only non-inverted character classes"` ); }); @@ -89,11 +85,11 @@ test('`anyOf` with quantifiers', () => { }); test('`anyOf` escapes special characters', () => { - expect(anyOf('abc-+.')).toHavePattern('[-abc\\+\\.]'); + expect(anyOf('abc-+.]\\')).toHavePattern('[abc+.\\]\\\\-]'); }); -test('`anyOf` moves hyphen to the first position', () => { - expect(anyOf('a-bc')).toHavePattern('[-abc]'); +test('`anyOf` moves hyphen to the last position', () => { + expect(anyOf('a-bc')).toHavePattern('[abc-]'); }); test('`anyOf` throws on empty text', () => { diff --git a/src/components/__tests__/choice-of.test.ts b/src/components/__tests__/choice-of.test.ts index 852c528..2847141 100644 --- a/src/components/__tests__/choice-of.test.ts +++ b/src/components/__tests__/choice-of.test.ts @@ -24,16 +24,14 @@ test('`choiceOf` used in sequence', () => { test('`choiceOf` with sequence options', () => { expect([choiceOf(['a', 'b'])]).toHavePattern('ab'); expect([choiceOf(['a', 'b'], ['c', 'd'])]).toHavePattern('ab|cd'); - expect([ - choiceOf(['a', zeroOrMore('b')], [oneOrMore('c'), 'd']), - ]).toHavePattern('ab*|c+d'); + expect([choiceOf(['a', zeroOrMore('b')], [oneOrMore('c'), 'd'])]).toHavePattern('ab*|c+d'); }); test('`choiceOf` using nested regex', () => { expect(choiceOf(oneOrMore('a'), zeroOrMore('b'))).toHavePattern('a+|b*'); - expect( - choiceOf(repeat({ min: 1, max: 3 }, 'a'), repeat({ count: 5 }, 'bx')) - ).toHavePattern('a{1,3}|(?:bx){5}'); + expect(choiceOf(repeat({ min: 1, max: 3 }, 'a'), repeat({ count: 5 }, 'bx'))).toHavePattern( + 'a{1,3}|(?:bx){5}' + ); }); test('`choiceOf` throws on empty options', () => { diff --git a/src/components/__tests__/repeat.test.tsx b/src/components/__tests__/repeat.test.tsx index 94a2267..e524b28 100644 --- a/src/components/__tests__/repeat.test.tsx +++ b/src/components/__tests__/repeat.test.tsx @@ -7,12 +7,8 @@ test('`repeat` quantifier', () => { expect(['a', repeat({ min: 1 }, 'b')]).toHavePattern('ab{1,}'); expect(['a', repeat({ count: 1 }, 'b')]).toHavePattern('ab{1}'); - expect(['a', repeat({ count: 1 }, ['a', zeroOrMore('b')])]).toHavePattern( - 'a(?:ab*){1}' - ); - expect(repeat({ count: 5 }, ['text', ' ', oneOrMore('d')])).toHavePattern( - '(?:text d+){5}' - ); + expect(['a', repeat({ count: 1 }, ['a', zeroOrMore('b')])]).toHavePattern('a(?:ab*){1}'); + expect(repeat({ count: 5 }, ['text', ' ', oneOrMore('d')])).toHavePattern('(?:text d+){5}'); }); test('`repeat` optimizes grouping for atoms', () => { diff --git a/src/components/character-class.ts b/src/components/character-class.ts index 54525d6..270c680 100644 --- a/src/components/character-class.ts +++ b/src/components/character-class.ts @@ -1,5 +1,4 @@ import type { EncodeOutput } from '../encoder/types'; -import { escapeText } from '../utils/text'; export interface CharacterClass { type: 'characterClass'; @@ -52,9 +51,7 @@ export const whitespace: CharacterClass = { export function characterClass(...elements: CharacterClass[]): CharacterClass { elements.forEach((element) => { if (element.isInverted) { - throw new Error( - '`characterClass` should receive only non-inverted character classes' - ); + throw new Error('`characterClass` should receive only non-inverted character classes'); } }); @@ -69,37 +66,29 @@ export function characterClass(...elements: CharacterClass[]): CharacterClass { export function characterRange(start: string, end: string): CharacterClass { if (start.length !== 1) { - throw new Error( - '`characterRange` should receive only single character `start` string' - ); + throw new Error('`characterRange` should receive only single character `start` string'); } if (end.length !== 1) { - throw new Error( - '`characterRange` should receive only single character `end` string' - ); + throw new Error('`characterRange` should receive only single character `end` string'); } if (start > end) { throw new Error('`start` should be before or equal to `end`'); } - const range = { - start: escapeText(start), - end: escapeText(end), - }; - return { type: 'characterClass', characters: [], - ranges: [range], + ranges: [{ start, end }], isInverted: false, encode: encodeCharacterClass, }; } export function anyOf(characters: string): CharacterClass { - const charactersArray = characters.split('').map(escapeText); + const charactersArray = characters.split('').map((c) => escapeForCharacterClass(c)); + if (charactersArray.length === 0) { throw new Error('`anyOf` should received at least one character'); } @@ -125,17 +114,11 @@ export function inverted(element: CharacterClass): CharacterClass { function encodeCharacterClass(this: CharacterClass): EncodeOutput { if (this.characters.length === 0 && this.ranges.length === 0) { - throw new Error( - 'Character class should contain at least one character or character range' - ); + throw new Error('Character class should contain at least one character or character range'); } // Direct rendering for single-character class - if ( - this.characters.length === 1 && - this.ranges?.length === 0 && - !this.isInverted - ) { + if (this.characters.length === 1 && this.ranges?.length === 0 && !this.isInverted) { return { precedence: 'atom', pattern: this.characters[0]!, @@ -147,13 +130,15 @@ function encodeCharacterClass(this: CharacterClass): EncodeOutput { // See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes#types const hyphen = this.characters.includes('-') ? '-' : ''; const otherCharacters = this.characters.filter((c) => c !== '-').join(''); - const ranges = this.ranges - .map(({ start, end }) => `${start}-${end}`) - .join(''); + const ranges = this.ranges.map(({ start, end }) => `${start}-${end}`).join(''); const isInverted = this.isInverted ? '^' : ''; return { precedence: 'atom', - pattern: `[${isInverted}${hyphen}${ranges}${otherCharacters}]`, + pattern: `[${isInverted}${ranges}${otherCharacters}${hyphen}]`, }; } + +function escapeForCharacterClass(text: string): string { + return text.replace(/[\]\\]/g, '\\$&'); // $& means the whole matched string +} diff --git a/src/components/choice-of.ts b/src/components/choice-of.ts index 47c7121..1f9ecf2 100644 --- a/src/components/choice-of.ts +++ b/src/components/choice-of.ts @@ -8,9 +8,7 @@ export interface ChoiceOf extends RegexElement { alternatives: RegexNode[][]; } -export function choiceOf( - ...alternatives: Array -): ChoiceOf { +export function choiceOf(...alternatives: Array): ChoiceOf { if (alternatives.length === 0) { throw new Error('`choiceOf` should receive at least one alternative'); } diff --git a/src/components/repeat.ts b/src/components/repeat.ts index 76f2368..fd24544 100644 --- a/src/components/repeat.ts +++ b/src/components/repeat.ts @@ -11,10 +11,7 @@ export interface Repeat extends RegexElement { export type RepeatOptions = { count: number } | { min: number; max?: number }; -export function repeat( - options: RepeatOptions, - nodes: RegexNode | RegexNode[] -): Repeat { +export function repeat(options: RepeatOptions, nodes: RegexNode | RegexNode[]): Repeat { const children = asNodeArray(nodes); if (children.length === 0) { @@ -41,8 +38,6 @@ function encodeRepeat(this: Repeat): EncodeOutput { return { precedence: 'sequence', - pattern: `${atomicNodes.pattern}{${this.options.min},${ - this.options?.max ?? '' - }}`, + pattern: `${atomicNodes.pattern}{${this.options.min},${this.options?.max ?? ''}}`, }; } diff --git a/src/encoder/__tests__/encoder.test.tsx b/src/encoder/__tests__/encoder.test.tsx index dbb103e..a71a28c 100644 --- a/src/encoder/__tests__/encoder.test.tsx +++ b/src/encoder/__tests__/encoder.test.tsx @@ -1,9 +1,5 @@ import { buildPattern, buildRegex } from '../../builders'; -import { - oneOrMore, - optionally, - zeroOrMore, -} from '../../components/quantifiers'; +import { oneOrMore, optionally, zeroOrMore } from '../../components/quantifiers'; import { repeat } from '../../components/repeat'; test('basic quantifies', () => { @@ -49,18 +45,14 @@ test('`buildPattern` escapes special characters', () => { expect('*.*').toHavePattern('\\*\\.\\*'); - expect([oneOrMore('.*'), zeroOrMore('[]{}')]).toHavePattern( - '(?:\\.\\*)+(?:\\[\\]\\{\\})*' - ); + expect([oneOrMore('.*'), zeroOrMore('[]{}')]).toHavePattern('(?:\\.\\*)+(?:\\[\\]\\{\\})*'); }); test('`buildRegex` throws error on unknown element', () => { expect(() => // @ts-expect-error intentionally passing incorrect object buildRegex({ type: 'unknown' }) - ).toThrowErrorMatchingInlineSnapshot( - `"\`encodeNode\`: unknown element type unknown"` - ); + ).toThrowErrorMatchingInlineSnapshot(`"\`encodeNode\`: unknown element type unknown"`); }); test('`buildPattern` throws on empty text', () => { diff --git a/src/encoder/encoder.ts b/src/encoder/encoder.ts index 2d2614d..eeb51e7 100644 --- a/src/encoder/encoder.ts +++ b/src/encoder/encoder.ts @@ -49,9 +49,7 @@ function concatSequence(encoded: EncodeOutput[]): EncodeOutput { return { precedence: 'sequence', - pattern: encoded - .map((n) => (n.precedence === 'alternation' ? asAtom(n) : n).pattern) - .join(''), + pattern: encoded.map((n) => (n.precedence === 'alternation' ? asAtom(n) : n).pattern).join(''), }; } diff --git a/test-utils/to-have-pattern.ts b/test-utils/to-have-pattern.ts index 1867b0d..b3b4652 100644 --- a/test-utils/to-have-pattern.ts +++ b/test-utils/to-have-pattern.ts @@ -12,9 +12,7 @@ export function toHavePattern( nodes.forEach((e) => { if (!isRegexNode(e)) { - throw new Error( - `\`toHavePattern()\` received an array of RegexElements and strings.` - ); + throw new Error(`\`toHavePattern()\` received an array of RegexElements and strings.`); } }); @@ -28,9 +26,7 @@ export function toHavePattern( message: () => this.utils.matcherHint('toHavePattern', undefined, undefined, options) + '\n\n' + - `Expected: ${this.isNot ? 'not ' : ''}${this.utils.printExpected( - expected - )}\n` + + `Expected: ${this.isNot ? 'not ' : ''}${this.utils.printExpected(expected)}\n` + `Received: ${this.utils.printReceived(received)}`, }; } diff --git a/test-utils/to-match-groups.ts b/test-utils/to-match-groups.ts index 87eb86f..8d912d7 100644 --- a/test-utils/to-match-groups.ts +++ b/test-utils/to-match-groups.ts @@ -5,21 +5,24 @@ import { isRegexNode } from './utils'; export function toMatchGroups( this: jest.MatcherContext, - nodes: RegexNode | RegexNode[], + received: RegExp | RegexNode | RegexNode[], input: string, expected: string[] ) { - nodes = asNodeArray(nodes); + let regex; + if (received instanceof RegExp) { + regex = received; + } else { + const nodes = asNodeArray(received); + nodes.forEach((e) => { + if (!isRegexNode(e)) { + throw new Error(`\`toMatchGroups()\` received an array of RegexElements and strings.`); + } + }); - nodes.forEach((e) => { - if (!isRegexNode(e)) { - throw new Error( - `\`toMatchGroups()\` received an array of RegexElements and strings.` - ); - } - }); + regex = buildRegex(nodes); + } - const regex = buildRegex(nodes); const options = { isNot: this.isNot, }; @@ -32,9 +35,7 @@ export function toMatchGroups( message: () => this.utils.matcherHint('toMatchGroups', undefined, undefined, options) + '\n\n' + - `Expected: ${this.isNot ? 'not ' : ''}${this.utils.printExpected( - expected - )}\n` + + `Expected: ${this.isNot ? 'not ' : ''}${this.utils.printExpected(expected)}\n` + `Received: ${this.utils.printReceived(actual)}`, }; } diff --git a/tsconfig.json b/tsconfig.json index 6266709..95bc470 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -2,7 +2,7 @@ "compilerOptions": { "rootDir": ".", "paths": { - "ts-regex": ["./src/index"] + "ts-regex-builder": ["./src/index"] }, "allowUnreachableCode": false, "allowUnusedLabels": false,