diff --git a/README.md b/README.md index 5fc466e..0d5f8ad 100644 --- a/README.md +++ b/README.md @@ -13,11 +13,7 @@ This library allows users to create regular expressions in a structured way, mak const hexColor = /^#?([a-fA-F0-9]{6}|[a-fA-F0-9]{3})$/; // After -const hexDigit = characterClass( - characterRange('a', 'f'), - characterRange('A', 'F'), - characterRange('0', '9') -); +const hexDigit = charClass(charRange('a', 'f'), charRange('A', 'F'), charRange('0', '9')); // prettier-ignore const hexColor = buildRegex( @@ -39,7 +35,7 @@ const hexColor = buildRegex( npm install ts-regex-builder ``` -or +or ```sh yarn add ts-regex-builder @@ -59,14 +55,16 @@ const regex = buildRegex(['Hello ', capture(oneOrMore(word))]); TS Regex Builder allows you to build complex regular expressions using domain-specific language of regex components. Terminology: -* regex component (e.g., `capture()`, `oneOrMore()`, `word`) - function or object representing a regex construct -* regex element (`RegexElement`) - object returned by regex components -* regex sequence (`RegexSequence`) - single regex element or string (`RegexElement | string`) or array of such elements and strings (`Array`) + +- regex component (e.g., `capture()`, `oneOrMore()`, `word`) - function or object representing a regex construct +- regex element (`RegexElement`) - object returned by regex components +- regex sequence (`RegexSequence`) - single regex element or string (`RegexElement | string`) or array of such elements and strings (`Array`) Most of the regex components accept a regex sequence. 
Examples of sequences: -* single string: `'Hello World'` (note: all characters will be automatically escaped in the resulting regex) -* single element: `capture('abc')` -* array of elements and strings: `['$', oneOrMore(digit)]` + +- single string: `'Hello World'` (note: all characters will be automatically escaped in the resulting regex) +- single element: `capture('abc')` +- array of elements and strings: `['$', oneOrMore(digit)]` Regex components can be composed into a complex tree: @@ -75,16 +73,15 @@ const currencyAmount = buildRegex([ choiceOf( '$', '€', - repeat({ count: 3 }, characterRange('A', 'Z')), // ISO currency code + repeat({ count: 3 }, charRange('A', 'Z')) // ISO currency code ), capture( oneOrMore(digit), // Integer part - optionally(['.', repeat({ count: 2}, digit)]), // Fractional part + optionally(['.', repeat({ count: 2 }, digit)]) // Fractional part ), -]) +]); ``` - ### Regex Builders | Regex Component | Regex Pattern | Description | @@ -100,9 +97,9 @@ const currencyAmount = buildRegex([ | `choiceOf(x, y, z)` | `x\|y\|z` | Match one of provided sequences | Notes: -* `capture` accepts a sequence of elements -* `choiceOf()` accepts a variable number of sequences +- `capture` accepts a sequence of elements +- `choiceOf()` accepts a variable number of sequences ### Quantifiers @@ -119,24 +116,24 @@ All quantifiers accept sequence of elements ### Character classes -| Regex Component | Regex Pattern | Description | -| -------------------------- | ------------- | ------------------------------------------- | -| `any` | `.` | Any character | -| `word` | `\w` | Word characters | -| `digit` | `\d` | Digit characters | -| `whitespace` | `\s` | Whitespace characters | -| `anyOf('abc')` | `[abc]` | Any of supplied characters | -| `characterRange('a', 'z')` | `[a-z]` | Range of characters | -| `characterClass(...)` | `[...]` | Concatenation of multiple character classes | -| `inverted(...)` | `[^...]` | Negation of a given character class | +| Regex 
Component | Regex Pattern | Description | +| --------------------- | ------------- | ------------------------------------------- | +| `any` | `.` | Any character | +| `word` | `\w` | Word characters | +| `digit` | `\d` | Digit characters | +| `whitespace` | `\s` | Whitespace characters | +| `anyOf('abc')` | `[abc]` | Any of supplied characters | +| `charRange('a', 'z')` | `[a-z]` | Range of characters | +| `charClass(...)` | `[...]` | Concatenation of multiple character classes | +| `inverted(...)` | `[^...]` | Negation of a given character class | Notes: -* `any`, `word`, `digit`, `whitespace` are objects, no need to call them -* `anyof` accepts a single string of characters to match -* `characterRange` accepts exactly **two single character** strings representing range start and end (inclusive) -* `characterClass` accepts a variable number of character classes to join into a single class -* `inverted` accepts a single character class to be inverted +- `any`, `word`, `digit`, `whitespace` are objects, no need to call them +- `anyOf` accepts a single string of characters to match +- `charRange` accepts exactly **two single character** strings representing range start and end (inclusive) +- `charClass` accepts a variable number of character classes to join into a single class +- `inverted` accepts a single character class to be inverted ### Anchors @@ -146,7 +143,8 @@ Notes: | `endOfString` | `$` | Match end of the string (or end of a line in multiline mode) | Notes: -* `startOfString`, `endOfString` are objects, no need to call them. + +- `startOfString`, `endOfString` are objects, no need to call them. 
## Examples diff --git a/docs/Examples.md b/docs/Examples.md index 32f1c32..32505d7 100644 --- a/docs/Examples.md +++ b/docs/Examples.md @@ -6,10 +6,10 @@ // Match integers from 0-255 const octet = choiceOf( [digit], - [characterRange('1', '9'), digit], + [charRange('1', '9'), digit], ['1', repeat({ count: 2 }, digit)], - ['2', characterRange('0', '4'), digit], - ['25', characterRange('0', '5')] + ['2', charRange('0', '4'), digit], + ['25', charRange('0', '5')] ); // Match diff --git a/src/__tests__/examples.ts b/src/__tests__/examples.ts index fb48e11..4848d9f 100644 --- a/src/__tests__/examples.ts +++ b/src/__tests__/examples.ts @@ -1,7 +1,7 @@ import { buildRegex, capture, - characterRange, + charRange, choiceOf, digit, endOfString, @@ -12,10 +12,10 @@ import { test('example: IPv4 address validator', () => { const octet = choiceOf( [digit], - [characterRange('1', '9'), digit], + [charRange('1', '9'), digit], ['1', repeat({ count: 2 }, digit)], - ['2', characterRange('0', '4'), digit], - ['25', characterRange('0', '5')] + ['2', charRange('0', '4'), digit], + ['25', charRange('0', '5')] ); const regex = buildRegex([ diff --git a/src/components/__tests__/character-class.test.ts b/src/components/__tests__/character-class.test.ts index 95a4f83..eaab83f 100644 --- a/src/components/__tests__/character-class.test.ts +++ b/src/components/__tests__/character-class.test.ts @@ -2,8 +2,8 @@ import { oneOrMore, optionally, zeroOrMore } from '../quantifiers'; import { any, anyOf, - characterClass, - characterRange, + charClass, + charRange, digit, inverted, whitespace, @@ -35,38 +35,34 @@ test('`whitespace` character class', () => { expect(['x', whitespace, 'x']).toHavePattern(/x\sx/); }); -test('`characterClass` base cases', () => { - expect(characterClass(characterRange('a', 'z'))).toHavePattern(/[a-z]/); - expect(characterClass(characterRange('a', 'z'), characterRange('A', 'Z'))).toHavePattern( - /[a-zA-Z]/ - ); - expect(characterClass(characterRange('a', 'z'), 
anyOf('05'))).toHavePattern(/[a-z05]/); - expect(characterClass(characterRange('a', 'z'), whitespace, anyOf('05'))).toHavePattern( - /[a-z\s05]/ - ); +test('`charClass` base cases', () => { + expect(charClass(charRange('a', 'z'))).toHavePattern(/[a-z]/); + expect(charClass(charRange('a', 'z'), charRange('A', 'Z'))).toHavePattern(/[a-zA-Z]/); + expect(charClass(charRange('a', 'z'), anyOf('05'))).toHavePattern(/[a-z05]/); + expect(charClass(charRange('a', 'z'), whitespace, anyOf('05'))).toHavePattern(/[a-z\s05]/); }); -test('`characterClass` throws on inverted arguments', () => { - expect(() => characterClass(inverted(whitespace))).toThrowErrorMatchingInlineSnapshot( - `"\`characterClass\` should receive only non-inverted character classes"` +test('`charClass` throws on inverted arguments', () => { + expect(() => charClass(inverted(whitespace))).toThrowErrorMatchingInlineSnapshot( + `"\`charClass\` should receive only non-inverted character classes"` ); }); -test('`characterRange` base cases', () => { - expect(characterRange('a', 'z')).toHavePattern(/[a-z]/); - expect(['x', characterRange('0', '9')]).toHavePattern(/x[0-9]/); - expect([characterRange('A', 'F'), 'x']).toHavePattern(/[A-F]x/); +test('`charRange` base cases', () => { + expect(charRange('a', 'z')).toHavePattern(/[a-z]/); + expect(['x', charRange('0', '9')]).toHavePattern(/x[0-9]/); + expect([charRange('A', 'F'), 'x']).toHavePattern(/[A-F]x/); }); -test('`characterRange` throws on incorrect arguments', () => { - expect(() => characterRange('z', 'a')).toThrowErrorMatchingInlineSnapshot( +test('`charRange` throws on incorrect arguments', () => { + expect(() => charRange('z', 'a')).toThrowErrorMatchingInlineSnapshot( `"\`start\` should be before or equal to \`end\`"` ); - expect(() => characterRange('aa', 'z')).toThrowErrorMatchingInlineSnapshot( - `"\`characterRange\` should receive only single character \`start\` string"` + expect(() => charRange('aa', 'z')).toThrowErrorMatchingInlineSnapshot( + 
`"\`charRange\` should receive only single character \`start\` string"` ); - expect(() => characterRange('a', 'zz')).toThrowErrorMatchingInlineSnapshot( - `"\`characterRange\` should receive only single character \`end\` string"` + expect(() => charRange('a', 'zz')).toThrowErrorMatchingInlineSnapshot( + `"\`charRange\` should receive only single character \`end\` string"` ); }); @@ -119,7 +115,7 @@ test('`encodeCharacterClass` throws on empty text', () => { // @ts-expect-error inverted({ type: 'characterClass', - characters: [], + chars: [], ranges: [], isInverted: false, }) diff --git a/src/components/character-class.ts b/src/components/character-class.ts index 270c680..4279779 100644 --- a/src/components/character-class.ts +++ b/src/components/character-class.ts @@ -2,7 +2,7 @@ import type { EncodeOutput } from '../encoder/types'; export interface CharacterClass { type: 'characterClass'; - characters: string[]; + chars: string[]; ranges: CharacterRange[]; isInverted: boolean; encode: () => EncodeOutput; @@ -18,7 +18,7 @@ export interface CharacterRange { export const any: CharacterClass = { type: 'characterClass', - characters: ['.'], + chars: ['.'], ranges: [], isInverted: false, encode: encodeCharacterClass, @@ -26,7 +26,7 @@ export const any: CharacterClass = { export const digit: CharacterClass = { type: 'characterClass', - characters: ['\\d'], + chars: ['\\d'], ranges: [], isInverted: false, encode: encodeCharacterClass, @@ -34,7 +34,7 @@ export const digit: CharacterClass = { export const word: CharacterClass = { type: 'characterClass', - characters: ['\\w'], + chars: ['\\w'], ranges: [], isInverted: false, encode: encodeCharacterClass, @@ -42,35 +42,35 @@ export const word: CharacterClass = { export const whitespace: CharacterClass = { type: 'characterClass', - characters: ['\\s'], + chars: ['\\s'], ranges: [], isInverted: false, encode: encodeCharacterClass, }; -export function characterClass(...elements: CharacterClass[]): CharacterClass { +export 
function charClass(...elements: CharacterClass[]): CharacterClass { elements.forEach((element) => { if (element.isInverted) { - throw new Error('`characterClass` should receive only non-inverted character classes'); + throw new Error('`charClass` should receive only non-inverted character classes'); } }); return { type: 'characterClass', - characters: elements.map((c) => c.characters).flat(), + chars: elements.map((c) => c.chars).flat(), ranges: elements.map((c) => c.ranges).flat(), isInverted: false, encode: encodeCharacterClass, }; } -export function characterRange(start: string, end: string): CharacterClass { +export function charRange(start: string, end: string): CharacterClass { if (start.length !== 1) { - throw new Error('`characterRange` should receive only single character `start` string'); + throw new Error('`charRange` should receive only single character `start` string'); } if (end.length !== 1) { - throw new Error('`characterRange` should receive only single character `end` string'); + throw new Error('`charRange` should receive only single character `end` string'); } if (start > end) { @@ -79,7 +79,7 @@ export function characterRange(start: string, end: string): CharacterClass { return { type: 'characterClass', - characters: [], + chars: [], ranges: [{ start, end }], isInverted: false, encode: encodeCharacterClass, @@ -87,15 +87,15 @@ export function characterRange(start: string, end: string): CharacterClass { } export function anyOf(characters: string): CharacterClass { - const charactersArray = characters.split('').map((c) => escapeForCharacterClass(c)); + const chars = characters.split('').map((c) => escapeForCharacterClass(c)); - if (charactersArray.length === 0) { + if (chars.length === 0) { throw new Error('`anyOf` should received at least one character'); } return { type: 'characterClass', - characters: charactersArray, + chars, ranges: [], isInverted: false, encode: encodeCharacterClass, @@ -105,7 +105,7 @@ export function anyOf(characters: 
string): CharacterClass { export function inverted(element: CharacterClass): CharacterClass { return { type: 'characterClass', - characters: element.characters, + chars: element.chars, ranges: element.ranges, isInverted: !element.isInverted, encode: encodeCharacterClass, @@ -113,29 +113,29 @@ export function inverted(element: CharacterClass): CharacterClass { } function encodeCharacterClass(this: CharacterClass): EncodeOutput { - if (this.characters.length === 0 && this.ranges.length === 0) { + if (this.chars.length === 0 && this.ranges.length === 0) { throw new Error('Character class should contain at least one character or character range'); } // Direct rendering for single-character class - if (this.characters.length === 1 && this.ranges?.length === 0 && !this.isInverted) { + if (this.chars.length === 1 && this.ranges?.length === 0 && !this.isInverted) { return { precedence: 'atom', - pattern: this.characters[0]!, + pattern: this.chars[0]!, }; } // If passed characters includes hyphen (`-`) it need to be moved to // first (or last) place in order to treat it as hyphen character and not a range. // See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes#types - const hyphen = this.characters.includes('-') ? '-' : ''; - const otherCharacters = this.characters.filter((c) => c !== '-').join(''); + const hyphen = this.chars.includes('-') ? '-' : ''; + const otherChars = this.chars.filter((c) => c !== '-').join(''); const ranges = this.ranges.map(({ start, end }) => `${start}-${end}`).join(''); const isInverted = this.isInverted ? 
'^' : ''; return { precedence: 'atom', - pattern: `[${isInverted}${ranges}${otherCharacters}${hyphen}]`, + pattern: `[${isInverted}${ranges}${otherChars}${hyphen}]`, }; } diff --git a/src/index.ts b/src/index.ts index 1e3d723..ee5b82f 100644 --- a/src/index.ts +++ b/src/index.ts @@ -10,8 +10,8 @@ export { whitespace, word, anyOf, - characterRange, - characterClass, + charRange, + charClass, inverted, } from './components/character-class'; export { choiceOf } from './components/choice-of';