From 4ee33489de5ab4827d079bebdafa59a03e9f3b7a Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Tue, 19 Dec 2023 23:21:44 +0100 Subject: [PATCH 1/3] feat: characterClass --- .../__tests__/character-class.test.ts | 28 ++++- src/components/character-class.ts | 116 +++++++++++++----- src/components/types.ts | 11 +- 3 files changed, 123 insertions(+), 32 deletions(-) diff --git a/src/components/__tests__/character-class.test.ts b/src/components/__tests__/character-class.test.ts index 4e8ab2c..a98995d 100644 --- a/src/components/__tests__/character-class.test.ts +++ b/src/components/__tests__/character-class.test.ts @@ -1,7 +1,9 @@ -import { oneOrMore, optionally, zeroOrMore } from '../quantifiers'; +import { one, oneOrMore, optionally, zeroOrMore } from '../quantifiers'; import { any, anyOf, + characterClass, + characterRange, digit, encodeCharacterClass, inverted, @@ -33,6 +35,25 @@ test('`whitespace` character class', () => { expect(['x', whitespace, 'x']).toHavePattern('x\\sx'); }); +test('"characterClass" base cases', () => { + expect(characterClass(characterRange('a', 'z'))).toHavePattern('[a-z]'); + expect( + characterClass(characterRange('a', 'z'), characterRange('A', 'Z')) + ).toHavePattern('[a-zA-Z]'); + expect(characterClass(characterRange('a', 'z'), anyOf('05'))).toHavePattern( + '[a-z05]' + ); + expect( + characterClass(characterRange('a', 'z'), whitespace, anyOf('05')) + ).toHavePattern('[a-z\\s05]'); +}); + +test('"characterRange" base cases', () => { + expect(characterRange('a', 'z')).toHavePattern('[a-z]'); + expect(['x', characterRange('0', '9')]).toHavePattern('x[0-9]'); + expect([characterRange('A', 'F'), 'x']).toHavePattern('[A-F]x'); +}); + test('`anyOf` base cases', () => { expect(anyOf('a')).toHavePattern('a'); expect(['x', anyOf('a'), 'x']).toHavePattern('xax'); @@ -81,9 +102,10 @@ test('`encodeCharacterClass` throws on empty text', () => { encodeCharacterClass({ type: 'characterClass', characters: [], - inverted: false, + ranges: [], + isInverted: false, }) ).toThrowErrorMatchingInlineSnapshot( - `"Character class should contain at least one character"` + `"Character class should contain at least one character or character range"` ); }); diff --git a/src/components/character-class.ts b/src/components/character-class.ts index 880030b..1569844 100644 --- a/src/components/character-class.ts +++ b/src/components/character-class.ts @@ -5,27 +5,78 @@ import type { CharacterClass } from './types'; export const any: CharacterClass = { type: 'characterClass', characters: ['.'], - inverted: false, + ranges: [], + isInverted: false, }; export const digit: CharacterClass = { type: 'characterClass', characters: ['\\d'], - inverted: false, + ranges: [], + isInverted: false, }; export const word: CharacterClass = { type: 'characterClass', characters: ['\\w'], - inverted: false, + ranges: [], + isInverted: false, }; export const whitespace: CharacterClass = { type: 'characterClass', characters: ['\\s'], - inverted: false, + ranges: [], + isInverted: false, }; +export function characterClass(...elements: CharacterClass[]): CharacterClass { + elements.forEach((element) => { + if (element.isInverted) { + throw new Error( + '`characterClass` should receive only non-inverted character classes' + ); + } + }); + + return { + type: 'characterClass', + characters: elements.map((c) => c.characters).flat(), + ranges: elements.map((c) => c.ranges).flat(), + isInverted: false, + }; +} + +export function characterRange(start: string, end: string): CharacterClass { + if (start.length !== 1) { + throw new Error( + '`characterRange` should receive only single character `start` string' + ); + } + + if (end.length !== 1) { + throw new Error( + '`characterRange` should receive only single character `end` string' + ); + } + + if (start > end) { + throw new Error('`start` should be less or equal to `end`'); + } + + const range = { + start: escapeText(start), + end: escapeText(end), + }; + + return { + type: 'characterClass', + characters: [], + ranges: [range], + isInverted: false, + }; +} + export function anyOf(characters: string): CharacterClass { const charactersArray = characters.split('').map(escapeText); if (charactersArray.length === 0) { @@ -35,46 +86,55 @@ export function anyOf(characters: string): CharacterClass { return { type: 'characterClass', characters: charactersArray, - inverted: false, + ranges: [], + isInverted: false, }; } -export function inverted(characterClass: CharacterClass): CharacterClass { +export function inverted({ + characters, + ranges, + isInverted, +}: CharacterClass): CharacterClass { return { type: 'characterClass', - characters: characterClass.characters, - inverted: !characterClass.inverted, + characters: characters, + ranges: ranges, + isInverted: !isInverted, }; } -export function encodeCharacterClass( - characterClass: CharacterClass -): EncoderNode { - if (characterClass.characters.length === 0) { - throw new Error('Character class should contain at least one character'); +export function encodeCharacterClass({ + characters, + ranges, + isInverted, +}: CharacterClass): EncoderNode { + if (characters.length === 0 && ranges.length === 0) { + throw new Error( + 'Character class should contain at least one character or character range' + ); } - if (characterClass.characters.length === 1 && !characterClass.inverted) { + // Direct rendering for single-character class + if (characters.length === 1 && ranges?.length === 0 && !isInverted) { return { precedence: EncoderPrecedence.Atom, - pattern: characterClass.characters[0]!, + pattern: characters[0]!, }; } - const characterString = reorderHyphen(characterClass.characters).join(''); + // If passed characters includes hyphen (`-`) it need to be moved to + // first (or last) place in order to treat it as hyphen character and not a range. + // See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes#types + const hypenString = characters.includes('-') ? '-' : ''; + const charactersString = characters.filter((c) => c !== '-').join(''); + const rangesString = ranges + .map(({ start, end }) => `${start}-${end}`) + .join(''); + const invertedString = isInverted ? '^' : ''; + return { precedence: EncoderPrecedence.Atom, - pattern: `[${characterClass.inverted ? '^' : ''}${characterString}]`, + pattern: `[${invertedString}${hypenString}${rangesString}${charactersString}]`, }; } - -// If passed characters includes hyphen (`-`) it need to be moved to -// first (or last) place in order to treat it as hyphen character and not a range. -// See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes#types -function reorderHyphen(characters: string[]) { - if (characters.includes('-')) { - return ['-', ...characters.filter((c) => c !== '-')]; - } - - return characters; -} diff --git a/src/components/types.ts b/src/components/types.ts index 9d26d5c..fb5bc98 100644 --- a/src/components/types.ts +++ b/src/components/types.ts @@ -5,7 +5,16 @@ export type Quantifier = One | OneOrMore | Optionally | ZeroOrMore | Repeat; export type CharacterClass = { type: 'characterClass'; characters: string[]; - inverted: boolean; + ranges: CharacterRange[]; + isInverted: boolean; +}; + +/** + * Character range from start to end (inclusive). + */ +export type CharacterRange = { + start: string; + end: string; }; // Components From 44248a554d46d8affee9c073a3797adf2fb2bc3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Fri, 22 Dec 2023 11:13:50 +0100 Subject: [PATCH 2/3] chore: improve codecov --- .../__tests__/character-class.test.ts | 26 ++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/src/components/__tests__/character-class.test.ts b/src/components/__tests__/character-class.test.ts index a98995d..0656092 100644 --- a/src/components/__tests__/character-class.test.ts +++ b/src/components/__tests__/character-class.test.ts @@ -1,4 +1,4 @@ -import { one, oneOrMore, optionally, zeroOrMore } from '../quantifiers'; +import { oneOrMore, optionally, zeroOrMore } from '../quantifiers'; import { any, anyOf, @@ -35,7 +35,7 @@ test('`whitespace` character class', () => { expect(['x', whitespace, 'x']).toHavePattern('x\\sx'); }); -test('"characterClass" base cases', () => { +test('`characterClass` base cases', () => { expect(characterClass(characterRange('a', 'z'))).toHavePattern('[a-z]'); expect( characterClass(characterRange('a', 'z'), characterRange('A', 'Z')) @@ -48,12 +48,32 @@ test('"characterClass" base cases', () => { ).toHavePattern('[a-z\\s05]'); }); -test('"characterRange" base cases', () => { +test('`characterClass` throws on inverted arguments', () => { + expect(() => + characterClass(inverted(whitespace)) + ).toThrowErrorMatchingInlineSnapshot( + `"\`characterClass\` should receive only non-inverted character classes"` + ); +}); + +test('`characterRange` base cases', () => { expect(characterRange('a', 'z')).toHavePattern('[a-z]'); expect(['x', characterRange('0', '9')]).toHavePattern('x[0-9]'); expect([characterRange('A', 'F'), 'x']).toHavePattern('[A-F]x'); }); +test('`characterRange` throws on incorrect arguments', () => { + expect(() => characterRange('z', 'a')).toThrowErrorMatchingInlineSnapshot( + `"\`start\` should be less or equal to \`end\`"` + ); + expect(() => characterRange('aa', 'z')).toThrowErrorMatchingInlineSnapshot( + `"\`characterRange\` should receive only single character \`start\` string"` + ); + expect(() => characterRange('a', 'zz')).toThrowErrorMatchingInlineSnapshot( + `"\`characterRange\` should receive only single character \`end\` string"` + ); +}); + test('`anyOf` base cases', () => { expect(anyOf('a')).toHavePattern('a'); expect(['x', anyOf('a'), 'x']).toHavePattern('xax'); From 88139c2fc823a1b2d745d5eba4ed21eb2cf0d3ae Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Fri, 22 Dec 2023 14:19:06 +0100 Subject: [PATCH 3/3] docs: update README --- README.md | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 806ea90..8e890c5 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # TS Regex Builder -User-friendly egular Expression builder for TypeScript and JavaScript. +User-friendly Regular Expression builder for TypeScript and JavaScript. -## The problem & solution +## Goal Regular expressions are a powerful tool for matching complex text patterns, yet they are notorious for their hard-to-understand syntax. @@ -13,15 +13,19 @@ Inspired by Swift's Regex Builder, this library allows users to write easily and const hexColor = /^#?([a-fA-F0-9]{6}|[a-fA-F0-9]{3})$/; // After -const hexDigit = characterClass(['a', 'f'], ['A', 'F'], ['0', '9']); +const hexDigit = characterClass( + characterRange('a', 'f'), + characterRange('A', 'F'), + characterRange('0', '9') +); + const hexColor = buildRegex( startOfString, - '#', - choiceOf( - repeat({ count: 6 }, hexDigit), - repeat({ count: 3 }, hexDigit), + optionally('#'), + capture( + choiceOf(repeat({ count: 6 }, hexDigit), repeat({ count: 3 }, hexDigit)) ), - endOfString, + endOfString ); ``` @@ -34,10 +38,10 @@ npm install ts-regex-builder ## Usage ```js -import { buildRegex, oneOrMore } from 'ts-regex-builder'; +import { buildRegex, capture, oneOrMore } from 'ts-regex-builder'; -// /(Hello)+ World/ -const regex = buildRegex(oneOrMore('Hello'), ' World'); +// /Hello (\w+)/ +const regex = buildRegex('Hello ', capture(oneOrMore(word))); ``` ## Contributing