diff --git a/README.md b/README.md index 6b21fcd..e403691 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ # TS Regex Builder -User-friendly Regular Expression builder for TypeScript and JavaScript. +A user-friendly regular expression builder for TypeScript and JavaScript. ## Goal -Regular expressions are a powerful tool for matching complex text patterns, yet they are notorious for their hard-to-understand syntax. +Regular expressions are a powerful tool for matching simple and complex text patterns, yet they are notorious for their hard-to-understand syntax. -Inspired by Swift's Regex Builder, this library allows users to write easily and understand regular expressions. +Inspired by Swift's Regex Builder, this library allows users to write and understand regular expressions easily. ```ts // Before @@ -26,10 +26,10 @@ const hexColor = buildRegex( capture( choiceOf( repeat({ count: 6 }, hexDigit), - repeat({ count: 3 }, hexDigit) + repeat({ count: 3 }, hexDigit), ) ), - endOfString + endOfString, ); ``` @@ -39,7 +39,13 @@ const hexColor = buildRegex( npm install ts-regex-builder ``` -## Usage +or + +```sh +yarn add ts-regex-builder +``` + +## Basic usage ```js import { buildRegex, capture, oneOrMore } from 'ts-regex-builder'; @@ -48,6 +54,94 @@ import { buildRegex, capture, oneOrMore } from 'ts-regex-builder'; const regex = buildRegex(['Hello ', capture(oneOrMore(word))]); ``` +## Domain-specific language + +TS Regex Builder allows you to build complex regular expressions using a domain-specific language of regex components. + +Terminology: +* regex component (e.g., `capture()`, `oneOrMore()`, `word`) - function or object representing a regex construct +* regex element (`RegexElement`) - object returned by regex components +* regex sequence (`RegexSequence`) - single regex element or string (`RegexElement | string`) or array of such elements and strings (`Array<RegexElement | string>`) + +Most of the regex components accept a regex sequence. 
Examples of sequences: +* single string: `'Hello World'` - note all characters will be automatically escaped in the resulting regex +* single element: `capture('abc')` +* array of elements and strings: `['$', oneOrMore(digit)]` + +Regex components can be composed into a complex tree: + +```ts +const currencyAmount = buildRegex([ + choiceOf('$', '€', repeat({ count: 3 }, characterRange('A', 'Z'))), + oneOrMore(digit), + optionally([ + '.', + repeat({ count: 2}, digit), + ]), +]) +``` + + +### Building regex + +| Regex Component | Regex Pattern | Type | Description | +| --------------------------------------- | ------------- | --------------------------------------------------- | ----------------------------------- | +| `buildRegex(...)` | `/.../` | `(seq: RegexSequence) => RegExp` | Create `RegExp` instance | +| `buildRegex({ ignoreCase: true }, ...)` | `/.../i` | `(flags: RegexFlags, seq: RegexSequence) => RegExp` | Create `RegExp` instance with flags | + +### Components + +| Regex Component | Regex Pattern | Type | Notes | +| ------------------- | ------------- | ---------------------------------------------------- | --------------------------- | +| `capture(...)` | `(...)` | `(seq: RegexSequence) => RegexElement` | Capture group | +| `choiceOf(x, y, z)` | `x\|y\|z` | `(...alternatives: RegexSequence[]) => RegexElement` | Either of provided patterns | + +Notes: +* `choiceOf()` accepts a variable number of sequences. 
+ + +### Quantifiers + +| Regex Component | Regex Pattern | Type | Description | +| -------------------------------- | ------------- | -------------------------------------------------------------------- | ------------------------------------------------- | +| `zeroOrMore(x)` | `x*` | `(seq: RegexSequence) => RegexElement` | Zero or more occurrences of a pattern | +| `oneOrMore(x)` | `x+` | `(seq: RegexSequence) => RegexElement` | One or more occurrences of a pattern | +| `optionally(x)` | `x?` | `(seq: RegexSequence) => RegexElement` | Zero or one occurrence of a pattern | +| `repeat({ count: n }, x)` | `x{n}` | `({ count: number }, seq: RegexSequence) => RegexElement` | Pattern repeats an exact number of times | +| `repeat({ min: n }, x)` | `x{n,}` | `({ min: number }, seq: RegexSequence) => RegexElement` | Pattern repeats at least the given number of times | +| `repeat({ min: n1, max: n2 }, x)` | `x{n1,n2}` | `({ min: number, max: number }, seq: RegexSequence) => RegexElement` | Pattern repeats between n1 and n2 times | + +### Character classes + +| Regex Component | Regex Pattern | Type | Description | +| -------------------------- | ------------- | ------------------------------------------------------ | ------------------------------------------- | +| `any` | `.` | `CharacterClass` | Any character | +| `word` | `\w` | `CharacterClass` | Word characters | +| `digit` | `\d` | `CharacterClass` | Digit characters | +| `whitespace` | `\s` | `CharacterClass` | Whitespace characters | +| `anyOf('abc')` | `[abc]` | `(chars: string) => CharacterClass` | Any of supplied characters | +| `characterRange('a', 'z')` | `[a-z]` | `(from: string, to: string) => CharacterClass` | Range of characters | +| `characterClass(...)` | `[...]` | `(...charClasses: CharacterClass[]) => CharacterClass` | Concatenation of multiple character classes | +| `inverted(...)` | `[^...]` | `(charClass: CharacterClass) => CharacterClass` | Inverts character class | + +Notes: +* `any`, `word`, 
`digit`, `whitespace` - are objects, no need to call them. +* `anyOf` accepts a single string of characters to match +* `characterRange` accepts exactly **two single-character** strings representing range start and end (inclusive). +* `characterClass` accepts a variable number of character classes to join into a single class +* `inverted` accepts a single character class to be inverted + + +### Anchors + +| Regex Component | Regex Pattern | Type | Notes | +| --------------- | ------------- | -------- | ---------------------------------------------------------- | +| `startOfString` | `^` | `Anchor` | Start of the string (or start of a line in multiline mode) | +| `endOfString` | `$` | `Anchor` | End of the string (or end of a line in multiline mode) | + +Notes: +* `startOfString`, `endOfString` - are objects, no need to call them. + ## Examples See [Examples document](./docs/Examples.md). @@ -55,7 +149,6 @@ See [Examples document](./docs/Examples.md). ## Contributing See the [contributing guide](CONTRIBUTING.md) to learn how to contribute to the repository and the development workflow. - See the [project guidelines](GUIDELINES.md) to understand our core principles. ## License diff --git a/src/builders.ts b/src/builders.ts index 3a811ef..e230a6f 100644 --- a/src/builders.ts +++ b/src/builders.ts @@ -1,4 +1,4 @@ -import type { RegexNode } from './types'; +import type { RegexSequence } from './types'; import { encodeSequence } from './encoder/encoder'; import { asNodeArray } from './utils/nodes'; import { optionalFirstArg } from './utils/optional-arg'; @@ -26,7 +26,7 @@ export interface RegexFlags { * @param elements Single regex element or array of elements * @returns */ -export function buildRegex(elements: RegexNode | RegexNode[]): RegExp; +export function buildRegex(sequence: RegexSequence): RegExp; /** * Generate RegExp object from elements with passed flags. 
@@ -35,14 +35,14 @@ export function buildRegex(elements: RegexNode | RegexNode[]): RegExp; * @param flags RegExp flags object * @returns RegExp object */ -export function buildRegex(flags: RegexFlags, elements: RegexNode | RegexNode[]): RegExp; +export function buildRegex(flags: RegexFlags, sequence: RegexSequence): RegExp; export function buildRegex(first: any, second?: any): RegExp { return _buildRegex(...optionalFirstArg(first, second)); } -export function _buildRegex(flags: RegexFlags, elements: RegexNode | RegexNode[]): RegExp { - const pattern = encodeSequence(asNodeArray(elements)).pattern; +export function _buildRegex(flags: RegexFlags, sequence: RegexSequence): RegExp { + const pattern = encodeSequence(asNodeArray(sequence)).pattern; const flagsString = encodeFlags(flags ?? {}); return new RegExp(pattern, flagsString); } @@ -52,8 +52,8 @@ export function _buildRegex(flags: RegexFlags, elements: RegexNode | RegexNode[] * @param elements Single regex element or array of elements * @returns regex pattern string */ -export function buildPattern(elements: RegexNode | RegexNode[]): string { - return encodeSequence(asNodeArray(elements)).pattern; +export function buildPattern(sequence: RegexSequence): string { + return encodeSequence(asNodeArray(sequence)).pattern; } function encodeFlags(flags: RegexFlags): string { diff --git a/src/components/capture.ts b/src/components/capture.ts index 4fd7b21..917632e 100644 --- a/src/components/capture.ts +++ b/src/components/capture.ts @@ -1,17 +1,17 @@ import { encodeSequence } from '../encoder/encoder'; import type { EncodeOutput } from '../encoder/types'; import { asNodeArray } from '../utils/nodes'; -import type { RegexElement, RegexNode } from '../types'; +import type { RegexElement, RegexNode, RegexSequence } from '../types'; export interface Capture extends RegexElement { type: 'capture'; children: RegexNode[]; } -export function capture(nodes: RegexNode | RegexNode[]): Capture { +export function capture(sequence: 
RegexSequence): Capture { return { type: 'capture', - children: asNodeArray(nodes), + children: asNodeArray(sequence), encode: encodeCapture, }; } diff --git a/src/components/choice-of.ts b/src/components/choice-of.ts index 1f9ecf2..27f880c 100644 --- a/src/components/choice-of.ts +++ b/src/components/choice-of.ts @@ -1,14 +1,14 @@ import { encodeSequence } from '../encoder/encoder'; import type { EncodeOutput } from '../encoder/types'; import { asNodeArray } from '../utils/nodes'; -import type { RegexElement, RegexNode } from '../types'; +import type { RegexElement, RegexNode, RegexSequence } from '../types'; export interface ChoiceOf extends RegexElement { type: 'choiceOf'; alternatives: RegexNode[][]; } -export function choiceOf(...alternatives: Array): ChoiceOf { +export function choiceOf(...alternatives: RegexSequence[]): ChoiceOf { if (alternatives.length === 0) { throw new Error('`choiceOf` should receive at least one alternative'); } diff --git a/src/components/quantifiers.ts b/src/components/quantifiers.ts index d379703..6640557 100644 --- a/src/components/quantifiers.ts +++ b/src/components/quantifiers.ts @@ -1,7 +1,7 @@ import { encodeAtom } from '../encoder/encoder'; import type { EncodeOutput } from '../encoder/types'; import { asNodeArray } from '../utils/nodes'; -import type { RegexElement, RegexNode } from '../types'; +import type { RegexElement, RegexNode, RegexSequence } from '../types'; export interface OneOrMore extends RegexElement { type: 'oneOrMore'; @@ -18,26 +18,26 @@ export interface ZeroOrMore extends RegexElement { children: RegexNode[]; } -export function oneOrMore(nodes: RegexNode | RegexNode[]): OneOrMore { +export function oneOrMore(sequence: RegexSequence): OneOrMore { return { type: 'oneOrMore', - children: asNodeArray(nodes), + children: asNodeArray(sequence), encode: encodeOneOrMore, }; } -export function optionally(nodes: RegexNode | RegexNode[]): Optionally { +export function optionally(sequence: RegexSequence): Optionally { 
return { type: 'optionally', - children: asNodeArray(nodes), + children: asNodeArray(sequence), encode: encodeOptionally, }; } -export function zeroOrMore(nodes: RegexNode | RegexNode[]): ZeroOrMore { +export function zeroOrMore(sequence: RegexSequence): ZeroOrMore { return { type: 'zeroOrMore', - children: asNodeArray(nodes), + children: asNodeArray(sequence), encode: encodeZeroOrMore, }; } diff --git a/src/components/repeat.ts b/src/components/repeat.ts index fd24544..7d20a79 100644 --- a/src/components/repeat.ts +++ b/src/components/repeat.ts @@ -1,7 +1,7 @@ import { encodeAtom } from '../encoder/encoder'; import type { EncodeOutput } from '../encoder/types'; import { asNodeArray } from '../utils/nodes'; -import type { RegexElement, RegexNode } from '../types'; +import type { RegexElement, RegexNode, RegexSequence } from '../types'; export interface Repeat extends RegexElement { type: 'repeat'; @@ -11,8 +11,8 @@ export interface Repeat extends RegexElement { export type RepeatOptions = { count: number } | { min: number; max?: number }; -export function repeat(options: RepeatOptions, nodes: RegexNode | RegexNode[]): Repeat { - const children = asNodeArray(nodes); +export function repeat(options: RepeatOptions, sequence: RegexSequence): Repeat { + const children = asNodeArray(sequence); if (children.length === 0) { throw new Error('`repeat` should receive at least one element'); diff --git a/src/types.ts b/src/types.ts index 71efcd2..0130786 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,5 +1,7 @@ import type { EncodeOutput } from './encoder/types'; +export type RegexSequence = RegexNode | RegexNode[]; + export type RegexNode = RegexElement | string; export interface RegexElement { diff --git a/src/utils/nodes.ts b/src/utils/nodes.ts index 76020b0..bb0553e 100644 --- a/src/utils/nodes.ts +++ b/src/utils/nodes.ts @@ -1,5 +1,5 @@ -import type { RegexNode } from '../types'; +import type { RegexNode, RegexSequence } from '../types'; -export function 
asNodeArray(nodeOrArray: RegexNode | RegexNode[]): RegexNode[] { - return Array.isArray(nodeOrArray) ? nodeOrArray : [nodeOrArray]; +export function asNodeArray(sequence: RegexSequence): RegexNode[] { + return Array.isArray(sequence) ? sequence : [sequence]; } diff --git a/test-utils/to-have-pattern.ts b/test-utils/to-have-pattern.ts index 8157113..9521848 100644 --- a/test-utils/to-have-pattern.ts +++ b/test-utils/to-have-pattern.ts @@ -1,9 +1,9 @@ -import type { RegexNode } from '../src/types'; +import type { RegexSequence } from '../src/types'; import { asRegExp } from './utils'; export function toHavePattern( this: jest.MatcherContext, - received: RegExp | RegexNode | RegexNode[], + received: RegExp | RegexSequence, expected: RegExp ) { const receivedPattern = asRegExp(received).source; diff --git a/test-utils/to-match-groups.ts b/test-utils/to-match-groups.ts index 5a5f271..45494da 100644 --- a/test-utils/to-match-groups.ts +++ b/test-utils/to-match-groups.ts @@ -1,9 +1,9 @@ -import type { RegexNode } from '../src/types'; +import type { RegexSequence } from '../src/types'; import { asRegExp } from './utils'; export function toMatchGroups( this: jest.MatcherContext, - received: RegExp | RegexNode | RegexNode[], + received: RegExp | RegexSequence, expectedString: string, expectedGroups: string[] ) { diff --git a/test-utils/to-match-string.ts b/test-utils/to-match-string.ts index 5a87d03..d0086df 100644 --- a/test-utils/to-match-string.ts +++ b/test-utils/to-match-string.ts @@ -1,9 +1,9 @@ -import type { RegexNode } from '../src/types'; +import type { RegexSequence } from '../src/types'; import { asRegExp } from './utils'; export function toMatchString( this: jest.MatcherContext, - received: RegExp | RegexNode | RegexNode[], + received: RegExp | RegexSequence, expected: string ) { const receivedRegex = asRegExp(received);