From ea0dbd825ac360fcadb35300bb617480aa0f6c3c Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Tue, 26 Dec 2023 23:45:13 +0100 Subject: [PATCH 1/8] wip --- docs/API.md | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 docs/API.md diff --git a/docs/API.md b/docs/API.md new file mode 100644 index 0000000..e5c489f --- /dev/null +++ b/docs/API.md @@ -0,0 +1,88 @@ +# API + +## Overview + +TS Regex Builder allows building readable regular expressions objects using easy-to-read domain-specific language (DSL). + +```ts +// Same as: const regex = /Hello (\w+)/ +const regex = buildRegex(['Hello ', capture(oneOrMore(word))]); +``` + +This library exposes numerous regex components (e.g., `capture`, `oneOrMore`) corresponding to regular expression constructs. Components, when called, return `RegexElement` instances. You can form regular expressions by creating a sequence of elements and strings using JavaScript array literals: + +```ts +const elements = ['Hello ', capture(oneOrMore(word))]; +``` + +We refer to `RegexElement | string` type for brevity as `RegexNode`. 
Most of the regex components accept either a single regex node or an array of regex nodes: + +```ts +oneOrMore('a'); // Single string node +oneOrMore(anyOf('xyz')); // Single anyOf element +oneOrMore(['a', anyOf('xyz')]); // Array of regex elements and strings +``` + +| Regex Component | Regex construct | Notes | +| ---------------------------------- | --------------- | -------------------------------------------- | +| `buildRegex(...)` | `/.../` | Create `RegExp`` object | +| `anyOf('abc')` | `[abc]` | Specify matching characters | +| `characterRange('a', 'z')` | `[a-z]` | Specify range of matching characters | +| `capture(x)` | `(x)` | Capture group | +| `word` | `\w` | +| `digit` | `\d` | +| `any` | `.` | +| `whitespace` | `\s` | +| `startOfString` | `^` | Start of string (or line in multiline mode) | +| `endOfString` | `$` | End of string (or line in multiline mode) | +| `inverted(...)` | `[^...]` | Inverts character class | +| `characterClass(...)` | `[...]` | Construct concatenation of character classes | +| `choiceOf(x, y, z)` | `x\|y\|z` | Alternative | +| `zeroOrMore(x)` | `x*` | | +| `oneOrMore(x)` | `x+` | | +| `optionally(x)` | `x?` | | +| `repeat({ count: n }, ...)` | `...{n}` | Repeat exact number of times | +| `repeat({ min: n, }, ...)` | `...{n,}` | Repeat at least given number of times | +| `repeat({ min: n, max: n2 }, ...)` | `...{n1,n2}` | Repeat number of times in a range | + +## Types + +```ts +import type { EncodeOutput } from './encoder/types'; + +export type RegexNode = RegexElement | string; + +export interface RegexElement { + type: string; + encode(): EncodeOutput; +} +``` + +## `buildRegex` + +```ts +function buildRegex(elements: RegexNode | RegexNode[]): RegExp; +function buildRegex( + flags: { + global?: boolean; // Global search + ignoreCase?: boolean; // Case-insensitive search + multiline?: boolean; // Allows ^ and $ to match newline characters. + hasIndices?: boolean; // Generate indices for substring matches. 
+ sticky?: boolean; // Perform a "sticky" search that matches starting at the current position in the target string. + }, + elements: RegexNode | RegexNode[] +): RegExp; +``` + +Parameters: + +- `elements` - single regex element or string or array of such elements +- `flags` - flags for used for construction of `RegExp` object + +The top-level method for TS Regex Builder is `buildRegex`; it accepts either a single regex element, a string, or an array of such. + +## Character classes + +Te + +### From d58b633b87c1aeaadfa2f453ed18b8c65c13e35c Mon Sep 17 00:00:00 2001 From: Maciej Jastrzebski Date: Tue, 26 Dec 2023 23:47:20 +0100 Subject: [PATCH 2/8] docs: tweaks --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6b21fcd..6540712 100644 --- a/README.md +++ b/README.md @@ -26,10 +26,10 @@ const hexColor = buildRegex( capture( choiceOf( repeat({ count: 6 }, hexDigit), - repeat({ count: 3 }, hexDigit) + repeat({ count: 3 }, hexDigit), ) ), - endOfString + endOfString, ); ``` From f3c12aa81e2fe222ff7cf194680a6f06a971ebc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Thu, 28 Dec 2023 13:49:11 +0100 Subject: [PATCH 3/8] refactor: introduce regex sequence --- src/builders.ts | 14 +++++++------- src/components/capture.ts | 6 +++--- src/components/choice-of.ts | 4 ++-- src/components/quantifiers.ts | 14 +++++++------- src/components/repeat.ts | 6 +++--- src/types.ts | 2 ++ src/utils/nodes.ts | 6 +++--- test-utils/to-have-pattern.ts | 4 ++-- test-utils/to-match-groups.ts | 4 ++-- test-utils/to-match-string.ts | 4 ++-- 10 files changed, 33 insertions(+), 31 deletions(-) diff --git a/src/builders.ts b/src/builders.ts index 3a811ef..e230a6f 100644 --- a/src/builders.ts +++ b/src/builders.ts @@ -1,4 +1,4 @@ -import type { RegexNode } from './types'; +import type { RegexSequence } from './types'; import { encodeSequence } from './encoder/encoder'; import { asNodeArray } from './utils/nodes'; 
import { optionalFirstArg } from './utils/optional-arg'; @@ -26,7 +26,7 @@ export interface RegexFlags { * @param elements Single regex element or array of elements * @returns */ -export function buildRegex(elements: RegexNode | RegexNode[]): RegExp; +export function buildRegex(sequence: RegexSequence): RegExp; /** * Generate RegExp object from elements with passed flags. @@ -35,14 +35,14 @@ export function buildRegex(elements: RegexNode | RegexNode[]): RegExp; * @param flags RegExp flags object * @returns RegExp object */ -export function buildRegex(flags: RegexFlags, elements: RegexNode | RegexNode[]): RegExp; +export function buildRegex(flags: RegexFlags, sequence: RegexSequence): RegExp; export function buildRegex(first: any, second?: any): RegExp { return _buildRegex(...optionalFirstArg(first, second)); } -export function _buildRegex(flags: RegexFlags, elements: RegexNode | RegexNode[]): RegExp { - const pattern = encodeSequence(asNodeArray(elements)).pattern; +export function _buildRegex(flags: RegexFlags, sequence: RegexSequence): RegExp { + const pattern = encodeSequence(asNodeArray(sequence)).pattern; const flagsString = encodeFlags(flags ?? 
{}); return new RegExp(pattern, flagsString); } @@ -52,8 +52,8 @@ export function _buildRegex(flags: RegexFlags, elements: RegexNode | RegexNode[] * @param elements Single regex element or array of elements * @returns regex pattern string */ -export function buildPattern(elements: RegexNode | RegexNode[]): string { - return encodeSequence(asNodeArray(elements)).pattern; +export function buildPattern(sequence: RegexSequence): string { + return encodeSequence(asNodeArray(sequence)).pattern; } function encodeFlags(flags: RegexFlags): string { diff --git a/src/components/capture.ts b/src/components/capture.ts index 4fd7b21..917632e 100644 --- a/src/components/capture.ts +++ b/src/components/capture.ts @@ -1,17 +1,17 @@ import { encodeSequence } from '../encoder/encoder'; import type { EncodeOutput } from '../encoder/types'; import { asNodeArray } from '../utils/nodes'; -import type { RegexElement, RegexNode } from '../types'; +import type { RegexElement, RegexNode, RegexSequence } from '../types'; export interface Capture extends RegexElement { type: 'capture'; children: RegexNode[]; } -export function capture(nodes: RegexNode | RegexNode[]): Capture { +export function capture(sequence: RegexSequence): Capture { return { type: 'capture', - children: asNodeArray(nodes), + children: asNodeArray(sequence), encode: encodeCapture, }; } diff --git a/src/components/choice-of.ts b/src/components/choice-of.ts index 1f9ecf2..27f880c 100644 --- a/src/components/choice-of.ts +++ b/src/components/choice-of.ts @@ -1,14 +1,14 @@ import { encodeSequence } from '../encoder/encoder'; import type { EncodeOutput } from '../encoder/types'; import { asNodeArray } from '../utils/nodes'; -import type { RegexElement, RegexNode } from '../types'; +import type { RegexElement, RegexNode, RegexSequence } from '../types'; export interface ChoiceOf extends RegexElement { type: 'choiceOf'; alternatives: RegexNode[][]; } -export function choiceOf(...alternatives: Array): ChoiceOf { +export function 
choiceOf(...alternatives: RegexSequence[]): ChoiceOf { if (alternatives.length === 0) { throw new Error('`choiceOf` should receive at least one alternative'); } diff --git a/src/components/quantifiers.ts b/src/components/quantifiers.ts index d379703..6640557 100644 --- a/src/components/quantifiers.ts +++ b/src/components/quantifiers.ts @@ -1,7 +1,7 @@ import { encodeAtom } from '../encoder/encoder'; import type { EncodeOutput } from '../encoder/types'; import { asNodeArray } from '../utils/nodes'; -import type { RegexElement, RegexNode } from '../types'; +import type { RegexElement, RegexNode, RegexSequence } from '../types'; export interface OneOrMore extends RegexElement { type: 'oneOrMore'; @@ -18,26 +18,26 @@ export interface ZeroOrMore extends RegexElement { children: RegexNode[]; } -export function oneOrMore(nodes: RegexNode | RegexNode[]): OneOrMore { +export function oneOrMore(sequence: RegexSequence): OneOrMore { return { type: 'oneOrMore', - children: asNodeArray(nodes), + children: asNodeArray(sequence), encode: encodeOneOrMore, }; } -export function optionally(nodes: RegexNode | RegexNode[]): Optionally { +export function optionally(sequence: RegexSequence): Optionally { return { type: 'optionally', - children: asNodeArray(nodes), + children: asNodeArray(sequence), encode: encodeOptionally, }; } -export function zeroOrMore(nodes: RegexNode | RegexNode[]): ZeroOrMore { +export function zeroOrMore(sequence: RegexSequence): ZeroOrMore { return { type: 'zeroOrMore', - children: asNodeArray(nodes), + children: asNodeArray(sequence), encode: encodeZeroOrMore, }; } diff --git a/src/components/repeat.ts b/src/components/repeat.ts index fd24544..7d20a79 100644 --- a/src/components/repeat.ts +++ b/src/components/repeat.ts @@ -1,7 +1,7 @@ import { encodeAtom } from '../encoder/encoder'; import type { EncodeOutput } from '../encoder/types'; import { asNodeArray } from '../utils/nodes'; -import type { RegexElement, RegexNode } from '../types'; +import type { 
RegexElement, RegexNode, RegexSequence } from '../types'; export interface Repeat extends RegexElement { type: 'repeat'; @@ -11,8 +11,8 @@ export interface Repeat extends RegexElement { export type RepeatOptions = { count: number } | { min: number; max?: number }; -export function repeat(options: RepeatOptions, nodes: RegexNode | RegexNode[]): Repeat { - const children = asNodeArray(nodes); +export function repeat(options: RepeatOptions, sequence: RegexSequence): Repeat { + const children = asNodeArray(sequence); if (children.length === 0) { throw new Error('`repeat` should receive at least one element'); diff --git a/src/types.ts b/src/types.ts index 71efcd2..0130786 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,5 +1,7 @@ import type { EncodeOutput } from './encoder/types'; +export type RegexSequence = RegexNode | RegexNode[]; + export type RegexNode = RegexElement | string; export interface RegexElement { diff --git a/src/utils/nodes.ts b/src/utils/nodes.ts index 76020b0..bb0553e 100644 --- a/src/utils/nodes.ts +++ b/src/utils/nodes.ts @@ -1,5 +1,5 @@ -import type { RegexNode } from '../types'; +import type { RegexNode, RegexSequence } from '../types'; -export function asNodeArray(nodeOrArray: RegexNode | RegexNode[]): RegexNode[] { - return Array.isArray(nodeOrArray) ? nodeOrArray : [nodeOrArray]; +export function asNodeArray(sequence: RegexSequence): RegexNode[] { + return Array.isArray(sequence) ? 
sequence : [sequence]; } diff --git a/test-utils/to-have-pattern.ts b/test-utils/to-have-pattern.ts index 8157113..9521848 100644 --- a/test-utils/to-have-pattern.ts +++ b/test-utils/to-have-pattern.ts @@ -1,9 +1,9 @@ -import type { RegexNode } from '../src/types'; +import type { RegexSequence } from '../src/types'; import { asRegExp } from './utils'; export function toHavePattern( this: jest.MatcherContext, - received: RegExp | RegexNode | RegexNode[], + received: RegExp | RegexSequence, expected: RegExp ) { const receivedPattern = asRegExp(received).source; diff --git a/test-utils/to-match-groups.ts b/test-utils/to-match-groups.ts index 5a5f271..45494da 100644 --- a/test-utils/to-match-groups.ts +++ b/test-utils/to-match-groups.ts @@ -1,9 +1,9 @@ -import type { RegexNode } from '../src/types'; +import type { RegexSequence } from '../src/types'; import { asRegExp } from './utils'; export function toMatchGroups( this: jest.MatcherContext, - received: RegExp | RegexNode | RegexNode[], + received: RegExp | RegexSequence, expectedString: string, expectedGroups: string[] ) { diff --git a/test-utils/to-match-string.ts b/test-utils/to-match-string.ts index 5a87d03..d0086df 100644 --- a/test-utils/to-match-string.ts +++ b/test-utils/to-match-string.ts @@ -1,9 +1,9 @@ -import type { RegexNode } from '../src/types'; +import type { RegexSequence } from '../src/types'; import { asRegExp } from './utils'; export function toMatchString( this: jest.MatcherContext, - received: RegExp | RegexNode | RegexNode[], + received: RegExp | RegexSequence, expected: string ) { const receivedRegex = asRegExp(received); From 8b1d73828724a540ca4b71430f936f09e79d5fa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Thu, 28 Dec 2023 13:49:28 +0100 Subject: [PATCH 4/8] docs: add minimal comprehensive docs --- README.md | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++--- docs/API.md | 88 ----------------------------------------------------- 2 files changed, 84 
insertions(+), 92 deletions(-) delete mode 100644 docs/API.md diff --git a/README.md b/README.md index 6540712..9eb4c40 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ # TS Regex Builder -User-friendly Regular Expression builder for TypeScript and JavaScript. +Structured Regular Expression builder for TypeScript and JavaScript. ## Goal -Regular expressions are a powerful tool for matching complex text patterns, yet they are notorious for their hard-to-understand syntax. +Regular expressions are a powerful tool for matching simple and complex text patterns, yet they are notorious for their hard-to-understand syntax. Inspired by Swift's Regex Builder, this library allows users to write easily and understand regular expressions. @@ -39,7 +39,13 @@ const hexColor = buildRegex( npm install ts-regex-builder ``` -## Usage +or + +```sh +yarn add ts-regex-builder +``` + +## Basic usage ```js import { buildRegex, capture, oneOrMore } from 'ts-regex-builder'; @@ -48,6 +54,81 @@ import { buildRegex, capture, oneOrMore } from 'ts-regex-builder'; const regex = buildRegex(['Hello ', capture(oneOrMore(word))]); ``` +## Domain-specific language + +Terminology: +* regex component (e.g., `capture()`, `oneOrMore()`, `word`) - function or object representing regex construct +* regex element (`RegexElement`) - object returned by regex components +* regex node (`RegexNode`) - regex element or string +* regex sequence - single regex node (`RegexElement | string`) or array of such nodes (`Array<RegexElement | string>`) + +Most components accept a regex sequence. 
+ +### Building regex + +| Regex Component | Regex construct | Type | Description | +| --------------------------------------- | --------------- | ---------------------------------------------------------------- | ----------------------------------- | +| `buildRegex(...)` | `/.../` | `(nodes: RegexNode \| RegexNode[]) => RegExp` | Create `RegExp` instance | +| `buildRegex({ ignoreCase: true }, ...)` | `/.../i` | `(flags: RegexFlags, nodes: RegexNode \| RegexNode[]) => RegExp` | Create `RegExp` instance with flags | + +Builders accept either a single element (`oneOrMore('a')`) or string (`'a'`) or array of multiple elements and strings (`[oneOrMore('a'), 'b']`). + +### Components + +| Regex Component | Regex construct | Type | Notes | +| ------------------- | --------------- | -------------------------------------------------------------------- | --------------------------- | +| `capture(...)` | `(...)` | `(nodes: RegexNode \| RegexNode[]) => RegexElement` | Capture group | +| `choiceOf(x, y, z)` | `x\|y\|z` | `(...alternatives: Array<RegexNode \| RegexNode[]>) => RegexElement` | Either of provided patterns | + +Notes: +* `capture()` accepts either a single element (`oneOrMore('a')`) or string (`'a'`) or array of multiple elements and strings (`[oneOrMore('a'), 'b']`). +* `choiceOf()` accepts variable number of elements or sequences. 
+ + +### Quantifiers + +| Regex Component | Regex construct | Type | Description | +| ---------------------------------- | --------------- | --------------------------------------------------------------------------------- | ------------------------------------------------- | +| `zeroOrMore(x)` | `x*` | `(nodes: RegexNode \| RegexNode[]) => RegexElement` | Zero or more occurence of a pattern | +| `oneOrMore(x)` | `x+` | `(nodes: RegexNode \| RegexNode[]) => RegexElement` | One or more occurence of a pattern | +| `optionally(x)` | `x?` | `(nodes: RegexNode \| RegexNode[]) => RegexElement` | Zero or one occurence of a pattern | +| `repeat({ count: n }, ...)` | `x{n}` | `({ count: number }, nodes: RegexNode \| RegexNode[]) => RegexElement` | Pattern repeats exact number of times | +| `repeat({ min: n, }, ...)` | `x{n,}` | `({ min: number }, nodes: RegexNode \| RegexNode[]) => RegexElement` | Pattern repeats at least given number of times | +| `repeat({ min: n, max: n2 }, ...)` | `x{n1,n2}` | `({ min: number, max: number }, nodes: RegexNode \| RegexNode[]) => RegexElement` | Pattern repeats between n1 and n2 number of times | + +All quantifiers accept a single element or array of elements. 
+ +### Character classes + +| Regex Component | Regex construct | Type | Description | +| -------------------------- | --------------- | ------------------------------------------------------ | ------------------------------------------- | +| `any` | `.` | `CharacterClass` | Any character | +| `word` | `\w` | `CharacterClass` | Word characters | +| `digit` | `\d` | `CharacterClass` | Digit characters | +| `whitespace` | `\s` | `CharacterClass` | Whitespace characters | +| `anyOf('abc')` | `[abc]` | `(chars: string) => CharacterClass` | Any of supplied characters | +| `characterRange('a', 'z')` | `[a-z]` | `(from: string, to: string) => CharacterClass` | Range of characters | +| `characterClass(...)` | `[...]` | `(...charClasses: CharacterClass[]) => CharacterClass` | Concatenation of multiple character classes | +| `inverted(...)` | `[^...]` | `(charClass: CharacterClass) => CharacterClass` | Inverts character class | + +Notes: +* `any`, `word`, `digit`, `whitespace` - are objects, no need to call them. +* `anyof` accepts a single string of characters to match +* `characterRange` accepts exactly two **single character** strings representing range start and end (inclusive). +* `characterClass` accepts a variable number of character classes to join +* `inverted` accepts a single character class to be inverted + + +### Anchors + +| Regex Component | Regex construct | Type | Notes | +| --------------- | --------------- | -------- | ----------------------------------------------------- | +| `startOfString` | `^` | `Anchor` | Start of string (or start of line in multiline mode) | +| `endOfString` | `$` | `Anchor` | End of string (or end of line in multiline mode) | + +Notes: +* `startOfString`, `endOfString` - are objects, no need to call them. + ## Examples See [Examples document](./docs/Examples.md). @@ -55,7 +136,6 @@ See [Examples document](./docs/Examples.md). 
## Contributing See the [contributing guide](CONTRIBUTING.md) to learn how to contribute to the repository and the development workflow. - See the [project guidelines](GUIDELINES.md) to understand our core principles. ## License diff --git a/docs/API.md b/docs/API.md deleted file mode 100644 index e5c489f..0000000 --- a/docs/API.md +++ /dev/null @@ -1,88 +0,0 @@ -# API - -## Overview - -TS Regex Builder allows building readable regular expressions objects using easy-to-read domain-specific language (DSL). - -```ts -// Same as: const regex = /Hello (\w+)/ -const regex = buildRegex(['Hello ', capture(oneOrMore(word))]); -``` - -This library exposes numerous regex components (e.g., `capture`, `oneOrMore`) corresponding to regular expression constructs. Components, when called, return `RegexElement` instances. You can form regular expressions by creating a sequence of elements and strings using JavaScript array literals: - -```ts -const elements = ['Hello ', capture(oneOrMore(word))]; -``` - -We refer to `RegexElement | string` type for brevity as `RegexNode`. 
Most of the regex components accept either a single regex node or an array of regex nodes: - -```ts -oneOrMore('a'); // Single string node -oneOrMore(anyOf('xyz')); // Single anyOf element -oneOrMore(['a', anyOf('xyz')]); // Array of regex elements and strings -``` - -| Regex Component | Regex construct | Notes | -| ---------------------------------- | --------------- | -------------------------------------------- | -| `buildRegex(...)` | `/.../` | Create `RegExp`` object | -| `anyOf('abc')` | `[abc]` | Specify matching characters | -| `characterRange('a', 'z')` | `[a-z]` | Specify range of matching characters | -| `capture(x)` | `(x)` | Capture group | -| `word` | `\w` | -| `digit` | `\d` | -| `any` | `.` | -| `whitespace` | `\s` | -| `startOfString` | `^` | Start of string (or line in multiline mode) | -| `endOfString` | `$` | End of string (or line in multiline mode) | -| `inverted(...)` | `[^...]` | Inverts character class | -| `characterClass(...)` | `[...]` | Construct concatenation of character classes | -| `choiceOf(x, y, z)` | `x\|y\|z` | Alternative | -| `zeroOrMore(x)` | `x*` | | -| `oneOrMore(x)` | `x+` | | -| `optionally(x)` | `x?` | | -| `repeat({ count: n }, ...)` | `...{n}` | Repeat exact number of times | -| `repeat({ min: n, }, ...)` | `...{n,}` | Repeat at least given number of times | -| `repeat({ min: n, max: n2 }, ...)` | `...{n1,n2}` | Repeat number of times in a range | - -## Types - -```ts -import type { EncodeOutput } from './encoder/types'; - -export type RegexNode = RegexElement | string; - -export interface RegexElement { - type: string; - encode(): EncodeOutput; -} -``` - -## `buildRegex` - -```ts -function buildRegex(elements: RegexNode | RegexNode[]): RegExp; -function buildRegex( - flags: { - global?: boolean; // Global search - ignoreCase?: boolean; // Case-insensitive search - multiline?: boolean; // Allows ^ and $ to match newline characters. - hasIndices?: boolean; // Generate indices for substring matches. 
- sticky?: boolean; // Perform a "sticky" search that matches starting at the current position in the target string. - }, - elements: RegexNode | RegexNode[] -): RegExp; -``` - -Parameters: - -- `elements` - single regex element or string or array of such elements -- `flags` - flags for used for construction of `RegExp` object - -The top-level method for TS Regex Builder is `buildRegex`; it accepts either a single regex element, a string, or an array of such. - -## Character classes - -Te - -### From ad7f52dc263fff0b3e7a98b95cb13c362dd67e04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Thu, 28 Dec 2023 13:55:45 +0100 Subject: [PATCH 5/8] docs: spell check --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 9eb4c40..e8d41b8 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ # TS Regex Builder -Structured Regular Expression builder for TypeScript and JavaScript. +A User-friendly regular expression builder for TypeScript and JavaScript. ## Goal Regular expressions are a powerful tool for matching simple and complex text patterns, yet they are notorious for their hard-to-understand syntax. -Inspired by Swift's Regex Builder, this library allows users to write easily and understand regular expressions. +Inspired by Swift's Regex Builder, this library allows users to write and understand regular expressions easily. ```ts // Before @@ -82,7 +82,7 @@ Builders accept either a single element (`oneOrMore('a')`) or string (`'a'`) or Notes: * `capture()` accepts either a single element (`oneOrMore('a')`) or string (`'a'`) or array of multiple elements and strings (`[oneOrMore('a'), 'b']`). -* `choiceOf()` accepts variable number of elements or sequences. +* `choiceOf()` accepts a variable number of elements or sequences. 
### Quantifiers @@ -123,8 +123,8 @@ Notes: | Regex Component | Regex construct | Type | Notes | | --------------- | --------------- | -------- | ----------------------------------------------------- | -| `startOfString` | `^` | `Anchor` | Start of string (or start of line in multiline mode) | -| `endOfString` | `$` | `Anchor` | End of string (or end of line in multiline mode) | +| `startOfString` | `^` | `Anchor` | Start of the string (or start of a line in multiline mode) | +| `endOfString` | `$` | `Anchor` | End of the string (or end of a line in multiline mode) | Notes: * `startOfString`, `endOfString` - are objects, no need to call them. From 672806424490a76655e0f4ed58dc52bb50b91675 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Thu, 28 Dec 2023 13:56:20 +0100 Subject: [PATCH 6/8] docs: fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e8d41b8..e807df2 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # TS Regex Builder -A User-friendly regular expression builder for TypeScript and JavaScript. +A user-friendly regular expression builder for TypeScript and JavaScript. ## Goal From 9c48fe87aeec0be5ccecdba4cd9b698985b7292c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Thu, 28 Dec 2023 14:12:23 +0100 Subject: [PATCH 7/8] docs: tweaks --- README.md | 75 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index e807df2..266c0f3 100644 --- a/README.md +++ b/README.md @@ -56,47 +56,60 @@ const regex = buildRegex(['Hello ', capture(oneOrMore(word))]); ## Domain-specific language +TS Regex Builder allows you to build complex regular expressions using domain-specific language or regex components. 
+ +Terminology: -* regex component (e.g., `capture()`, `oneOrMore()`, `word`) - function or object representing regex construct +* regex component (e.g., `capture()`, `oneOrMore()`, `word`) - function or object representing a regex construct * regex element (`RegexElement`) - object returned by regex components -* regex node (`RegexNode`) - regex element or string -* regex sequence - single regex node (`RegexElement | string`) or array of such nodes (`Array<RegexElement | string>`) +* regex sequence - single regex element or string (`RegexElement | string`) or array of such elements and strings (`Array<RegexElement | string>`) -Most components accept a regex sequence. +Most of the regex components accept a regex sequence. Examples of sequences: +* single string: `'a'`, `'Hello World'`, `.` - note all characters will be automatically escaped in the resulting regex +* single element: `capture('abc')` +* array of elements and strings: `['$', oneOrMore(digit)]` + +Regex components can be composed into a complex tree: + +```ts +const currencyAmount = buildRegex([ + choiceOf('$', '€', repeat({ count: 3 }, characterRange('A', 'Z'))), + oneOrMore(digit), + optionally([ + '.', + repeat({ count: 2}, digit), + ]), +]) +``` -### Building regex -| Regex Component | Regex construct | Type | Description | -| --------------------------------------- | --------------- | ---------------------------------------------------------------- | ----------------------------------- | -| `buildRegex(...)` | `/.../` | `(nodes: RegexNode \| RegexNode[]) => RegExp` | Create `RegExp` instance | -| `buildRegex({ ignoreCase: true }, ...)` | `/.../i` | `(flags: RegexFlags, nodes: RegexNode \| RegexNode[]) => RegExp` | Create `RegExp` instance with flags | +### Building regex -Builders accept either a single element (`oneOrMore('a')`) or string (`'a'`) or array of multiple elements and strings (`[oneOrMore('a'), 'b']`). 
+| Regex Component | Regex construct | Type | Description | +| --------------------------------------- | --------------- | --------------------------------------------------- | ----------------------------------- | +| `buildRegex(...)` | `/.../` | `(seq: RegexSequence) => RegExp` | Create `RegExp` instance | +| `buildRegex({ ignoreCase: true }, ...)` | `/.../i` | `(flags: RegexFlags, seq: RegexSequence) => RegExp` | Create `RegExp` instance with flags | ### Components -| Regex Component | Regex construct | Type | Notes | -| ------------------- | --------------- | -------------------------------------------------------------------- | --------------------------- | -| `capture(...)` | `(...)` | `(nodes: RegexNode \| RegexNode[]) => RegexElement` | Capture group | -| `choiceOf(x, y, z)` | `x\|y\|z` | `(...alternatives: Array<RegexNode \| RegexNode[]>) => RegexElement` | Either of provided patterns | +| Regex Component | Regex construct | Type | Notes | +| ------------------- | --------------- | ---------------------------------------------------- | --------------------------- | +| `capture(...)` | `(...)` | `(seq: RegexSequence) => RegexElement` | Capture group | +| `choiceOf(x, y, z)` | `x\|y\|z` | `(...alternatives: RegexSequence[]) => RegexElement` | Either of provided patterns | Notes: -* `capture()` accepts either a single element (`oneOrMore('a')`) or string (`'a'`) or array of multiple elements and strings (`[oneOrMore('a'), 'b']`). -* `choiceOf()` accepts a variable number of elements or sequences. +* `choiceOf()` accepts a variable number of sequences. 
### Quantifiers -| Regex Component | Regex construct | Type | Description | -| ---------------------------------- | --------------- | --------------------------------------------------------------------------------- | ------------------------------------------------- | -| `zeroOrMore(x)` | `x*` | `(nodes: RegexNode \| RegexNode[]) => RegexElement` | Zero or more occurence of a pattern | -| `oneOrMore(x)` | `x+` | `(nodes: RegexNode \| RegexNode[]) => RegexElement` | One or more occurence of a pattern | -| `optionally(x)` | `x?` | `(nodes: RegexNode \| RegexNode[]) => RegexElement` | Zero or one occurence of a pattern | -| `repeat({ count: n }, ...)` | `x{n}` | `({ count: number }, nodes: RegexNode \| RegexNode[]) => RegexElement` | Pattern repeats exact number of times | -| `repeat({ min: n, }, ...)` | `x{n,}` | `({ min: number }, nodes: RegexNode \| RegexNode[]) => RegexElement` | Pattern repeats at least given number of times | -| `repeat({ min: n, max: n2 }, ...)` | `x{n1,n2}` | `({ min: number, max: number }, nodes: RegexNode \| RegexNode[]) => RegexElement` | Pattern repeats between n1 and n2 number of times | - -All quantifiers accept a single element or array of elements. 
+| Regex Component | Regex construct | Type | Description | +| ---------------------------------- | --------------- | -------------------------------------------------------------------- | ------------------------------------------------- | +| `zeroOrMore(x)` | `x*` | `(seq: RegexSequence) => RegexElement` | Zero or more occurence of a pattern | +| `oneOrMore(x)` | `x+` | `(seq: RegexSequence) => RegexElement` | One or more occurence of a pattern | +| `optionally(x)` | `x?` | `(seq: RegexSequence) => RegexElement` | Zero or one occurence of a pattern | +| `repeat({ count: n }, ...)` | `x{n}` | `({ count: number }, seq: RegexSequence) => RegexElement` | Pattern repeats exact number of times | +| `repeat({ min: n, }, ...)` | `x{n,}` | `({ min: number }, seq: RegexSequence) => RegexElement` | Pattern repeats at least given number of times | +| `repeat({ min: n, max: n2 }, ...)` | `x{n1,n2}` | `({ min: number, max: number }, seq: RegexSequence) => RegexElement` | Pattern repeats between n1 and n2 number of times | ### Character classes @@ -114,17 +127,17 @@ All quantifiers accept a single element or array of elements. Notes: * `any`, `word`, `digit`, `whitespace` - are objects, no need to call them. * `anyof` accepts a single string of characters to match -* `characterRange` accepts exactly two **single character** strings representing range start and end (inclusive). -* `characterClass` accepts a variable number of character classes to join +* `characterRange` accepts exactly **two single character** strings representing range start and end (inclusive). 
+* `characterClass` accepts a variable number of character classes to join into a single class * `inverted` accepts a single character class to be inverted ### Anchors -| Regex Component | Regex construct | Type | Notes | -| --------------- | --------------- | -------- | ----------------------------------------------------- | +| Regex Component | Regex construct | Type | Notes | +| --------------- | --------------- | -------- | ---------------------------------------------------------- | | `startOfString` | `^` | `Anchor` | Start of the string (or start of a line in multiline mode) | -| `endOfString` | `$` | `Anchor` | End of the string (or end of a line in multiline mode) | +| `endOfString` | `$` | `Anchor` | End of the string (or end of a line in multiline mode) | Notes: * `startOfString`, `endOfString` - are objects, no need to call them. From 9944633fe5138d645425661372ec534988e23273 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Thu, 28 Dec 2023 14:13:27 +0100 Subject: [PATCH 8/8] docs: tweaks --- README.md | 64 +++++++++++++++++++++++++++---------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index 266c0f3..e403691 100644 --- a/README.md +++ b/README.md @@ -61,10 +61,10 @@ TS Regex Builder allows you to build complex regular expressions using domain-sp Terminology: * regex component (e.g., `capture()`, `oneOrMore()`, `word`) - function or object representing a regex construct * regex element (`RegexElement`) - object returned by regex components -* regex sequence - single regex element or string (`RegexElement | string`) or array of such elements and strings (`Array<RegexElement | string>`) +* regex sequence (`RegexSequence`) - single regex element or string (`RegexElement | string`) or array of such elements and strings (`Array<RegexElement | string>`) Most of the regex components accept a regex sequence. 
Examples of sequences: -* single string: `'a'`, `'Hello World'`, `.` - note all characters will be automatically escaped in the resulting regex +* single string: `'Hello World'` - note all characters will be automatically escaped in the resulting regex * single element: `capture('abc')` * array of elements and strings: `['$', oneOrMore(digit)]` @@ -84,17 +84,17 @@ const currencyAmount = buildRegex([ ### Building regex -| Regex Component | Regex construct | Type | Description | -| --------------------------------------- | --------------- | --------------------------------------------------- | ----------------------------------- | -| `buildRegex(...)` | `/.../` | `(seq: RegexSequence) => RegExp` | Create `RegExp` instance | -| `buildRegex({ ignoreCase: true }, ...)` | `/.../i` | `(flags: RegexFlags, seq: RegexSequence) => RegExp` | Create `RegExp` instance with flags | +| Regex Component | Regex Pattern | Type | Description | +| --------------------------------------- | ------------- | --------------------------------------------------- | ----------------------------------- | +| `buildRegex(...)` | `/.../` | `(seq: RegexSequence) => RegExp` | Create `RegExp` instance | +| `buildRegex({ ignoreCase: true }, ...)` | `/.../i` | `(flags: RegexFlags, seq: RegexSequence) => RegExp` | Create `RegExp` instance with flags | ### Components -| Regex Component | Regex construct | Type | Notes | -| ------------------- | --------------- | ---------------------------------------------------- | --------------------------- | -| `capture(...)` | `(...)` | `(seq: RegexSequence) => RegexElement` | Capture group | -| `choiceOf(x, y, z)` | `x\|y\|z` | `(...alternatives: RegexSequence[]) => RegexElement` | Either of provided patterns | +| Regex Component | Regex Pattern | Type | Notes | +| ------------------- | ------------- | ---------------------------------------------------- | --------------------------- | +| `capture(...)` | `(...)` | `(seq: RegexSequence) => RegexElement` | Capture 
group | +| `choiceOf(x, y, z)` | `x\|y\|z` | `(...alternatives: RegexSequence[]) => RegexElement` | Either of provided patterns | Notes: * `choiceOf()` accepts a variable number of sequences. @@ -102,27 +102,27 @@ Notes: ### Quantifiers -| Regex Component | Regex construct | Type | Description | -| ---------------------------------- | --------------- | -------------------------------------------------------------------- | ------------------------------------------------- | -| `zeroOrMore(x)` | `x*` | `(seq: RegexSequence) => RegexElement` | Zero or more occurence of a pattern | -| `oneOrMore(x)` | `x+` | `(seq: RegexSequence) => RegexElement` | One or more occurence of a pattern | -| `optionally(x)` | `x?` | `(seq: RegexSequence) => RegexElement` | Zero or one occurence of a pattern | -| `repeat({ count: n }, ...)` | `x{n}` | `({ count: number }, seq: RegexSequence) => RegexElement` | Pattern repeats exact number of times | -| `repeat({ min: n, }, ...)` | `x{n,}` | `({ min: number }, seq: RegexSequence) => RegexElement` | Pattern repeats at least given number of times | -| `repeat({ min: n, max: n2 }, ...)` | `x{n1,n2}` | `({ min: number, max: number }, seq: RegexSequence) => RegexElement` | Pattern repeats between n1 and n2 number of times | +| Regex Component | Regex Pattern | Type | Description | +| -------------------------------- | ------------- | -------------------------------------------------------------------- | ------------------------------------------------- | +| `zeroOrMore(x)` | `x*` | `(seq: RegexSequence) => RegexElement` | Zero or more occurrences of a pattern | +| `oneOrMore(x)` | `x+` | `(seq: RegexSequence) => RegexElement` | One or more occurrences of a pattern | +| `optionally(x)` | `x?` | `(seq: RegexSequence) => RegexElement` | Zero or one occurrence of a pattern | +| `repeat({ count: n }, x)` | `x{n}` | `({ count: number }, seq: RegexSequence) => RegexElement` | Pattern repeats exact number of times | +| `repeat({ min: n, }, x)` | `x{n,}` | `({ 
min: number }, seq: RegexSequence) => RegexElement` | Pattern repeats at least given number of times | +| `repeat({ min: n1, max: n2 }, x)` | `x{n1,n2}` | `({ min: number, max: number }, seq: RegexSequence) => RegexElement` | Pattern repeats between n1 and n2 times | ### Character classes -| Regex Component | Regex construct | Type | Description | -| -------------------------- | --------------- | ------------------------------------------------------ | ------------------------------------------- | -| `any` | `.` | `CharacterClass` | Any character | -| `word` | `\w` | `CharacterClass` | Word characters | -| `digit` | `\d` | `CharacterClass` | Digit characters | -| `whitespace` | `\s` | `CharacterClass` | Whitespace characters | -| `anyOf('abc')` | `[abc]` | `(chars: string) => CharacterClass` | Any of supplied characters | -| `characterRange('a', 'z')` | `[a-z]` | `(from: string, to: string) => CharacterClass` | Range of characters | -| `characterClass(...)` | `[...]` | `(...charClasses: CharacterClass[]) => CharacterClass` | Concatenation of multiple character classes | -| `inverted(...)` | `[^...]` | `(charClass: CharacterClass) => CharacterClass` | Inverts character class | +| Regex Component | Regex Pattern | Type | Description | +| -------------------------- | ------------- | ------------------------------------------------------ | ------------------------------------------- | +| `any` | `.` | `CharacterClass` | Any character | +| `word` | `\w` | `CharacterClass` | Word characters | +| `digit` | `\d` | `CharacterClass` | Digit characters | +| `whitespace` | `\s` | `CharacterClass` | Whitespace characters | +| `anyOf('abc')` | `[abc]` | `(chars: string) => CharacterClass` | Any of supplied characters | +| `characterRange('a', 'z')` | `[a-z]` | `(from: string, to: string) => CharacterClass` | Range of characters | +| `characterClass(...)` | `[...]` | `(...charClasses: CharacterClass[]) => CharacterClass` | Concatenation of multiple character classes | +| 
`inverted(...)` | `[^...]` | `(charClass: CharacterClass) => CharacterClass` | Inverts character class | Notes: * `any`, `word`, `digit`, `whitespace` - are objects, no need to call them. @@ -134,10 +134,10 @@ Notes: ### Anchors -| Regex Component | Regex construct | Type | Notes | -| --------------- | --------------- | -------- | ---------------------------------------------------------- | -| `startOfString` | `^` | `Anchor` | Start of the string (or start of a line in multiline mode) | -| `endOfString` | `$` | `Anchor` | End of the string (or end of a line in multiline mode) | +| Regex Component | Regex Pattern | Type | Notes | +| --------------- | ------------- | -------- | ---------------------------------------------------------- | +| `startOfString` | `^` | `Anchor` | Start of the string (or start of a line in multiline mode) | +| `endOfString` | `$` | `Anchor` | End of the string (or end of a line in multiline mode) | Notes: * `startOfString`, `endOfString` - are objects, no need to call them.