From 94fcd6080372c5429d32205f9f713e69ffcc1426 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Sat, 20 Jan 2024 23:19:29 +0000 Subject: [PATCH 01/11] docs: describe flags --- docs/API.md | 14 ++++++++++---- src/types.ts | 11 ++++------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/docs/API.md b/docs/API.md index 18b6cfc..6ba0883 100644 --- a/docs/API.md +++ b/docs/API.md @@ -2,14 +2,12 @@ ## Builder -### `buildRegExp()` function +### `buildRegExp()` ```ts -function buildRegExp(sequence: RegexSequence): RegExp; - function buildRegExp( sequence: RegexSequence, - flags: { + flags?: { global?: boolean; ignoreCase?: boolean; multiline?: boolean; @@ -19,6 +17,14 @@ function buildRegExp( ): RegExp; ``` +The `buildRegExp` is a top-level function responsible for build JavaScript-native `RegExp` object from passed regex sequence. + +It optionally accepts a list of regex flags: +- `global` - find all matches in a string, instead of just the first one. +- `ignoreCase` - perform case-insensitive matching. +- `multiline` - treat the start and end of each line in a string as the beginning and end of the string. +- `hasIndices` - provide the start and end indices of each captured group in a match. + ## Constructs ### `capture()` diff --git a/src/types.ts b/src/types.ts index 6d57ad5..c3b8cef 100644 --- a/src/types.ts +++ b/src/types.ts @@ -23,18 +23,15 @@ export interface RegexConstruct { } export interface RegexFlags { - /** Global search. */ + /** Find all matches in a string, instead of just the first one. */ global?: boolean; - /** Case-insensitive search. */ + /** Perform case-insensitive matching. */ ignoreCase?: boolean; - /** Allows ^ and $ to match newline characters. */ + /** Treat the start and end of each line in a string as the beginning and end of the string. */ multiline?: boolean; - /** Generate indices for substring matches. */ + /** Generate the start and end indices of each captured group in a match. 
*/ hasIndices?: boolean; - - /** Perform a "sticky" search that matches starting at the current position in the target string. */ - sticky?: boolean; } From c73592eae0b23c2c2f0d496e30de0c51373a0ea9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Sat, 20 Jan 2024 23:26:05 +0000 Subject: [PATCH 02/11] docs: document quantifiers --- docs/API.md | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/docs/API.md b/docs/API.md index 6ba0883..3e66e0d 100644 --- a/docs/API.md +++ b/docs/API.md @@ -29,14 +29,16 @@ It optionally accepts a list of regex flags: ### `capture()` -Captures, also known as capturing groups, are used to extract and store parts of the matched string for later use. - ```ts function capture( sequence: RegexSequence ): Capture ``` +Regex syntax: `(...)`. + +Captures, also known as capturing groups, are used to extract and store parts of the matched string for later use. + ### `choiceOf()` ```ts @@ -45,12 +47,16 @@ function choiceOf( ): ChoiceOf { ``` -The `choiceOf` (alternation) construct is used to match one out of several possible sequences. It functions similarly to a logical OR operator in programming. It can match simple string options as well as complex patterns. +Regex syntax: `a|b|c`. + +The `choiceOf` (disjunction) construct is used to match one out of several possible sequences. It functions similarly to a logical OR operator in programming. It can match simple string options as well as complex patterns. Example: `choiceOf("color", "colour")` matches either `color` or `colour` pattern. ## Quantifiers +Quantifiers in regex define the number of occurrences to match for a pattern. + ### `zeroOrMore()` ```ts @@ -59,6 +65,10 @@ function zeroOrMore( ): ZeroOrMore ``` +Regex syntax: `x*`; + +The `zeroOrMore` quantifier matches zero or more occurrences of given pattern, allowing a flexible number of repetitions of that element. 
+ ### `oneOrMore()` ```ts @@ -67,6 +77,10 @@ function oneOrMore( ): OneOrMore ``` +Regex syntax: `x+`; + +The `oneOrMore` quantifier matches one or more occurrences of given pattern, allowing a flexible number of repetitions of that element. + ### `optionally()` ```ts @@ -75,6 +89,10 @@ function optionally( ): Optionally ``` +Regex syntax: `x?`; + +The `optionally` quantifier matches zero or one occurrence of given pattern, making it optional. + ### `repeat()` ```ts @@ -84,6 +102,11 @@ function repeat( ): Repeat ``` +Regex syntax: `{n}`, `{min,}`, `{min, max}`. + +The `repeat` quantifier in regex matches either exactly `options` count or between `min` and `max` count. If only `min` is provided it matches at least `min` count. + + ## Character classes Character classes are a set of characters that match any one of the characters in the set. From c2000413db17230ca68a70956052a16344079262 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Sat, 20 Jan 2024 23:32:06 +0000 Subject: [PATCH 03/11] docs: document other parts --- docs/API.md | 53 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/docs/API.md b/docs/API.md index 3e66e0d..3291d76 100644 --- a/docs/API.md +++ b/docs/API.md @@ -120,10 +120,10 @@ const digit: CharacterClass; const whitespace: CharacterClass; ``` -* `any` matches any character except newline characters. -* `word` matches any word character (alphanumeric & underscore). -* `digit` matches any digit. -* `whitespace` matches any whitespace character (spaces, tabs, line breaks). +* `any` matches any character except newline characters. Regex syntax: `*`. +* `word` matches any word character (alphanumeric & underscore). Regex syntax: `\w`. +* `digit` matches any digit. Regex syntax: `\d`. +* `whitespace` matches any whitespace character (spaces, tabs, line breaks). Regex syntax: `\s`. 
### `anyOf()` @@ -133,39 +133,45 @@ function anyOf( ): CharacterClass ``` +Regex syntax: `[abc]`. + The `anyOf` class matches any character present in the `character` string. Example: `anyOf('aeiou')` will match either `a`, `e`, `i` `o` or `u` characters. -### `characterRange()` +### `charRange()` ```ts -function characterRange( +function charRange( start: string, end: string, ): CharacterClass ``` -The `characterRange` class matches any character present in the range from `start` to `end` (inclusive). +Regex syntax: `[a-z]`. + +The `charRange` class matches any character present in the range from `start` to `end` (inclusive). Examples: -* `characterRange('a', 'z')` will match all lowercase characters from `a` to `z`. -* `characterRange('A', 'Z')` will match all uppercase characters from `a` to `z`. -* `characterRange('0', '9')` will match all digit characters from `0` to `9`. +* `charRange('a', 'z')` will match all lowercase characters from `a` to `z`. +* `charRange('A', 'Z')` will match all uppercase characters from `A` to `Z`. +* `charRange('0', '9')` will match all digit characters from `0` to `9`. -### `characterClass()` +### `charClass()` ```ts -function characterClass( +function charClass( ...elements: CharacterClass[], ): CharacterClass ``` -The `characterClass` construct creates a new character class that includes all passed character classes. +Regex syntax: `[...]`. -Example: -* `characterClass(characterRange('a', 'f'), digit)` will match all lowercase hex digits (`0` to `9` and `a` to `f`). -* `characterClass(characterRange('a', 'z'), digit, anyOf("._-"))` will match any digit, lowercase latin lettet from `a` to `z`, and either of `.`, `_`, and `-` characters. +The `charClass` construct creates a new character class that includes all passed character classes. + +Examples: +* `charClass(charRange('a', 'f'), digit)` will match all lowercase hex digits (`0` to `9` and `a` to `f`). 
+* `charClass(charRange('a', 'z'), digit, anyOf("._-"))` will match any digit, lowercase latin lettet from `a` to `z`, and either of `.`, `_`, and `-` characters. ### `inverted()` @@ -175,25 +181,26 @@ function inverted( ): CharacterClass ``` +Regex syntax: `[^...]`. + The `inverted` construct creates a new character class that matches any character that is not present in the passed character class. Examples: * `inverted(digit)` matches any character that is not a digit * `inverted(anyOf('aeiou'))` matches any character that is not a lowercase vowel. - - ## Anchors Anchors are special characters or sequences that specify positions in the input string, rather than matching specific characters. -### Line start and end +### Start and end of string ```ts -const startOfString: Anchor; // Regex: ^ -const endOfString: Anchor; // Regex: $ +const startOfString: Anchor; +const endOfString: Anchor; ``` -The `startOfString` (regex: `^`) matches the start of a string (or line, if multiline mode is enabled). +* `startOfString` anchor matches the start of a string (or line, if multiline mode is enabled). Regex syntax: `^`. +* `endOfString` anchor matches the end of a string (or line, if multiline mode is enabled). Regex syntax: `$`. + -The `endOfString` (regex: `$`) matches the end of a string (or line, if multiline mode is enabled). 
\ No newline at end of file From d8e8c1deccd30fa0debd1141ff3a632e0a429700 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Sat, 20 Jan 2024 23:33:28 +0000 Subject: [PATCH 04/11] chore: reformat docs with prettier --- docs/API.md | 55 ++++++++++++++++++++++++------------------------ docs/Examples.md | 2 +- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/docs/API.md b/docs/API.md index 3291d76..c983ed4 100644 --- a/docs/API.md +++ b/docs/API.md @@ -6,20 +6,21 @@ ```ts function buildRegExp( - sequence: RegexSequence, - flags?: { - global?: boolean; - ignoreCase?: boolean; - multiline?: boolean; - hasIndices?: boolean; - sticky?: boolean; - }, + sequence: RegexSequence, + flags?: { + global?: boolean; + ignoreCase?: boolean; + multiline?: boolean; + hasIndices?: boolean; + sticky?: boolean; + }, ): RegExp; ``` The `buildRegExp` is a top-level function responsible for build JavaScript-native `RegExp` object from passed regex sequence. It optionally accepts a list of regex flags: + - `global` - find all matches in a string, instead of just the first one. - `ignoreCase` - perform case-insensitive matching. - `multiline` - treat the start and end of each line in a string as the beginning and end of the string. @@ -37,7 +38,7 @@ function capture( Regex syntax: `(...)`. -Captures, also known as capturing groups, are used to extract and store parts of the matched string for later use. +Captures, also known as capturing groups, are used to extract and store parts of the matched string for later use. ### `choiceOf()` @@ -55,7 +56,7 @@ Example: `choiceOf("color", "colour")` matches either `color` or `colour` patter ## Quantifiers -Quantifiers in regex define the number of occurrences to match for a pattern. +Quantifiers in regex define the number of occurrences to match for a pattern. ### `zeroOrMore()` @@ -106,10 +107,9 @@ Regex syntax: `{n}`, `{min,}`, `{min, max}`. 
The `repeat` quantifier in regex matches either exactly `options` count or between `min` and `max` count. If only `min` is provided it matches at least `min` count. - ## Character classes -Character classes are a set of characters that match any one of the characters in the set. +Character classes are a set of characters that match any one of the characters in the set. ### Common character classess @@ -120,10 +120,10 @@ const digit: CharacterClass; const whitespace: CharacterClass; ``` -* `any` matches any character except newline characters. Regex syntax: `*`. -* `word` matches any word character (alphanumeric & underscore). Regex syntax: `\w`. -* `digit` matches any digit. Regex syntax: `\d`. -* `whitespace` matches any whitespace character (spaces, tabs, line breaks). Regex syntax: `\s`. +- `any` matches any character except newline characters. Regex syntax: `.`. +- `word` matches any word character (alphanumeric & underscore). Regex syntax: `\w`. +- `digit` matches any digit. Regex syntax: `\d`. +- `whitespace` matches any whitespace character (spaces, tabs, line breaks). Regex syntax: `\s`. ### `anyOf()` @@ -153,9 +153,10 @@ Regex syntax: `[a-z]`. The `charRange` class matches any character present in the range from `start` to `end` (inclusive). Examples: -* `charRange('a', 'z')` will match all lowercase characters from `a` to `z`. -* `charRange('A', 'Z')` will match all uppercase characters from `A` to `Z`. -* `charRange('0', '9')` will match all digit characters from `0` to `9`. + +- `charRange('a', 'z')` will match all lowercase characters from `a` to `z`. +- `charRange('A', 'Z')` will match all uppercase characters from `A` to `Z`. +- `charRange('0', '9')` will match all digit characters from `0` to `9`. ### `charClass()` @@ -170,8 +171,9 @@ Regex syntax: `[...]`. The `charClass` construct creates a new character class that includes all passed character classes. 
Examples: -* `charClass(charRange('a', 'f'), digit)` will match all lowercase hex digits (`0` to `9` and `a` to `f`). -* `charClass(charRange('a', 'z'), digit, anyOf("._-"))` will match any digit, lowercase latin lettet from `a` to `z`, and either of `.`, `_`, and `-` characters. + +- `charClass(charRange('a', 'f'), digit)` will match all lowercase hex digits (`0` to `9` and `a` to `f`). +- `charClass(charRange('a', 'z'), digit, anyOf("._-"))` will match any digit, lowercase Latin letter from `a` to `z`, and either of `.`, `_`, and `-` characters. ### `inverted()` @@ -186,8 +188,9 @@ Regex syntax: `[^...]`. The `inverted` construct creates a new character class that matches any character that is not present in the passed character class. Examples: -* `inverted(digit)` matches any character that is not a digit -* `inverted(anyOf('aeiou'))` matches any character that is not a lowercase vowel. + +- `inverted(digit)` matches any character that is not a digit. +- `inverted(anyOf('aeiou'))` matches any character that is not a lowercase vowel. ## Anchors @@ -200,7 +203,5 @@ const startOfString: Anchor; const endOfString: Anchor; ``` -* `startOfString` anchor matches the start of a string (or line, if multiline mode is enabled). Regex syntax: `^`. -* `endOfString` anchor matches the end of a string (or line, if multiline mode is enabled). Regex syntax: `$`. - - +- `startOfString` anchor matches the start of a string (or line, if multiline mode is enabled). Regex syntax: `^`. +- `endOfString` anchor matches the end of a string (or line, if multiline mode is enabled). Regex syntax: `$`. 
diff --git a/docs/Examples.md b/docs/Examples.md index 7333a45..40496d0 100644 --- a/docs/Examples.md +++ b/docs/Examples.md @@ -9,7 +9,7 @@ const octet = choiceOf( [charRange('1', '9'), digit], ['1', repeat(digit, 2)], ['2', charRange('0', '4'), digit], - ['25', charRange('0', '5')] + ['25', charRange('0', '5')], ); // Match From 31fd24312b9fb45febdc18549ca8e1d154de3038 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Sat, 20 Jan 2024 23:44:45 +0000 Subject: [PATCH 05/11] refactor: improve repeat count naming --- README.md | 23 ++++---------------- docs/API.md | 45 ++++++++++++++++++++++++++++------------ src/constructs/repeat.ts | 14 ++++++------- 3 files changed, 43 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 5fde5d3..554c621 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,10 @@ const currencyAmount = buildRegExp([ ]); ``` +## API + +Comprehensive API document is available [here](./API.md). + ### Regex Builders | Builder | Regex Pattern | Description | @@ -102,11 +106,6 @@ const currencyAmount = buildRegExp([ | `capture(...)` | `(...)` | Create a capture group | | `choiceOf(x, y, z)` | `x\|y\|z` | Match one of provided sequences | -Notes: - -- `capture` accepts a sequence of elements -- `choiceOf()` accepts a variable number of sequences - ### Quantifiers | Regex Construct | Regex Pattern | Description | @@ -118,8 +117,6 @@ Notes: | `repeat(x, { min: n, })` | `x{n,}` | Pattern repeats at least given number of times | | `repeat(x, { min: n, max: n2 })` | `x{n1,n2}` | Pattern repeats between n1 and n2 number of times | -All quantifiers accept sequence of elements - ### Character classes | Regex Construct | Regex Pattern | Description | @@ -133,14 +130,6 @@ All quantifiers accept sequence of elements | `charClass(...)` | `[...]` | Concatenation of multiple character classes | | `inverted(...)` | `[^...]` | Negation of a given character class | -Notes: - -- `any`, `word`, `digit`, `whitespace` are objects, 
no need to call them -- `anyOf` accepts a single string of characters to match -- `charRange` accepts exactly **two single character** strings representing range start and end (inclusive) -- `charClass` accepts a variable number of character classes to join into a single class -- `inverted` accepts a single character class to be inverted - ### Anchors | Regex Construct | Regex Pattern | Description | @@ -148,10 +137,6 @@ Notes: | `startOfString` | `^` | Match start of the string (or start of a line in multiline mode) | | `endOfString` | `$` | Match end of the string (or end of a line in multiline mode) | -Notes: - -- `startOfString`, `endOfString` are objects, no need to call them. - ## Examples See [Examples document](./docs/Examples.md). diff --git a/docs/API.md b/docs/API.md index c983ed4..3b23257 100644 --- a/docs/API.md +++ b/docs/API.md @@ -1,5 +1,22 @@ # API +## Types + +### `RegexSequence` + +The sequence of regex elements forming a regular expression. For developer convenience it also accepts a single element instead of an array. + +### `RegexElement` + +Fundamental building blocks of a regular expression, defined as either a regex construct or a string. + +### `RegexConstruct` + +The common type for all regex constructs like character classes, quantifiers, and anchors. You should not need to use this type directly; it is returned by all regex construct functions. + +Note: the shape of the `RegexConstruct` is considered private, and may change in a breaking way without a major release. We will focus on maintaining the compatibility of regexes built with the public API. + + ## Builder ### `buildRegExp()` @@ -28,11 +45,13 @@ It optionally accepts a list of regex flags: ## Constructs +These functions and objects represent available regex constructs. 
+ ### `capture()` ```ts function capture( - sequence: RegexSequence + sequence: RegexSequence ): Capture ``` @@ -44,7 +63,7 @@ Captures, also known as capturing groups, are used to extract and store parts of ```ts function choiceOf( - ...alternatives: RegexSequence[] + ...alternatives: RegexSequence[] ): ChoiceOf { ``` @@ -62,7 +81,7 @@ Quantifiers in regex define the number of occurrences to match for a pattern. ```ts function zeroOrMore( - sequence: RegexSequence, + sequence: RegexSequence, ): ZeroOrMore ``` @@ -74,7 +93,7 @@ The `zeroOrMore` quantifier matches zero or more occurrences of given pattern, a ```ts function oneOrMore( - sequence: RegexSequence, + sequence: RegexSequence, ): OneOrMore ``` @@ -86,7 +105,7 @@ The `oneOrMore` quantifier matches one or more occurrences of given pattern, all ```ts function optionally( - sequence: RegexSequence, + sequence: RegexSequence, ): Optionally ``` @@ -98,14 +117,14 @@ The `optionally` quantifier matches zero or one occurrence of given pattern, mak ```ts function repeat( - options: number | { min: number; max?: number }, - sequence: RegexSequence, + sequence: RegexSequence, + count: number | { min: number; max?: number }, ): Repeat ``` Regex syntax: `{n}`, `{min,}`, `{min, max}`. -The `repeat` quantifier in regex matches either exactly `options` count or between `min` and `max` count. If only `min` is provided it matches at least `min` count. +The `repeat` quantifier in regex matches either exactly `count` or between `min` and `max` count. If only `min` is provided it matches at least `min` count. ## Character classes @@ -129,7 +148,7 @@ const whitespace: CharacterClass; ```ts function anyOf( - characters: string, + characters: string, ): CharacterClass ``` @@ -143,8 +162,8 @@ Example: `anyOf('aeiou')` will match either `a`, `e`, `i` `o` or `u` characters. 
```ts function charRange( - start: string, - end: string, + start: string, + end: string, ): CharacterClass ``` @@ -162,7 +181,7 @@ Examples: ```ts function charClass( - ...elements: CharacterClass[], + ...elements: CharacterClass[], ): CharacterClass ``` @@ -179,7 +198,7 @@ Examples: ```ts function inverted( - element: CharacterClass, + element: CharacterClass, ): CharacterClass ``` diff --git a/src/constructs/repeat.ts b/src/constructs/repeat.ts index 26a9d48..bf35e2f 100644 --- a/src/constructs/repeat.ts +++ b/src/constructs/repeat.ts @@ -5,13 +5,13 @@ import type { RegexConstruct, RegexElement, RegexSequence } from '../types'; export interface Repeat extends RegexConstruct { type: 'repeat'; - options: RepeatOptions; + count: RepeatCount; children: RegexElement[]; } -export type RepeatOptions = number | { min: number; max?: number }; +export type RepeatCount = number | { min: number; max?: number }; -export function repeat(sequence: RegexSequence, options: RepeatOptions): Repeat { +export function repeat(sequence: RegexSequence, count: RepeatCount): Repeat { const children = ensureArray(sequence); if (children.length === 0) { @@ -21,7 +21,7 @@ export function repeat(sequence: RegexSequence, options: RepeatOptions): Repeat return { type: 'repeat', children, - options, + count: count, encode: encodeRepeat, }; } @@ -29,15 +29,15 @@ export function repeat(sequence: RegexSequence, options: RepeatOptions): Repeat function encodeRepeat(this: Repeat): EncodeResult { const atomicNodes = encodeAtom(this.children); - if (typeof this.options === 'number') { + if (typeof this.count === 'number') { return { precedence: 'sequence', - pattern: `${atomicNodes.pattern}{${this.options}}`, + pattern: `${atomicNodes.pattern}{${this.count}}`, }; } return { precedence: 'sequence', - pattern: `${atomicNodes.pattern}{${this.options.min},${this.options?.max ?? ''}}`, + pattern: `${atomicNodes.pattern}{${this.count.min},${this.count?.max ?? 
''}}`, }; } From e2e5c32ca0f6e54554e3d5e3ad6f30a8c42039ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Sat, 20 Jan 2024 23:46:22 +0000 Subject: [PATCH 06/11] docs: improve --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 554c621..db6dd91 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ A user-friendly regular expression builder for TypeScript and JavaScript. +[API docs](./API.md) + ## Goal Regular expressions are a powerful tool for matching simple and complex text patterns, yet they are notorious for their hard-to-parse syntax. @@ -72,7 +74,7 @@ Examples of sequences: - single construct: `capture('abc')` - single string: `'Hello'` -Regex constructs can be composed into a tree: +Regex constructs can be composed into a tree structure: ```ts const currencyAmount = buildRegExp([ @@ -88,8 +90,6 @@ const currencyAmount = buildRegExp([ ]); ``` -## API - Comprehensive API document is available [here](./API.md). 
### Regex Builders From 611555a457111eb4899aa37b90f54b95a9fa7291 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Sat, 20 Jan 2024 23:49:01 +0000 Subject: [PATCH 07/11] chore: fix build and lint --- docs/API.md | 1 - src/__tests__/builder.test.ts | 3 --- src/builders.ts | 1 - 3 files changed, 5 deletions(-) diff --git a/docs/API.md b/docs/API.md index 3b23257..d20a990 100644 --- a/docs/API.md +++ b/docs/API.md @@ -29,7 +29,6 @@ function buildRegExp( ignoreCase?: boolean; multiline?: boolean; hasIndices?: boolean; - sticky?: boolean; }, ): RegExp; ``` diff --git a/src/__tests__/builder.test.ts b/src/__tests__/builder.test.ts index 7377430..d9e18f8 100644 --- a/src/__tests__/builder.test.ts +++ b/src/__tests__/builder.test.ts @@ -16,9 +16,6 @@ test('`regexBuilder` flags', () => { expect(buildRegExp('a', { hasIndices: true }).flags).toBe('d'); expect(buildRegExp('a', { hasIndices: false }).flags).toBe(''); - expect(buildRegExp('a', { sticky: true }).flags).toBe('y'); - expect(buildRegExp('a', { sticky: false }).flags).toBe(''); - expect( buildRegExp('a', { global: true, // diff --git a/src/builders.ts b/src/builders.ts index b550299..6b48f82 100644 --- a/src/builders.ts +++ b/src/builders.ts @@ -31,7 +31,6 @@ function encodeFlags(flags: RegexFlags): string { if (flags.ignoreCase) result += 'i'; if (flags.multiline) result += 'm'; if (flags.hasIndices) result += 'd'; - if (flags.sticky) result += 'y'; return result; } From 7c193bd4fed2e3d785ca0fe6d8d074ce834c0a66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Sat, 20 Jan 2024 23:56:12 +0000 Subject: [PATCH 08/11] docs: tweaks --- README.md | 16 ++++++++-------- docs/API.md | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index db6dd91..658a891 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ A user-friendly regular expression builder for TypeScript and JavaScript. 
-[API docs](./API.md) +[API docs](./docs/API.md) | [Examples](./docs/Examples.md) ## Goal @@ -70,9 +70,9 @@ Terminology: Most of the regex constructs accept a regex sequence as their argument. Examples of sequences: +- single element (construct): `capture('abc')` +- single element (string): `'Hello'` - array of elements: `['USD', oneOrMore(digit)]` -- single construct: `capture('abc')` -- single string: `'Hello'` Regex constructs can be composed into a tree structure: @@ -94,21 +94,21 @@ Comprehensive API document is available [here](./API.md). ### Regex Builders -| Builder | Regex Pattern | Description | +| Builder | Regex Syntax | Description | | ---------------------------------------- | ------------- | ----------------------------------- | | `buildRegExp(...)` | `/.../` | Create `RegExp` instance | | `buildRegExp(..., { ignoreCase: true })` | `/.../i` | Create `RegExp` instance with flags | ### Regex Constructs -| Regex Construct | Regex Pattern | Notes | +| Construct | Regex Syntax | Notes | | ------------------- | ------------- | ------------------------------- | | `capture(...)` | `(...)` | Create a capture group | | `choiceOf(x, y, z)` | `x\|y\|z` | Match one of provided sequences | ### Quantifiers -| Regex Construct | Regex Pattern | Description | +| Quantifier | Regex Syntax | Description | | -------------------------------- | ------------- | ------------------------------------------------- | | `zeroOrMore(x)` | `x*` | Zero or more occurence of a pattern | | `oneOrMore(x)` | `x+` | One or more occurence of a pattern | @@ -119,7 +119,7 @@ Comprehensive API document is available [here](./API.md). ### Character classes -| Regex Construct | Regex Pattern | Description | +| Character class | Regex Syntax | Description | | --------------------- | ------------- | ------------------------------------------- | | `any` | `.` | Any character | | `word` | `\w` | Word characters | @@ -132,7 +132,7 @@ Comprehensive API document is available [here](./API.md). 
### Anchors -| Regex Construct | Regex Pattern | Description | +| Anchor | Regex Syntax | Description | | --------------- | ------------- | ---------------------------------------------------------------- | | `startOfString` | `^` | Match start of the string (or start of a line in multiline mode) | | `endOfString` | `$` | Match end of the string (or end of a line in multiline mode) | diff --git a/docs/API.md b/docs/API.md index d20a990..70e4e2a 100644 --- a/docs/API.md +++ b/docs/API.md @@ -121,9 +121,9 @@ function repeat( ): Repeat ``` -Regex syntax: `{n}`, `{min,}`, `{min, max}`. +Regex syntax: `x{n}`, `x{min,}`, `x{min, max}`. -The `repeat` quantifier in regex matches either exactly `count` or between `min` and `max` count. If only `min` is provided it matches at least `min` count. +The `repeat` quantifier in regex matches either exactly `count` times or between `min` and `max` times. If only `min` is provided it matches at least `min` times. ## Character classes From b8ff98cece688bbc5682a6b1ffacb049d1809e46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Sun, 21 Jan 2024 00:19:38 +0000 Subject: [PATCH 09/11] docs: examples js number --- docs/Examples.md | 16 +++++++++++++ src/__tests__/examples.test.ts | 43 ++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/docs/Examples.md b/docs/Examples.md index 40496d0..10c891a 100644 --- a/docs/Examples.md +++ b/docs/Examples.md @@ -1,5 +1,21 @@ # Regex Examples +## JavaScript number + +```ts +const optionalSign = optionally(anyOf('+-')); +const exponent = [anyOf('eE'), optionalSign, oneOrMore(digit)]; + +const regex = buildRegExp([ + optionalSign, + choiceOf( + [oneOrMore(digit), optionally(['.', zeroOrMore(digit)])], // leading digit + ['.', oneOrMore(digit)], // leading dot + ), + optionally(exponent), // exponent +]); +``` + ## IPv4 address validation ```ts diff --git a/src/__tests__/examples.test.ts b/src/__tests__/examples.test.ts index de4b072..ffb8333 
100644 --- a/src/__tests__/examples.test.ts +++ b/src/__tests__/examples.test.ts @@ -1,13 +1,56 @@ import { + anyOf, buildRegExp, charRange, choiceOf, digit, endOfString, + oneOrMore, + optionally, repeat, startOfString, + zeroOrMore, } from '../index'; +test('example: validate JavaScript number', () => { + const optionalSign = optionally(anyOf('+-')); + const exponent = [anyOf('eE'), optionalSign, oneOrMore(digit)]; + + const regex = buildRegExp([ + startOfString, + optionalSign, + choiceOf( + [oneOrMore(digit), optionally(['.', zeroOrMore(digit)])], // leading digit + ['.', oneOrMore(digit)], // leading dot + ), + optionally(exponent), // exponent + endOfString, + ]); + + expect(regex).toMatchString('0'); + expect(regex).toMatchString('-1'); + expect(regex).toMatchString('+1'); + expect(regex).toMatchString('1.0'); + expect(regex).toMatchString('1.1234'); + expect(regex).toMatchString('1.'); + expect(regex).toMatchString('.1'); + expect(regex).toMatchString('-.1234'); + expect(regex).toMatchString('+.5'); + expect(regex).toMatchString('1e21'); + expect(regex).toMatchString('1e-21'); + expect(regex).toMatchString('+1e+42'); + expect(regex).toMatchString('-1e-42'); + + expect(regex).not.toMatchString(''); + expect(regex).not.toMatchString('a'); + expect(regex).not.toMatchString('1a'); + expect(regex).not.toMatchString('1.0.'); + expect(regex).not.toMatchString('.1.1'); + expect(regex).not.toMatchString('.'); + + expect(regex).toHavePattern(/a/); +}); + test('example: IPv4 address validator', () => { const octet = choiceOf( [digit], From 37bdb803408daf98efd4710fca24e7bb83c0b8cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Sun, 21 Jan 2024 00:24:16 +0000 Subject: [PATCH 10/11] chore: fix test --- src/__tests__/examples.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/__tests__/examples.test.ts b/src/__tests__/examples.test.ts index ffb8333..204c3e8 100644 --- a/src/__tests__/examples.test.ts +++ 
b/src/__tests__/examples.test.ts @@ -48,7 +48,7 @@ test('example: validate JavaScript number', () => { expect(regex).not.toMatchString('.1.1'); expect(regex).not.toMatchString('.'); - expect(regex).toHavePattern(/a/); + expect(regex).toHavePattern(/^[+-]?(?:\\d+(?:\\.\\d*)?|\\.\\d+)(?:[eE][+-]?\\d+)?$/); }); test('example: IPv4 address validator', () => { From d8cc06f7697da2c714cd77e07d03bb869f1910c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Sun, 21 Jan 2024 00:26:05 +0000 Subject: [PATCH 11/11] chore: fix --- src/__tests__/examples.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/__tests__/examples.test.ts b/src/__tests__/examples.test.ts index 204c3e8..0155b4d 100644 --- a/src/__tests__/examples.test.ts +++ b/src/__tests__/examples.test.ts @@ -48,7 +48,7 @@ test('example: validate JavaScript number', () => { expect(regex).not.toMatchString('.1.1'); expect(regex).not.toMatchString('.'); - expect(regex).toHavePattern(/^[+-]?(?:\\d+(?:\\.\\d*)?|\\.\\d+)(?:[eE][+-]?\\d+)?$/); + expect(regex).toHavePattern(/^[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?$/); }); test('example: IPv4 address validator', () => {