Skip to content

Commit f6c5363

Browse files
committed
docs + minor tweaks
1 parent fdda307 commit f6c5363

File tree

3 files changed

+50
-13
lines changed

3 files changed

+50
-13
lines changed

crates/embucket-functions/src/regexp/regexp_substr.rs

Lines changed: 43 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,44 @@ use std::any::Any;
1616
use std::fmt::Debug;
1717
use std::sync::Arc;
1818

19-
//TODO: Docs
19+
/// `REGEXP_SUBSTR` function implementation
20+
///
21+
/// Returns the substring that matches the specified occurrence of the regular expression pattern in the string subject.
22+
/// If no match is found, returns `NULL`.
23+
///
24+
/// Syntax: `REGEXP_SUBSTR( <subject> , <pattern> [ , <position> [ , <occurrence> [ , <regex_parameters> [ , <group_num> ] ] ] ] )`
25+
///
26+
/// Arguments:
27+
///
28+
/// `Required`:
29+
/// - `<subject>` the string to search for matches.
30+
/// - `<pattern>` pattern to match.
31+
///
32+
/// `Optional`:
33+
/// - `<position>` number of characters from the beginning of the string where the function starts searching for matches.
34+
/// Default: `1` (the search for a match starts at the first character on the left)
35+
/// - `<occurrence>` specifies which occurrence of the pattern to return.
36+
/// The function skips the first occurrence - 1 matches. For example, if there are 5 matches and you specify 3 for the occurrence argument,
37+
/// the function ignores the first two matches and returns the third match.
38+
/// Default: `1`
39+
/// - `<regex_parameters>` String of one or more characters that specifies the parameters used for searching for matches.
40+
/// Supported values:
41+
/// ---------------------------------------------------------------------------
42+
/// | Parameter | Description |
43+
/// |-----------------|-------------------------------------------|
44+
/// | c | Case-sensitive matching |
45+
/// | i | Case-insensitive matching |
46+
/// | m | Multi-line mode |
47+
/// | e | Extract submatches |
48+
/// | s | POSIX wildcard character `.` matches `\n` |
49+
/// ---------------------------------------------------------------------------
50+
/// Default: `c`
51+
/// - `<group_num>` the `group_num` parameter specifies which group to extract.
52+
/// Groups are specified by using parentheses in the regular expression.
53+
/// If a `group_num` is specified, it allows extraction even if the e option was not also specified.
54+
/// The e option is implied.
55+
///
56+
/// Example: `REGEXP_SUBSTR('nevermore1, nevermore2, nevermore3.', 'nevermore')`
2057
#[derive(Debug)]
2158
pub struct RegexpSubstrFunc {
2259
signature: Signature,
@@ -114,7 +151,7 @@ impl RegexpSubstrFunc {
114151
},
115152
)?;
116153

117-
let regexp_parameters = args.get(4).map_or_else(
154+
let regex_parameters = args.get(4).map_or_else(
118155
|| Ok("c"),
119156
|value| match value {
120157
ColumnarValue::Scalar(
@@ -147,7 +184,7 @@ impl RegexpSubstrFunc {
147184

148185
let group_num = args.get(5).map_or_else(
149186
|| {
150-
if regexp_parameters.contains('e') {
187+
if regex_parameters.contains('e') {
151188
Ok(1)
152189
} else {
153190
Ok(0)
@@ -173,7 +210,7 @@ impl RegexpSubstrFunc {
173210
},
174211
)?;
175212

176-
Ok((position, occurrence, regexp_parameters, group_num))
213+
Ok((position, occurrence, regex_parameters, group_num))
177214
}
178215
}
179216

@@ -233,7 +270,7 @@ impl ScalarUDFImpl for RegexpSubstrFunc {
233270
}
234271
};
235272

236-
let (position, occurrence, regexp_parameters, group_num) =
273+
let (position, occurrence, regex_parameters, group_num) =
237274
Self::take_args_values(&args.args)?;
238275

239276
//TODO: Or data_capacity: 1024
@@ -242,7 +279,7 @@ impl ScalarUDFImpl for RegexpSubstrFunc {
242279
match array.data_type() {
243280
DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => {
244281
let string_array: &StringArray = as_generic_string_array(array)?;
245-
let regex = pattern_to_regex(pattern, regexp_parameters)
282+
let regex = pattern_to_regex(pattern, regex_parameters)
246283
.context(regexp_errors::UnsupportedRegexSnafu)?;
247284
regexp(string_array, &regex, position).for_each(|opt_iter| {
248285
result_array.append_option(opt_iter.and_then(|mut cap_iter| {

crates/embucket-functions/src/tests/regexp/regexp_substr.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use crate::test_query;
22

33
test_query!(
44
regexp_substr_basic_scalar,
5-
"SELECT REGEXP_SUBSTR('nevermore1, nevermore2, nevermore3.', 'nevermore\\d')",
5+
"SELECT REGEXP_SUBSTR('nevermore1, nevermore2, nevermore3.', 'nevermore')",
66
snapshot_path = "regexp_substr"
77
);
88

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
---
22
source: crates/embucket-functions/src/tests/regexp/regexp_substr.rs
3-
description: "\"SELECT REGEXP_SUBSTR('nevermore1, nevermore2, nevermore3.', 'nevermore\\\\d')\""
3+
description: "\"SELECT REGEXP_SUBSTR('nevermore1, nevermore2, nevermore3.', 'nevermore')\""
44
---
55
Ok(
66
[
7-
"+--------------------------------------------------------------------------------+",
8-
"| regexp_substr(Utf8(\"nevermore1, nevermore2, nevermore3.\"),Utf8(\"nevermore\\d\")) |",
9-
"+--------------------------------------------------------------------------------+",
10-
"| nevermore1 |",
11-
"+--------------------------------------------------------------------------------+",
7+
"+------------------------------------------------------------------------------+",
8+
"| regexp_substr(Utf8(\"nevermore1, nevermore2, nevermore3.\"),Utf8(\"nevermore\")) |",
9+
"+------------------------------------------------------------------------------+",
10+
"| nevermore |",
11+
"+------------------------------------------------------------------------------+",
1212
],
1313
)

0 commit comments

Comments
 (0)