@@ -16,7 +16,44 @@ use std::any::Any;
1616use std:: fmt:: Debug ;
1717use std:: sync:: Arc ;
1818
19- //TODO: Docs
19+ /// `REGEXP_SUBSTR` function implementation
20+ ///
21+ /// Returns the substring that matches the specified occurrence of the regular expression pattern in the string subject.
22+ /// If no match is found, returns `NULL`.
23+ ///
24+ /// Syntax: `REGEXP_SUBSTR( <subject> , <pattern> [ , <position> [ , <occurrence> [ , <regex_parameters> [ , <group_num> ] ] ] ] )`
25+ ///
26+ /// Arguments:
27+ ///
28+ /// `Required`:
29+ /// - `<subject>` the string to search for matches.
30+ /// - `<pattern>` pattern to match.
31+ ///
32+ /// `Optional`:
33+ /// - `<position>` number of characters from the beginning of the string where the function starts searching for matches.
34+ /// Default: `1` (the search for a match starts at the first character on the left)
35+ /// - `<occurrence>` specifies which occurrence of the pattern to return.
36+ /// The function skips the first `occurrence - 1` matches. For example, if there are 5 matches and you specify 3 for the occurrence argument,
37+ /// the function ignores the first two matches and returns the third match.
38+ /// Default: `1`
39+ /// - `<regex_parameters>` String of one or more characters that specifies the parameters used for searching for matches.
40+ /// Supported values:
41+ /// ---------------------------------------------------------------------------
42+ /// | Parameter | Description |
43+ /// |-----------------|-------------------------------------------|
44+ /// | c | Case-sensitive matching |
45+ /// | i | Case-insensitive matching |
46+ /// | m | Multi-line mode |
47+ /// | e | Extract submatches |
48+ /// | s | POSIX wildcard character `.` matches `\n` |
49+ /// ---------------------------------------------------------------------------
50+ /// Default: `c`
51+ /// - `<group_num>` the `group_num` parameter specifies which group to extract.
52+ /// Groups are specified by using parentheses in the regular expression.
53+ /// If a `group_num` is specified, it allows extraction even if the `e` option was not also specified;
54+ /// the `e` option is implied. Default: `1` when `<regex_parameters>` contains `e`, otherwise `0`.
55+ ///
56+ /// Example: `REGEXP_SUBSTR('nevermore1, nevermore2, nevermore3.', 'nevermore')`
2057#[ derive( Debug ) ]
2158pub struct RegexpSubstrFunc {
2259 signature : Signature ,
@@ -114,7 +151,7 @@ impl RegexpSubstrFunc {
114151 } ,
115152 ) ?;
116153
117- let regexp_parameters = args. get ( 4 ) . map_or_else (
154+ let regex_parameters = args. get ( 4 ) . map_or_else (
118155 || Ok ( "c" ) ,
119156 |value| match value {
120157 ColumnarValue :: Scalar (
@@ -147,7 +184,7 @@ impl RegexpSubstrFunc {
147184
148185 let group_num = args. get ( 5 ) . map_or_else (
149186 || {
150- if regexp_parameters . contains ( 'e' ) {
187+ if regex_parameters . contains ( 'e' ) {
151188 Ok ( 1 )
152189 } else {
153190 Ok ( 0 )
@@ -173,7 +210,7 @@ impl RegexpSubstrFunc {
173210 } ,
174211 ) ?;
175212
176- Ok ( ( position, occurrence, regexp_parameters , group_num) )
213+ Ok ( ( position, occurrence, regex_parameters , group_num) )
177214 }
178215}
179216
@@ -233,7 +270,7 @@ impl ScalarUDFImpl for RegexpSubstrFunc {
233270 }
234271 } ;
235272
236- let ( position, occurrence, regexp_parameters , group_num) =
273+ let ( position, occurrence, regex_parameters , group_num) =
237274 Self :: take_args_values ( & args. args ) ?;
238275
239276 //TODO: Or data_capacity: 1024
@@ -242,7 +279,7 @@ impl ScalarUDFImpl for RegexpSubstrFunc {
242279 match array. data_type ( ) {
243280 DataType :: Utf8 | DataType :: LargeUtf8 | DataType :: Utf8View => {
244281 let string_array: & StringArray = as_generic_string_array ( array) ?;
245- let regex = pattern_to_regex ( pattern, regexp_parameters )
282+ let regex = pattern_to_regex ( pattern, regex_parameters )
246283 . context ( regexp_errors:: UnsupportedRegexSnafu ) ?;
247284 regexp ( string_array, & regex, position) . for_each ( |opt_iter| {
248285 result_array. append_option ( opt_iter. and_then ( |mut cap_iter| {
0 commit comments