diff --git a/crates/embucket-functions/src/datetime/date_part_extract.rs b/crates/embucket-functions/src/datetime/date_part_extract.rs
index f75c2a52b..722b5940f 100644
--- a/crates/embucket-functions/src/datetime/date_part_extract.rs
+++ b/crates/embucket-functions/src/datetime/date_part_extract.rs
@@ -451,6 +451,7 @@ fn calculate_year_of_week(date: NaiveDate, week_start: usize, week_of_year_polic
 #[cfg(test)]
 mod tests {
     use super::*;
+
     use datafusion::prelude::SessionContext;
     use datafusion_common::assert_batches_eq;
     use datafusion_expr::ScalarUDF;
diff --git a/crates/embucket-functions/src/regexp/mod.rs b/crates/embucket-functions/src/regexp/mod.rs
index 5109d4b52..baeff0c82 100644
--- a/crates/embucket-functions/src/regexp/mod.rs
+++ b/crates/embucket-functions/src/regexp/mod.rs
@@ -1,14 +1,19 @@
 pub mod errors;
 pub mod regexp_instr;
+mod regexp_substr;
 
 use crate::regexp::regexp_instr::RegexpInstrFunc;
+use crate::regexp::regexp_substr::RegexpSubstrFunc;
 use datafusion_expr::ScalarUDF;
 use datafusion_expr::registry::FunctionRegistry;
 pub use errors::Error;
 use std::sync::Arc;
 
 pub fn register_udfs(registry: &mut dyn FunctionRegistry) -> datafusion_common::Result<()> {
-    let functions: Vec<Arc<ScalarUDF>> = vec![Arc::new(ScalarUDF::from(RegexpInstrFunc::new()))];
+    let functions: Vec<Arc<ScalarUDF>> = vec![
+        Arc::new(ScalarUDF::from(RegexpInstrFunc::new())),
+        Arc::new(ScalarUDF::from(RegexpSubstrFunc::new())),
+    ];
     for func in functions {
         registry.register_udf(func)?;
     }
diff --git a/crates/embucket-functions/src/regexp/regexp_instr.rs b/crates/embucket-functions/src/regexp/regexp_instr.rs
index e9d19a3a3..fa65daad3 100644
--- a/crates/embucket-functions/src/regexp/regexp_instr.rs
+++ b/crates/embucket-functions/src/regexp/regexp_instr.rs
@@ -239,7 +239,7 @@ impl RegexpInstrFunc {
                 }
                 other => regexp_errors::UnsupportedInputTypeWithPositionSnafu {
                     data_type: other.data_type(),
-                    position: 6usize,
+                    position: 7usize,
                 }
                 .fail(),
             },
diff --git a/crates/embucket-functions/src/regexp/regexp_substr.rs b/crates/embucket-functions/src/regexp/regexp_substr.rs
new file mode 100644
index 000000000..4b25fc370
--- /dev/null
+++ b/crates/embucket-functions/src/regexp/regexp_substr.rs
@@ -0,0 +1,302 @@
+use super::errors as regexp_errors;
+use crate::utils::{pattern_to_regex, regexp};
+use datafusion::arrow::array::{StringArray, StringBuilder};
+use datafusion::arrow::datatypes::DataType;
+use datafusion::error::Result as DFResult;
+use datafusion::logical_expr::{
+    ColumnarValue, Signature, TypeSignature, TypeSignatureClass, Volatility,
+};
+use datafusion_common::ScalarValue;
+use datafusion_common::arrow::array::Array;
+use datafusion_common::cast::as_generic_string_array;
+use datafusion_common::types::logical_string;
+use datafusion_expr::{Coercion, ScalarFunctionArgs, ScalarUDFImpl};
+use snafu::ResultExt;
+use std::any::Any;
+use std::fmt::Debug;
+use std::sync::Arc;
+
+/// `REGEXP_SUBSTR` function implementation
+///
+/// Returns the substring that matches the regular expression pattern within the string subject.
+/// If no match is found, returns NULL.
+///
+/// Syntax: `REGEXP_SUBSTR( <subject> , <pattern> [ , <position> [ , <occurrence> [ , <regex_parameters> [ , <group_num> ] ] ] ] )`
+///
+/// Arguments:
+///
+/// `Required`:
+/// - `<subject>` the string to search for matches.
+/// - `<pattern>` pattern to match.
+///
+/// `Optional`:
+/// - `<position>` number of characters from the beginning of the string where the function starts searching for matches.
+///   Default: `1` (the search for a match starts at the first character on the left)
+/// - `<occurrence>` specifies which occurrence of the pattern to return.
+///   The function skips the first occurrence - 1 matches. For example, if there are 5 matches and you specify 3 for the occurrence argument,
+///   the function ignores the first two matches and returns the third match.
+///   Default: `1`
+/// - `<regex_parameters>` String of one or more characters that specifies the parameters used for searching for matches.
+///   Supported values:
+///   ---------------------------------------------------------------------------
+///   | Parameter       | Description                               |
+///   |-----------------|-------------------------------------------|
+///   | c               | Case-sensitive matching                   |
+///   | i               | Case-insensitive matching                 |
+///   | m               | Multi-line mode                           |
+///   | e               | Extract submatches                        |
+///   | s               | POSIX wildcard character `.` matches `\n` |
+///   ---------------------------------------------------------------------------
+///   Default: `c`
+/// - `<group_num>` the `group_num` parameter specifies which group to extract.
+///   Groups are specified by using parentheses in the regular expression.
+///   If a `group_num` is specified, it allows extraction even if the e option was not also specified.
+///   The e option is implied. Without `group_num`, `e` selects group 1; otherwise the whole match is returned.
+///
+/// Example: `REGEXP_SUBSTR('nevermore1, nevermore2, nevermore3.', 'nevermore')`
+#[derive(Debug)]
+pub struct RegexpSubstrFunc {
+    signature: Signature,
+}
+
+impl Default for RegexpSubstrFunc {
+    fn default() -> Self {
+        Self::new() // the default instance is exactly what `new` builds
+    }
+}
+
+impl RegexpSubstrFunc {
+    /// Creates the function with one coercible signature per arity, from two to six arguments.
+    pub fn new() -> Self {
+        Self {
+            signature: Signature::one_of(
+                vec![
+                    TypeSignature::Coercible(vec![
+                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
+                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
+                    ]),
+                    TypeSignature::Coercible(vec![
+                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
+                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
+                        Coercion::new_exact(TypeSignatureClass::Integer),
+                    ]),
+                    TypeSignature::Coercible(vec![
+                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
+                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
+                        Coercion::new_exact(TypeSignatureClass::Integer),
+                        Coercion::new_exact(TypeSignatureClass::Integer),
+                    ]),
+                    TypeSignature::Coercible(vec![
+                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
+                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
+                        Coercion::new_exact(TypeSignatureClass::Integer),
+                        Coercion::new_exact(TypeSignatureClass::Integer),
+                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
+                    ]),
+                    TypeSignature::Coercible(vec![
+                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
+                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
+                        Coercion::new_exact(TypeSignatureClass::Integer),
+                        Coercion::new_exact(TypeSignatureClass::Integer),
+                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
+                        Coercion::new_exact(TypeSignatureClass::Integer),
+                    ]),
+                ],
+                Volatility::Immutable,
+            ),
+        }
+    }
+
+    /// Validates the optional arguments and returns `(position, occurrence, regex_parameters,
+    /// group_num)`; `position` and `occurrence` are converted to zero-based indexes.
+    #[allow(clippy::too_many_lines)]
+    fn take_args_values(args: &[ColumnarValue]) -> DFResult<(usize, usize, &str, usize)> {
+        let position = args.get(2).map_or_else(
+            || Ok(0),
+            |value| match value {
+                ColumnarValue::Scalar(ScalarValue::Int64(Some(value))) if 0 < *value => {
+                    usize::try_from(*value - 1)
+                        .context(regexp_errors::InvalidIntegerConversionSnafu)
+                }
+                ColumnarValue::Scalar(ScalarValue::Int64(Some(value))) => {
+                    regexp_errors::WrongArgValueSnafu {
+                        got: value.to_string(),
+                        reason: "Position must be positive".to_string(),
+                    }
+                    .fail()
+                }
+                other => regexp_errors::UnsupportedInputTypeWithPositionSnafu {
+                    data_type: other.data_type(),
+                    position: 3usize,
+                }
+                .fail(),
+            },
+        )?;
+
+        let occurrence = args.get(3).map_or_else(
+            || Ok(0),
+            |value| match value {
+                ColumnarValue::Scalar(ScalarValue::Int64(Some(value))) if 0 < *value => {
+                    usize::try_from(*value - 1)
+                        .context(regexp_errors::InvalidIntegerConversionSnafu)
+                }
+                ColumnarValue::Scalar(ScalarValue::Int64(Some(value))) => {
+                    regexp_errors::WrongArgValueSnafu {
+                        got: value.to_string(),
+                        reason: "Occurrence must be positive".to_string(),
+                    }
+                    .fail()
+                }
+                other => regexp_errors::UnsupportedInputTypeWithPositionSnafu {
+                    data_type: other.data_type(),
+                    position: 4usize,
+                }
+                .fail(),
+            },
+        )?;
+
+        let regex_parameters = args.get(4).map_or_else(
+            || Ok("c"),
+            |value| match value {
+                ColumnarValue::Scalar(
+                    ScalarValue::Utf8(Some(value))
+                    | ScalarValue::Utf8View(Some(value))
+                    | ScalarValue::LargeUtf8(Some(value)),
+                ) => match value.chars().find(|c| !"cimes".contains(*c)) {
+                    // An empty parameter string falls back to the default.
+                    None if value.is_empty() => Ok("c"),
+                    None => Ok(value.as_str()),
+                    // Report the offending character itself; slicing the first
+                    // byte would panic on a multi-byte character.
+                    Some(unknown) => regexp_errors::WrongArgValueSnafu {
+                        got: value.to_string(),
+                        reason: format!("Unknown parameter: '{unknown}'"),
+                    }
+                    .fail(),
+                },
+                other => regexp_errors::UnsupportedInputTypeWithPositionSnafu {
+                    data_type: other.data_type(),
+                    position: 5usize,
+                }
+                .fail(),
+            },
+        )?;
+
+        let group_num = args.get(5).map_or_else(
+            || {
+                if regex_parameters.contains('e') {
+                    Ok(1)
+                } else {
+                    Ok(0)
+                }
+            },
+            |value| match value {
+                ColumnarValue::Scalar(ScalarValue::Int64(Some(value))) if 0 <= *value => {
+                    usize::try_from(*value)
+                        .context(regexp_errors::InvalidIntegerConversionSnafu)
+                }
+                ColumnarValue::Scalar(ScalarValue::Int64(Some(value))) => {
+                    regexp_errors::WrongArgValueSnafu {
+                        got: value.to_string(),
+                        reason: "Capture group must be non-negative".to_string(),
+                    }
+                    .fail()
+                }
+                other => regexp_errors::UnsupportedInputTypeWithPositionSnafu {
+                    data_type: other.data_type(),
+                    position: 6usize,
+                }
+                .fail(),
+            },
+        )?;
+
+        Ok((position, occurrence, regex_parameters, group_num))
+    }
+}
+
+impl ScalarUDFImpl for RegexpSubstrFunc {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &'static str {
+        "regexp_substr"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, arg_types: &[DataType]) -> DFResult<DataType> {
+        match arg_types.len() {
+            n if n < 2 => regexp_errors::NotEnoughArgumentsSnafu {
+                got: n,
+                at_least: 2usize,
+            }
+            .fail()?,
+            // The extracted substring is always returned as `Utf8`.
+            2..=6 => Ok(DataType::Utf8),
+            n => regexp_errors::TooManyArgumentsSnafu {
+                got: n,
+                at_maximum: 6usize,
+            }
+            .fail()?,
+        }
+    }
+
+    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> DFResult<ColumnarValue> {
+        // Indexing is safe: every declared signature has at least two arguments.
+        let subject = &args.args[0];
+        let array = match subject {
+            ColumnarValue::Array(array) => array,
+            ColumnarValue::Scalar(scalar) => &scalar.to_array()?,
+        };
+
+        // Only a constant (scalar, non-null) pattern is supported.
+        let pattern = match &args.args[1] {
+            ColumnarValue::Scalar(
+                ScalarValue::Utf8(Some(pattern))
+                | ScalarValue::LargeUtf8(Some(pattern))
+                | ScalarValue::Utf8View(Some(pattern)),
+            ) => pattern,
+            other => {
+                return regexp_errors::UnsupportedInputTypeWithPositionSnafu {
+                    data_type: other.data_type(),
+                    position: 2usize,
+                }
+                .fail()?;
+            }
+        };
+
+        let (position, occurrence, regex_parameters, group_num) =
+            Self::take_args_values(&args.args)?;
+
+        //TODO: Or data_capacity: 1024
+        let mut result_array = StringBuilder::with_capacity(array.len(), array.len() * 10);
+
+        match array.data_type() {
+            DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => {
+                // `StringArray` is `Utf8`-only: normalize the other string types first.
+                let utf8 = datafusion::arrow::compute::cast(array, &DataType::Utf8)?;
+                let string_array: &StringArray = as_generic_string_array(&utf8)?;
+                let regex = pattern_to_regex(pattern, regex_parameters)
+                    .context(regexp_errors::UnsupportedRegexSnafu)?;
+                regexp(string_array, &regex, position).for_each(|opt_iter| {
+                    result_array.append_option(opt_iter.and_then(|mut cap_iter| {
+                        cap_iter.nth(occurrence).and_then(|cap| {
+                            // Group 0 is the whole match; a missing group yields NULL.
+                            cap.get(group_num).map(|mat| mat.as_str())
+                        })
+                    }));
+                });
+            }
+            other => regexp_errors::UnsupportedInputTypeWithPositionSnafu {
+                position: 1usize,
+                data_type: other.clone(),
+            }
+            .fail()?,
+        }
+
+        Ok(ColumnarValue::Array(Arc::new(result_array.finish())))
+    }
+}
diff --git a/crates/embucket-functions/src/tests/regexp/mod.rs b/crates/embucket-functions/src/tests/regexp/mod.rs
index 72c9db3cc..7dd4d6172 100644
--- a/crates/embucket-functions/src/tests/regexp/mod.rs
+++ b/crates/embucket-functions/src/tests/regexp/mod.rs
@@ -1 +1,2 @@
 mod regexp_instr;
+mod regexp_substr;
diff --git a/crates/embucket-functions/src/tests/regexp/regexp_substr.rs b/crates/embucket-functions/src/tests/regexp/regexp_substr.rs
new file mode 100644
index 000000000..3ee83a7b2
--- /dev/null
+++ b/crates/embucket-functions/src/tests/regexp/regexp_substr.rs
@@ -0,0 +1,85 @@
+use crate::test_query;
+
+test_query!(
+    regexp_substr_basic_scalar, // literal subject, all optional arguments omitted
+    "SELECT REGEXP_SUBSTR('nevermore1, nevermore2, nevermore3.', 'nevermore')",
+    snapshot_path = "regexp_substr"
+);
+
+test_query!(
+    regexp_substr_basic_column, // column subject, all optional arguments omitted
+    "SELECT REGEXP_SUBSTR(column1, 'the\\W+\\w+')
+    FROM VALUES ('It was the best of times, it was the worst of times.'),
+    ('In    the   string   the   extra   spaces  are   redundant.'),
+    ('A thespian theater is nearby.')",
+    snapshot_path = "regexp_substr"
+);
+
+test_query!(
+    regexp_substr_occurrence, // second occurrence, searching from position 1
+    "SELECT REGEXP_SUBSTR(column1, 'the\\W+\\w+', 1, 2)
+    FROM VALUES ('It was the best of times, it was the worst of times.'),
+    ('In    the   string   the   extra   spaces  are   redundant.'),
+    ('A thespian theater is nearby.')",
+    snapshot_path = "regexp_substr"
+);
+
+test_query!(
+    regexp_substr_group_num, // capture group 1 of the second occurrence
+    "SELECT REGEXP_SUBSTR(column1, 'the\\W+(\\w+)', 1, 2, 'e', 1)
+    FROM VALUES ('It was the best of times, it was the worst of times.'),
+    ('In    the   string   the   extra   spaces  are   redundant.'),
+    ('A thespian theater is nearby.')",
+    snapshot_path = "regexp_substr"
+);
+
+test_query!(
+    regexp_substr_word_groups, // capture group 1 for occurrences 1 through 4
+    "SELECT REGEXP_SUBSTR(column1, 'A\\W+(\\w+)', 1, 1, 'e', 1),
+    REGEXP_SUBSTR(column1, 'A\\W+(\\w+)', 1, 2, 'e', 1),
+    REGEXP_SUBSTR(column1, 'A\\W+(\\w+)', 1, 3, 'e', 1),
+    REGEXP_SUBSTR(column1, 'A\\W+(\\w+)', 1, 4, 'e', 1)
+    FROM VALUES ('A MAN A PLAN A CANAL')",
+    snapshot_path = "regexp_substr"
+);
+
+test_query!(
+    regexp_substr_letter_groups, // capture groups 1, 2 and 3 of the first occurrence
+    "SELECT REGEXP_SUBSTR(column1, 'A\\W+(\\w)(\\w)(\\w)', 1, 1, 'e', 1),
+    REGEXP_SUBSTR(column1, 'A\\W+(\\w)(\\w)(\\w)', 1, 1, 'e', 2),
+    REGEXP_SUBSTR(column1, 'A\\W+(\\w)(\\w)(\\w)', 1, 1, 'e', 3)
+    FROM VALUES ('A MAN A PLAN A CANAL')",
+    snapshot_path = "regexp_substr"
+);
+
+test_query!(
+    regexp_substr_word_boundary, // pattern anchored with `\b` on both sides
+    "SELECT REGEXP_SUBSTR('It was the best of times, it was the worst of times','\\bwas\\b', 1, 1)",
+    snapshot_path = "regexp_substr"
+);
+
+test_query!(
+    regexp_substr_regex_patterns_1, // POSIX character class, search starting at position 15
+    "SELECT REGEXP_SUBSTR('It was the best of times, it was the worst of times', '[[:alpha:]]{2,}st', 15, 1)",
+    snapshot_path = "regexp_substr"
+);
+
+test_query!(
+    regexp_substr_regex_patterns_2, // default arguments over several rows
+    "SELECT REGEXP_SUBSTR(column1, '\\b\\S*o\\S*\\b')
+    FROM VALUES ('Hellooo World'),
+    ('How are you doing today?'),
+    ('the quick brown fox jumps over the lazy dog'),
+    ('PACK MY BOX WITH FIVE DOZEN LIQUOR JUGS')",
+    snapshot_path = "regexp_substr"
+);
+
+test_query!(
+    regexp_substr_regex_patterns_3, // position 3, third occurrence, case-insensitive
+    "SELECT REGEXP_SUBSTR(column1, '\\b\\S*o\\S*\\b', 3, 3, 'i')
+    FROM VALUES ('Hellooo World'),
+    ('How are you doing today?'),
+    ('the quick brown fox jumps over the lazy dog'),
+    ('PACK MY BOX WITH FIVE DOZEN LIQUOR JUGS')",
+    snapshot_path = "regexp_substr"
+);
diff --git a/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_basic_column.snap b/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_basic_column.snap
new file mode 100644
index 000000000..c4d3ac70f
--- /dev/null
+++ b/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_basic_column.snap
@@ -0,0 +1,15 @@
+---
+source: crates/embucket-functions/src/tests/regexp/regexp_substr.rs
+description: "\"SELECT REGEXP_SUBSTR(column1, 'the\\\\W+\\\\w+')\n    FROM VALUES ('It was the best of times, it was the worst of times.'),\n    ('In    the   string   the   extra   spaces  are   redundant.'),\n    ('A thespian theater is nearby.')\""
+---
+Ok(
+    [
+        "+------------------------------------------+",
+        "| regexp_substr(column1,Utf8(\"the\\W+\\w+\")) |",
+        "+------------------------------------------+",
+        "| the best                                 |",
+        "| the   string                             |",
+        "|                                          |",
+        "+------------------------------------------+",
+    ],
+)
diff --git a/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_basic_scalar.snap b/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_basic_scalar.snap
new file mode 100644
index 000000000..310a51b42
--- /dev/null
+++ b/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_basic_scalar.snap
@@ -0,0 +1,13 @@
+---
+source: crates/embucket-functions/src/tests/regexp/regexp_substr.rs
+description: "\"SELECT REGEXP_SUBSTR('nevermore1, nevermore2, nevermore3.', 'nevermore')\""
+---
+Ok(
+    [
+        "+------------------------------------------------------------------------------+",
+        "| regexp_substr(Utf8(\"nevermore1, nevermore2, nevermore3.\"),Utf8(\"nevermore\")) |",
+        "+------------------------------------------------------------------------------+",
+        "| nevermore                                                                    |",
+        "+------------------------------------------------------------------------------+",
+    ],
+)
diff --git a/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_group_num.snap b/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_group_num.snap
new file mode 100644
index 000000000..f4a4269f5
--- /dev/null
+++ b/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_group_num.snap
@@ -0,0 +1,15 @@
+---
+source: crates/embucket-functions/src/tests/regexp/regexp_substr.rs
+description: "\"SELECT REGEXP_SUBSTR(column1, 'the\\\\W+(\\\\w+)', 1, 2, 'e', 1)\n    FROM VALUES ('It was the best of times, it was the worst of times.'),\n    ('In    the   string   the   extra   spaces  are   redundant.'),\n    ('A thespian theater is nearby.')\""
+---
+Ok(
+    [
+        "+---------------------------------------------------------------------------------+",
+        "| regexp_substr(column1,Utf8(\"the\\W+(\\w+)\"),Int64(1),Int64(2),Utf8(\"e\"),Int64(1)) |",
+        "+---------------------------------------------------------------------------------+",
+        "| worst                                                                           |",
+        "| extra                                                                           |",
+        "|                                                                                 |",
+        "+---------------------------------------------------------------------------------+",
+    ],
+)
diff --git a/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_letter_groups.snap b/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_letter_groups.snap
new file mode 100644
index 000000000..d0fd5469c
--- /dev/null
+++ b/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_letter_groups.snap
@@ -0,0 +1,13 @@
+---
+source: crates/embucket-functions/src/tests/regexp/regexp_substr.rs
+description: "\"SELECT REGEXP_SUBSTR(column1, 'A\\\\W+(\\\\w)(\\\\w)(\\\\w)', 1, 1, 'e', 1),\n    REGEXP_SUBSTR(column1, 'A\\\\W+(\\\\w)(\\\\w)(\\\\w)', 1, 1, 'e', 2),\n    REGEXP_SUBSTR(column1, 'A\\\\W+(\\\\w)(\\\\w)(\\\\w)', 1, 1, 'e', 3)\n    FROM VALUES ('A MAN A PLAN A CANAL')\""
+---
+Ok(
+    [
+        "+--------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+",
+        "| regexp_substr(column1,Utf8(\"A\\W+(\\w)(\\w)(\\w)\"),Int64(1),Int64(1),Utf8(\"e\"),Int64(1)) | regexp_substr(column1,Utf8(\"A\\W+(\\w)(\\w)(\\w)\"),Int64(1),Int64(1),Utf8(\"e\"),Int64(2)) | regexp_substr(column1,Utf8(\"A\\W+(\\w)(\\w)(\\w)\"),Int64(1),Int64(1),Utf8(\"e\"),Int64(3)) |",
+        "+--------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+",
+        "| M                                                                                    | A                                                                                    | N                                                                                    |",
+        "+--------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+",
+    ],
+)
diff --git a/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_occurrence.snap b/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_occurrence.snap
new file mode 100644
index 000000000..7d184b765
--- /dev/null
+++ b/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_occurrence.snap
@@ -0,0 +1,15 @@
+---
+source: crates/embucket-functions/src/tests/regexp/regexp_substr.rs
+description: "\"SELECT REGEXP_SUBSTR(column1, 'the\\\\W+\\\\w+', 1, 2)\n    FROM VALUES ('It was the best of times, it was the worst of times.'),\n    ('In    the   string   the   extra   spaces  are   redundant.'),\n    ('A thespian theater is nearby.')\""
+---
+Ok(
+    [
+        "+------------------------------------------------------------+",
+        "| regexp_substr(column1,Utf8(\"the\\W+\\w+\"),Int64(1),Int64(2)) |",
+        "+------------------------------------------------------------+",
+        "| the worst                                                  |",
+        "| the   extra                                                |",
+        "|                                                            |",
+        "+------------------------------------------------------------+",
+    ],
+)
diff --git a/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_regex_patterns_1.snap b/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_regex_patterns_1.snap
new file mode 100644
index 000000000..7433e5e65
--- /dev/null
+++ b/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_regex_patterns_1.snap
@@ -0,0 +1,13 @@
+---
+source: crates/embucket-functions/src/tests/regexp/regexp_substr.rs
+description: "\"SELECT REGEXP_SUBSTR('It was the best of times, it was the worst of times', '[[:alpha:]]{2,}st', 15, 1)\""
+---
+Ok(
+    [
+        "+-------------------------------------------------------------------------------------------------------------------------+",
+        "| regexp_substr(Utf8(\"It was the best of times, it was the worst of times\"),Utf8(\"[[:alpha:]]{2,}st\"),Int64(15),Int64(1)) |",
+        "+-------------------------------------------------------------------------------------------------------------------------+",
+        "| worst                                                                                                                   |",
+        "+-------------------------------------------------------------------------------------------------------------------------+",
+    ],
+)
diff --git a/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_regex_patterns_2.snap b/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_regex_patterns_2.snap
new file mode 100644
index 000000000..0a2d587d9
--- /dev/null
+++ b/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_regex_patterns_2.snap
@@ -0,0 +1,16 @@
+---
+source: crates/embucket-functions/src/tests/regexp/regexp_substr.rs
+description: "\"SELECT REGEXP_SUBSTR(column1, '\\\\b\\\\S*o\\\\S*\\\\b')\n    FROM VALUES ('Hellooo World'),\n    ('How are you doing today?'),\n    ('the quick brown fox jumps over the lazy dog'),\n    ('PACK MY BOX WITH FIVE DOZEN LIQUOR JUGS')\""
+---
+Ok(
+    [
+        "+--------------------------------------------+",
+        "| regexp_substr(column1,Utf8(\"\\b\\S*o\\S*\\b\")) |",
+        "+--------------------------------------------+",
+        "| Hellooo                                    |",
+        "| How                                        |",
+        "| brown                                      |",
+        "|                                            |",
+        "+--------------------------------------------+",
+    ],
+)
diff --git a/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_regex_patterns_3.snap b/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_regex_patterns_3.snap
new file mode 100644
index 000000000..d60f3722f
--- /dev/null
+++ b/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_regex_patterns_3.snap
@@ -0,0 +1,16 @@
+---
+source: crates/embucket-functions/src/tests/regexp/regexp_substr.rs
+description: "\"SELECT REGEXP_SUBSTR(column1, '\\\\b\\\\S*o\\\\S*\\\\b', 3, 3, 'i')\n    FROM VALUES ('Hellooo World'),\n    ('How are you doing today?'),\n    ('the quick brown fox jumps over the lazy dog'),\n    ('PACK MY BOX WITH FIVE DOZEN LIQUOR JUGS')\""
+---
+Ok(
+    [
+        "+------------------------------------------------------------------------+",
+        "| regexp_substr(column1,Utf8(\"\\b\\S*o\\S*\\b\"),Int64(3),Int64(3),Utf8(\"i\")) |",
+        "+------------------------------------------------------------------------+",
+        "|                                                                        |",
+        "| today                                                                  |",
+        "| over                                                                   |",
+        "| LIQUOR                                                                 |",
+        "+------------------------------------------------------------------------+",
+    ],
+)
diff --git a/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_word_boundary.snap b/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_word_boundary.snap
new file mode 100644
index 000000000..4a1cc93ab
--- /dev/null
+++ b/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_word_boundary.snap
@@ -0,0 +1,13 @@
+---
+source: crates/embucket-functions/src/tests/regexp/regexp_substr.rs
+description: "\"SELECT REGEXP_SUBSTR('It was the best of times, it was the worst of times','\\\\bwas\\\\b', 1, 1)\""
+---
+Ok(
+    [
+        "+--------------------------------------------------------------------------------------------------------------+",
+        "| regexp_substr(Utf8(\"It was the best of times, it was the worst of times\"),Utf8(\"\\bwas\\b\"),Int64(1),Int64(1)) |",
+        "+--------------------------------------------------------------------------------------------------------------+",
+        "| was                                                                                                          |",
+        "+--------------------------------------------------------------------------------------------------------------+",
+    ],
+)
diff --git a/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_word_groups.snap b/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_word_groups.snap
new file mode 100644
index 000000000..0c697e589
--- /dev/null
+++ b/crates/embucket-functions/src/tests/regexp/snapshots/regexp_substr/query_regexp_substr_word_groups.snap
@@ -0,0 +1,13 @@
+---
+source: crates/embucket-functions/src/tests/regexp/regexp_substr.rs
+description: "\"SELECT REGEXP_SUBSTR(column1, 'A\\\\W+(\\\\w+)', 1, 1, 'e', 1),\n    REGEXP_SUBSTR(column1, 'A\\\\W+(\\\\w+)', 1, 2, 'e', 1),\n    REGEXP_SUBSTR(column1, 'A\\\\W+(\\\\w+)', 1, 3, 'e', 1),\n    REGEXP_SUBSTR(column1, 'A\\\\W+(\\\\w+)', 1, 4, 'e', 1)\n    FROM VALUES ('A MAN A PLAN A CANAL')\""
+---
+Ok(
+    [
+        "+-------------------------------------------------------------------------------+-------------------------------------------------------------------------------+-------------------------------------------------------------------------------+-------------------------------------------------------------------------------+",
+        "| regexp_substr(column1,Utf8(\"A\\W+(\\w+)\"),Int64(1),Int64(1),Utf8(\"e\"),Int64(1)) | regexp_substr(column1,Utf8(\"A\\W+(\\w+)\"),Int64(1),Int64(2),Utf8(\"e\"),Int64(1)) | regexp_substr(column1,Utf8(\"A\\W+(\\w+)\"),Int64(1),Int64(3),Utf8(\"e\"),Int64(1)) | regexp_substr(column1,Utf8(\"A\\W+(\\w+)\"),Int64(1),Int64(4),Utf8(\"e\"),Int64(1)) |",
+        "+-------------------------------------------------------------------------------+-------------------------------------------------------------------------------+-------------------------------------------------------------------------------+-------------------------------------------------------------------------------+",
+        "| MAN                                                                           | PLAN                                                                          | CANAL                                                                         |                                                                               |",
+        "+-------------------------------------------------------------------------------+-------------------------------------------------------------------------------+-------------------------------------------------------------------------------+-------------------------------------------------------------------------------+",
+    ],
+)
diff --git a/crates/embucket-functions/src/visitors/unimplemented/helper/implemented_functions.csv b/crates/embucket-functions/src/visitors/unimplemented/helper/implemented_functions.csv
index bb3876eee..cbf495a07 100644
--- a/crates/embucket-functions/src/visitors/unimplemented/helper/implemented_functions.csv
+++ b/crates/embucket-functions/src/visitors/unimplemented/helper/implemented_functions.csv
@@ -441,4 +441,4 @@ variant_element
 version
 week
 year
-zeroifnull
\ No newline at end of file
+zeroifnull