From e86c3e3d9cb97d0cbe9a9a50efc06e0dae15a700 Mon Sep 17 00:00:00 2001 From: nuno-faria Date: Mon, 5 May 2025 22:29:31 +0100 Subject: [PATCH 1/5] feat: Support Parquet writer options --- python/datafusion/__init__.py | 6 +- python/datafusion/dataframe.py | 247 +++++++++++++++------- python/tests/test_dataframe.py | 373 ++++++++++++++++++++++++++++++--- src/dataframe.rs | 154 ++++++++++---- src/lib.rs | 2 + 5 files changed, 623 insertions(+), 159 deletions(-) diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index 15ceefbdb..273abbadb 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -31,7 +31,7 @@ from . import functions, object_store, substrait, unparser # The following imports are okay to remain as opaque to the user. -from ._internal import Config +from ._internal import Config, ParquetWriterOptions from .catalog import Catalog, Database, Table from .common import ( DFSchema, @@ -42,7 +42,7 @@ SessionContext, SQLOptions, ) -from .dataframe import DataFrame +from .dataframe import DataFrame, ParquetColumnOptions from .expr import ( Expr, WindowFrame, @@ -66,6 +66,8 @@ "ExecutionPlan", "Expr", "LogicalPlan", + "ParquetColumnOptions", + "ParquetWriterOptions", "RecordBatch", "RecordBatchStream", "RuntimeEnvBuilder", diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 26fe8f453..96f939e70 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -28,7 +28,6 @@ Iterable, Literal, Optional, - Union, overload, ) @@ -51,67 +50,58 @@ from datafusion._internal import DataFrame as DataFrameInternal from datafusion._internal import expr as expr_internal -from enum import Enum - +from datafusion._internal import ParquetColumnOptions as ParquetColumnOptionsInternal +from datafusion._internal import ParquetWriterOptions as ParquetWriterOptionsInternal from datafusion.expr import Expr, SortExpr, sort_or_default -# excerpt from deltalake -# https://github.com/apache/datafusion-python/pull/981#discussion_r1905619163 -class Compression(Enum): - """Enum representing the available compression types for Parquet files.""" - - UNCOMPRESSED = "uncompressed" - SNAPPY = "snappy" - GZIP = "gzip" - BROTLI = "brotli" - LZ4 = "lz4" - # lzo is not implemented yet - # https://github.com/apache/arrow-rs/issues/6970 - # LZO = "lzo" - ZSTD = "zstd" - LZ4_RAW = "lz4_raw" - - @classmethod - def from_str(cls: type[Compression], value: str) -> Compression: - """Convert a string to a Compression enum value. - - Args: - value: The string representation of the compression type. - - Returns: - The Compression enum lowercase value. - - Raises: - ValueError: If the string does not match any Compression enum value. - """ - try: - return cls(value.lower()) - except ValueError as err: - valid_values = str([item.value for item in Compression]) - error_msg = f""" - {value} is not a valid Compression. - Valid values are: {valid_values} - """ - raise ValueError(error_msg) from err - - def get_default_level(self) -> Optional[int]: - """Get the default compression level for the compression type. +class ParquetColumnOptions: + """Parquet options for individual columns. + + Contains the available options that can be applied for an individual Parquet column, + replacing the provided options in the `write_parquet`. + + Attributes: + encoding: Sets encoding for the column path. 
Valid values are: `plain`, + `plain_dictionary`, `rle`, `bit_packed`, `delta_binary_packed`, + `delta_length_byte_array`, `delta_byte_array`, `rle_dictionary`, and + `byte_stream_split`. These values are not case-sensitive. If `None`, uses + the default parquet options + dictionary_enabled: Sets if dictionary encoding is enabled for the column path. + If `None`, uses the default parquet options + compression: Sets default parquet compression codec for the column path. Valid + values are `uncompressed`, `snappy`, `gzip(level)`, `lzo`, `brotli(level)`, + `lz4`, `zstd(level)`, and `lz4_raw`. These values are not case-sensitive. If + `None`, uses the default parquet options. + statistics_enabled: Sets if statistics are enabled for the column Valid values + are: `none`, `chunk`, and `page` These values are not case sensitive. If + `None`, uses the default parquet options. + bloom_filter_enabled: Sets if bloom filter is enabled for the column path. If + `None`, uses the default parquet options. + bloom_filter_fpp: Sets bloom filter false positive probability for the column + path. If `None`, uses the default parquet options. + bloom_filter_ndv: Sets bloom filter number of distinct values. If `None`, uses + the default parquet options. + """ - Returns: - The default compression level for the compression type. - """ - # GZIP, BROTLI default values from deltalake repo - # https://github.com/apache/datafusion-python/pull/981#discussion_r1905619163 - # ZSTD default value from delta-rs - # https://github.com/apache/datafusion-python/pull/981#discussion_r1904789223 - if self == Compression.GZIP: - return 6 - if self == Compression.BROTLI: - return 1 - if self == Compression.ZSTD: - return 4 - return None + def __init__( + self, + encoding: Optional[str] = None, + dictionary_enabled: Optional[bool] = None, + compression: Optional[str] = None, + statistics_enabled: Optional[str] = None, + bloom_filter_enabled: Optional[bool] = None, + bloom_filter_fpp: Optional[float] = None, + bloom_filter_ndv: Optional[int] = None, + ) -> None: + """Initialize the ParquetColumnOptions.""" + self.encoding = encoding + self.dictionary_enabled = dictionary_enabled + self.compression = compression + self.statistics_enabled = statistics_enabled + self.bloom_filter_enabled = bloom_filter_enabled + self.bloom_filter_fpp = bloom_filter_fpp + self.bloom_filter_ndv = bloom_filter_ndv class DataFrame: @@ -704,38 +694,135 @@ def write_csv(self, path: str | pathlib.Path, with_header: bool = False) -> None def write_parquet( self, path: str | pathlib.Path, - compression: Union[str, Compression] = Compression.ZSTD, - compression_level: int | None = None, + data_pagesize_limit: int = 1024 * 1024, + write_batch_size: int = 1024, + writer_version: str = "1.0", + skip_arrow_metadata: bool = False, + compression: Optional[str] = "zstd(3)", + dictionary_enabled: Optional[bool] = True, + dictionary_page_size_limit: int = 1024 * 1024, + statistics_enabled: Optional[str] = "page", + max_row_group_size: int = 1024 * 1024, + created_by: str = "datafusion-python", + column_index_truncate_length: Optional[int] = 64, + statistics_truncate_length: Optional[int] = None, + data_page_row_count_limit: int = 20_000, + encoding: Optional[str] = None, + bloom_filter_on_write: bool = False, + bloom_filter_fpp: Optional[float] = None, + bloom_filter_ndv: Optional[int] = None, + allow_single_file_parallelism: bool = True, + maximum_parallel_row_group_writers: int = 1, + maximum_buffered_record_batches_per_stream: int = 2, + column_specific_options: 
Optional[dict[str, ParquetColumnOptions]] = None, ) -> None: """Execute the :py:class:`DataFrame` and write the results to a Parquet file. Args: path: Path of the Parquet file to write. - compression: Compression type to use. Default is "ZSTD". - Available compression types are: + data_pagesize_limit: Sets best effort maximum size of data page in bytes. + write_batch_size: Sets write_batch_size in bytes. + writer_version: Sets parquet writer version. Valid values are `1.0` and + `2.0`. + skip_arrow_metadata: Skip encoding the embedded arrow metadata in the + KV_meta. + compression: Compression type to use. Default is "zstd(3)". + Available compression types are - "uncompressed": No compression. - "snappy": Snappy compression. - - "gzip": Gzip compression. - - "brotli": Brotli compression. + - "gzip(n)": Gzip compression with level n. + - "brotli(n)": Brotli compression with level n. - "lz4": LZ4 compression. - "lz4_raw": LZ4_RAW compression. - - "zstd": Zstandard compression. - Note: LZO is not yet implemented in arrow-rs and is therefore excluded. - compression_level: Compression level to use. For ZSTD, the - recommended range is 1 to 22, with the default being 4. Higher levels - provide better compression but slower speed. - """ - # Convert string to Compression enum if necessary - if isinstance(compression, str): - compression = Compression.from_str(compression) - - if ( - compression in {Compression.GZIP, Compression.BROTLI, Compression.ZSTD} - and compression_level is None - ): - compression_level = compression.get_default_level() + - "zstd(n)": Zstandard compression with level n. + dictionary_enabled: Sets if dictionary encoding is enabled. If None, uses + the default parquet writer setting. + dictionary_page_size_limit: Sets best effort maximum dictionary page size, + in bytes. + statistics_enabled: Sets if statistics are enabled for any column Valid + values are `none`, `chunk`, and `page`. If None, uses the default + parquet writer setting. + max_row_group_size: Target maximum number of rows in each row group + (defaults to 1M rows). Writing larger row groups requires more memory to + write, but can get better compression and be faster to read. + created_by: Sets "created by" property. + column_index_truncate_length: Sets column index truncate length. + statistics_truncate_length: Sets statistics truncate length. If None, uses + the default parquet writer setting. + data_page_row_count_limit: Sets best effort maximum number of rows in a data + page. + encoding: Sets default encoding for any column. Valid values are `plain`, + `plain_dictionary`, `rle`, `bit_packed`, `delta_binary_packed`, + `delta_length_byte_array`, `delta_byte_array`, `rle_dictionary`, and + `byte_stream_split`. If None, uses the default parquet writer setting. + bloom_filter_on_write: Write bloom filters for all columns when creating + parquet files. + bloom_filter_fpp: Sets bloom filter false positive probability. If None, + uses the default parquet writer setting + bloom_filter_ndv: Sets bloom filter number of distinct values. If None, uses + the default parquet writer setting. + allow_single_file_parallelism: Controls whether DataFusion will attempt to + speed up writing parquet files by serializing them in parallel. Each + column in each row group in each output file are serialized in parallel + leveraging a maximum possible core count of n_files * n_row_groups * + n_columns. 
+ maximum_parallel_row_group_writers: By default parallel parquet writer is + tuned for minimum memory usage in a streaming execution plan. You may + see a performance benefit when writing large parquet files by increasing + `maximum_parallel_row_group_writers` and + `maximum_buffered_record_batches_per_stream` if your system has idle + cores and can tolerate additional memory usage. Boosting these values is + likely worthwhile when writing out already in-memory data, such as from + a cached data frame. + maximum_buffered_record_batches_per_stream: See + `maximum_parallel_row_group_writers`. + column_specific_options: Overrides options for specific columns. If a column + is not a part of this dictionary, it will use the parameters provided in + the `write_parquet`. + """ + options_internal = ParquetWriterOptionsInternal( + data_pagesize_limit, + write_batch_size, + writer_version, + skip_arrow_metadata, + compression, + dictionary_enabled, + dictionary_page_size_limit, + statistics_enabled, + max_row_group_size, + created_by, + column_index_truncate_length, + statistics_truncate_length, + data_page_row_count_limit, + encoding, + bloom_filter_on_write, + bloom_filter_fpp, + bloom_filter_ndv, + allow_single_file_parallelism, + maximum_parallel_row_group_writers, + maximum_buffered_record_batches_per_stream, + ) + + if column_specific_options is None: + column_specific_options = {} + + column_specific_options_internal = {} + for column, opts in column_specific_options.items(): + column_specific_options_internal[column] = ParquetColumnOptionsInternal( + bloom_filter_enabled=opts.bloom_filter_enabled, + encoding=opts.encoding, + dictionary_enabled=opts.dictionary_enabled, + compression=opts.compression, + statistics_enabled=opts.statistics_enabled, + bloom_filter_fpp=opts.bloom_filter_fpp, + bloom_filter_ndv=opts.bloom_filter_ndv, + ) - self.df.write_parquet(str(path), compression.value, compression_level) + self.df.write_parquet( + str(path), + options_internal, + column_specific_options_internal, + ) def write_json(self, path: str | pathlib.Path) -> None: """Execute the :py:class:`DataFrame` and write the results to a JSON file. 
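Note: the reworked `write_parquet` surface above is easiest to see with a short usage sketch. This is illustrative only — the `from_pydict` data and the option values are arbitrary, not recommendations from this patch:

```python
from datafusion import ParquetColumnOptions, SessionContext

ctx = SessionContext()
df = ctx.from_pydict({"id": [1, 2, 3], "name": ["a", "b", "c"]})

# Global writer options, with a per-column override for "name".
df.write_parquet(
    "out/",
    compression="zstd(5)",          # codec and level in a single string
    max_row_group_size=100_000,
    column_specific_options={
        # "name" drops dictionary encoding and uses snappy instead of zstd
        "name": ParquetColumnOptions(dictionary_enabled=False, compression="snappy"),
    },
)
```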
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index e01308c86..e1e29c45c 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -23,6 +23,7 @@ import pytest from datafusion import ( DataFrame, + ParquetColumnOptions, SessionContext, WindowFrame, column, @@ -62,6 +63,21 @@ def df(): return ctx.from_arrow(batch) +@pytest.fixture +def large_df(): + ctx = SessionContext() + + rows = 100000 + data = { + "a": list(range(rows)), + "b": [f"s-{i}" for i in range(rows)], + "c": [float(i + 0.1) for i in range(rows)], + } + batch = pa.record_batch(data) + + return ctx.from_arrow(batch) + + @pytest.fixture def struct_df(): ctx = SessionContext() @@ -1533,16 +1549,26 @@ def test_write_parquet(df, tmp_path, path_to_str): assert result == expected +def test_write_parquet_default_compression(df, tmp_path): + """Test that the default compression is ZSTD.""" + df.write_parquet(tmp_path) + + for file in tmp_path.rglob("*.parquet"): + metadata = pq.ParquetFile(file).metadata.to_dict() + for row_group in metadata["row_groups"]: + for col in row_group["columns"]: + assert col["compression"].lower() == "zstd" + + @pytest.mark.parametrize( - ("compression", "compression_level"), - [("gzip", 6), ("brotli", 7), ("zstd", 15)], + "compression", + ["gzip(6)", "brotli(7)", "zstd(15)", "snappy", "uncompressed"], ) -def test_write_compressed_parquet(df, tmp_path, compression, compression_level): - path = tmp_path +def test_write_compressed_parquet(df, tmp_path, compression): + import re - df.write_parquet( - str(path), compression=compression, compression_level=compression_level - ) + path = tmp_path + df.write_parquet(str(path), compression=compression) # test that the actual compression scheme is the one written for _root, _dirs, files in os.walk(path): @@ -1550,8 +1576,10 @@ def test_write_compressed_parquet(df, tmp_path, compression, compression_level): if file.endswith(".parquet"): metadata = pq.ParquetFile(tmp_path / file).metadata.to_dict() for row_group in metadata["row_groups"]: - for columns in row_group["columns"]: - assert columns["compression"].lower() == compression + for col in row_group["columns"]: + assert col["compression"].lower() == re.sub( + r"\(\d+\)", "", compression + ) result = pq.read_table(str(path)).to_pydict() expected = df.to_pydict() @@ -1560,40 +1588,323 @@ def test_write_compressed_parquet(df, tmp_path, compression, compression_level): @pytest.mark.parametrize( - ("compression", "compression_level"), - [("gzip", 12), ("brotli", 15), ("zstd", 23), ("wrong", 12)], + "compression", + ["gzip(12)", "brotli(15)", "zstd(23)"], ) -def test_write_compressed_parquet_wrong_compression_level( - df, tmp_path, compression, compression_level -): +def test_write_compressed_parquet_wrong_compression_level(df, tmp_path, compression): path = tmp_path - with pytest.raises(ValueError): - df.write_parquet( - str(path), - compression=compression, - compression_level=compression_level, - ) + with pytest.raises(Exception, match=r"valid compression range .*? 
exceeded."): + df.write_parquet(str(path), compression=compression) -@pytest.mark.parametrize("compression", ["wrong"]) +@pytest.mark.parametrize("compression", ["wrong", "wrong(12)"]) def test_write_compressed_parquet_invalid_compression(df, tmp_path, compression): path = tmp_path - with pytest.raises(ValueError): + with pytest.raises(Exception, match="Unknown or unsupported parquet compression"): df.write_parquet(str(path), compression=compression) -# not testing lzo because it it not implemented yet -# https://github.com/apache/arrow-rs/issues/6970 -@pytest.mark.parametrize("compression", ["zstd", "brotli", "gzip"]) -def test_write_compressed_parquet_default_compression_level(df, tmp_path, compression): - # Test write_parquet with zstd, brotli, gzip default compression level, - # ie don't specify compression level - # should complete without error - path = tmp_path +@pytest.mark.parametrize( + ("writer_version", "format_version"), + [("1.0", "1.0"), ("2.0", "2.6"), (None, "1.0")], +) +def test_write_parquet_writer_version(df, tmp_path, writer_version, format_version): + """Test the Parquet writer version. Note that writer_version=2.0 results in + format_version=2.6""" + if writer_version is None: + df.write_parquet(tmp_path) + else: + df.write_parquet(tmp_path, writer_version=writer_version) - df.write_parquet(str(path), compression=compression) + for file in tmp_path.rglob("*.parquet"): + parquet = pq.ParquetFile(file) + metadata = parquet.metadata.to_dict() + assert metadata["format_version"] == format_version + + +@pytest.mark.parametrize("writer_version", ["1.2.3", "custom-version", "0"]) +def test_write_parquet_wrong_writer_version(df, tmp_path, writer_version): + """Test that invalid writer versions in Parquet throw an exception.""" + with pytest.raises( + Exception, match="Unknown or unsupported parquet writer version" + ): + df.write_parquet(tmp_path, writer_version=writer_version) + + +@pytest.mark.parametrize("dictionary_enabled", [True, False, None]) +def test_write_parquet_dictionary_enabled(df, tmp_path, dictionary_enabled): + """Test enabling/disabling the dictionaries in Parquet.""" + df.write_parquet(tmp_path, dictionary_enabled=dictionary_enabled) + # by default, the dictionary is enabled, so None results in True + result = dictionary_enabled if dictionary_enabled is not None else True + + for file in tmp_path.rglob("*.parquet"): + parquet = pq.ParquetFile(file) + metadata = parquet.metadata.to_dict() + + for row_group in metadata["row_groups"]: + for col in row_group["columns"]: + assert col["has_dictionary_page"] == result + + +@pytest.mark.parametrize( + ("statistics_enabled", "has_statistics"), + [("page", True), ("chunk", True), ("none", False), (None, True)], +) +def test_write_parquet_statistics_enabled( + df, tmp_path, statistics_enabled, has_statistics +): + """Test configuring the statistics in Parquet. 
In pyarrow we can only check for + column-level statistics, so "page" and "chunk" are tested in the same way.""" + df.write_parquet(tmp_path, statistics_enabled=statistics_enabled) + + for file in tmp_path.rglob("*.parquet"): + parquet = pq.ParquetFile(file) + metadata = parquet.metadata.to_dict() + + for row_group in metadata["row_groups"]: + for col in row_group["columns"]: + if has_statistics: + assert col["statistics"] is not None + else: + assert col["statistics"] is None + + +@pytest.mark.parametrize("max_row_group_size", [1000, 5000, 10000, 100000]) +def test_write_parquet_max_row_group_size(large_df, tmp_path, max_row_group_size): + """Test configuring the max number of rows per group in Parquet. These test cases + guarantee that the number of rows for each row group is max_row_group_size, given + the total number of rows is a multiple of max_row_group_size.""" + large_df.write_parquet(tmp_path, max_row_group_size=max_row_group_size) + + for file in tmp_path.rglob("*.parquet"): + parquet = pq.ParquetFile(file) + metadata = parquet.metadata.to_dict() + for row_group in metadata["row_groups"]: + assert row_group["num_rows"] == max_row_group_size + + +@pytest.mark.parametrize("created_by", ["datafusion", "datafusion-python", "custom"]) +def test_write_parquet_created_by(df, tmp_path, created_by): + """Test configuring the created by metadata in Parquet.""" + df.write_parquet(tmp_path, created_by=created_by) + + for file in tmp_path.rglob("*.parquet"): + parquet = pq.ParquetFile(file) + metadata = parquet.metadata.to_dict() + assert metadata["created_by"] == created_by + + +@pytest.mark.parametrize("statistics_truncate_length", [5, 25, 50]) +def test_write_parquet_statistics_truncate_length( + df, tmp_path, statistics_truncate_length +): + """Test configuring the truncate limit in Parquet's row-group-level statistics.""" + ctx = SessionContext() + data = { + "a": [ + "a_the_quick_brown_fox_jumps_over_the_lazy_dog", + "m_the_quick_brown_fox_jumps_over_the_lazy_dog", + "z_the_quick_brown_fox_jumps_over_the_lazy_dog", + ], + "b": ["a_smaller", "m_smaller", "z_smaller"], + } + df = ctx.from_arrow(pa.record_batch(data)) + df.write_parquet(tmp_path, statistics_truncate_length=statistics_truncate_length) + + for file in tmp_path.rglob("*.parquet"): + parquet = pq.ParquetFile(file) + metadata = parquet.metadata.to_dict() + + for row_group in metadata["row_groups"]: + for col in row_group["columns"]: + statistics = col["statistics"] + assert len(statistics["min"]) <= statistics_truncate_length + assert len(statistics["max"]) <= statistics_truncate_length + + +def test_write_parquet_default_encoding(tmp_path): + """Test that, by default, Parquet files are written with dictionary encoding. 
+ Note that dictionary encoding is not used for boolean values, so it is not tested + here.""" + ctx = SessionContext() + data = { + "a": [1, 2, 3], + "b": ["1", "2", "3"], + "c": [1.01, 2.02, 3.03], + } + df = ctx.from_arrow(pa.record_batch(data)) + df.write_parquet(tmp_path) + + for file in tmp_path.rglob("*.parquet"): + parquet = pq.ParquetFile(file) + metadata = parquet.metadata.to_dict() + + for row_group in metadata["row_groups"]: + for col in row_group["columns"]: + assert col["encodings"] == ("PLAIN", "RLE", "RLE_DICTIONARY") + + +@pytest.mark.parametrize( + ("encoding", "data_types", "result"), + [ + ("plain", ["int", "float", "str", "bool"], ("PLAIN", "RLE")), + ("rle", ["bool"], ("RLE",)), + ("delta_binary_packed", ["int"], ("RLE", "DELTA_BINARY_PACKED")), + ("delta_length_byte_array", ["str"], ("RLE", "DELTA_LENGTH_BYTE_ARRAY")), + ("delta_byte_array", ["str"], ("RLE", "DELTA_BYTE_ARRAY")), + ("byte_stream_split", ["int", "float"], ("RLE", "BYTE_STREAM_SPLIT")), + ], +) +def test_write_parquet_encoding(tmp_path, encoding, data_types, result): + """Test different encodings in Parquet in their respective support column types.""" + ctx = SessionContext() + + data = {} + for data_type in data_types: + match data_type: + case "int": + data["int"] = [1, 2, 3] + case "float": + data["float"] = [1.01, 2.02, 3.03] + case "str": + data["str"] = ["a", "b", "c"] + case "bool": + data["bool"] = [True, False, True] + + df = ctx.from_arrow(pa.record_batch(data)) + df.write_parquet(tmp_path, encoding=encoding, dictionary_enabled=False) + + for file in tmp_path.rglob("*.parquet"): + parquet = pq.ParquetFile(file) + metadata = parquet.metadata.to_dict() + + for row_group in metadata["row_groups"]: + for col in row_group["columns"]: + assert col["encodings"] == result + + +@pytest.mark.parametrize("encoding", ["bit_packed"]) +def test_write_parquet_unsupported_encoding(df, tmp_path, encoding): + """Test that unsupported Parquet encodings do not work.""" + # BaseException is used since this throws a Rust panic: https://github.com/PyO3/pyo3/issues/3519 + with pytest.raises(BaseException, match="Encoding .*? is not supported"): + df.write_parquet(tmp_path, encoding=encoding) + + +@pytest.mark.parametrize("encoding", ["non_existent", "unknown", "plain123"]) +def test_write_parquet_invalid_encoding(df, tmp_path, encoding): + """Test that invalid Parquet encodings do not work.""" + with pytest.raises(Exception, match="Unknown or unsupported parquet encoding"): + df.write_parquet(tmp_path, encoding=encoding) + + +@pytest.mark.parametrize("encoding", ["plain_dictionary", "rle_dictionary"]) +def test_write_parquet_dictionary_encoding_fallback(df, tmp_path, encoding): + """Test that the dictionary encoding cannot be used as fallback in Parquet.""" + # BaseException is used since this throws a Rust panic: https://github.com/PyO3/pyo3/issues/3519 + with pytest.raises( + BaseException, match="Dictionary encoding can not be used as fallback encoding" + ): + df.write_parquet(tmp_path, encoding=encoding) + + +def test_write_parquet_bloom_filter(df, tmp_path): + """Test Parquet files with and without (default) bloom filters. 
Since pyarrow does + not expose any information about bloom filters, the easiest way to confirm that they + are actually written is to compare the file size.""" + path_no_bloom_filter = tmp_path / "1" + path_bloom_filter = tmp_path / "2" + + df.write_parquet(path_no_bloom_filter) + df.write_parquet(path_bloom_filter, bloom_filter_on_write=True) + + size_no_bloom_filter = 0 + for file in path_no_bloom_filter.rglob("*.parquet"): + size_no_bloom_filter += os.path.getsize(file) + + size_bloom_filter = 0 + for file in path_bloom_filter.rglob("*.parquet"): + size_bloom_filter += os.path.getsize(file) + + assert size_no_bloom_filter < size_bloom_filter + + +def test_write_parquet_column_options(df, tmp_path): + """Test writing Parquet files with different options for each column, which replace + the global configs (when provided).""" + data = { + "a": [1, 2, 3], + "b": ["a", "b", "c"], + "c": [False, True, False], + "d": [1.01, 2.02, 3.03], + "e": [4, 5, 6], + } + + column_specific_options = { + "a": ParquetColumnOptions(statistics_enabled="none"), + "b": ParquetColumnOptions(encoding="plain", dictionary_enabled=False), + "c": ParquetColumnOptions( + compression="snappy", encoding="rle", dictionary_enabled=False + ), + "d": ParquetColumnOptions( + compression="zstd(6)", + encoding="byte_stream_split", + dictionary_enabled=False, + statistics_enabled="none", + ), + # column "e" will use the global configs + } + + results = { + "a": { + "statistics": False, + "compression": "brotli", + "encodings": ("PLAIN", "RLE", "RLE_DICTIONARY"), + }, + "b": { + "statistics": True, + "compression": "brotli", + "encodings": ("PLAIN", "RLE"), + }, + "c": { + "statistics": True, + "compression": "snappy", + "encodings": ("RLE",), + }, + "d": { + "statistics": False, + "compression": "zstd", + "encodings": ("RLE", "BYTE_STREAM_SPLIT"), + }, + "e": { + "statistics": True, + "compression": "brotli", + "encodings": ("PLAIN", "RLE", "RLE_DICTIONARY"), + }, + } + + ctx = SessionContext() + df = ctx.from_arrow(pa.record_batch(data)) + df.write_parquet( + tmp_path, + compression="brotli(8)", + column_specific_options=column_specific_options, + ) + + for file in tmp_path.rglob("*.parquet"): + parquet = pq.ParquetFile(file) + metadata = parquet.metadata.to_dict() + + for row_group in metadata["row_groups"]: + for col in row_group["columns"]: + column_name = col["path_in_schema"] + result = results[column_name] + assert (col["statistics"] is not None) == result["statistics"] + assert col["compression"].lower() == result["compression"].lower() + assert col["encodings"] == result["encodings"] def test_dataframe_export(df) -> None: diff --git a/src/dataframe.rs b/src/dataframe.rs index 211e31bd1..ffb3f36cf 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. 
+use std::collections::HashMap; use std::ffi::CString; use std::sync::Arc; @@ -27,12 +28,11 @@ use datafusion::arrow::datatypes::Schema; use datafusion::arrow::pyarrow::{PyArrowType, ToPyArrow}; use datafusion::arrow::util::pretty; use datafusion::common::UnnestOptions; -use datafusion::config::{CsvOptions, TableParquetOptions}; +use datafusion::config::{CsvOptions, ParquetColumnOptions, ParquetOptions, TableParquetOptions}; use datafusion::dataframe::{DataFrame, DataFrameWriteOptions}; use datafusion::datasource::TableProvider; use datafusion::error::DataFusionError; use datafusion::execution::SendableRecordBatchStream; -use datafusion::parquet::basic::{BrotliLevel, Compression, GzipLevel, ZstdLevel}; use datafusion::prelude::*; use futures::{StreamExt, TryStreamExt}; use pyo3::exceptions::PyValueError; @@ -165,10 +165,105 @@ fn build_formatter_config_from_python(formatter: &Bound<'_, PyAny>) -> PyResult< // Return the validated config, converting String error to PyErr config .validate() - .map_err(|e| pyo3::exceptions::PyValueError::new_err(e))?; + .map_err(pyo3::exceptions::PyValueError::new_err)?; Ok(config) } +/// Python mapping of `ParquetOptions` (includes just the writer-related options). +#[pyclass(name = "ParquetWriterOptions", module = "datafusion", subclass)] +#[derive(Clone, Default)] +pub struct PyParquetWriterOptions { + options: ParquetOptions, +} + +#[pymethods] +impl PyParquetWriterOptions { + #[new] + #[allow(clippy::too_many_arguments)] + pub fn new( + data_pagesize_limit: usize, + write_batch_size: usize, + writer_version: String, + skip_arrow_metadata: bool, + compression: Option, + dictionary_enabled: Option, + dictionary_page_size_limit: usize, + statistics_enabled: Option, + max_row_group_size: usize, + created_by: String, + column_index_truncate_length: Option, + statistics_truncate_length: Option, + data_page_row_count_limit: usize, + encoding: Option, + bloom_filter_on_write: bool, + bloom_filter_fpp: Option, + bloom_filter_ndv: Option, + allow_single_file_parallelism: bool, + maximum_parallel_row_group_writers: usize, + maximum_buffered_record_batches_per_stream: usize, + ) -> Self { + Self { + options: ParquetOptions { + data_pagesize_limit, + write_batch_size, + writer_version, + skip_arrow_metadata, + compression, + dictionary_enabled, + dictionary_page_size_limit, + statistics_enabled, + max_row_group_size, + created_by, + column_index_truncate_length, + statistics_truncate_length, + data_page_row_count_limit, + encoding, + bloom_filter_on_write, + bloom_filter_fpp, + bloom_filter_ndv, + allow_single_file_parallelism, + maximum_parallel_row_group_writers, + maximum_buffered_record_batches_per_stream, + ..Default::default() + }, + } + } +} + +/// Python mapping of `ParquetColumnOptions`. +#[pyclass(name = "ParquetColumnOptions", module = "datafusion", subclass)] +#[derive(Clone, Default)] +pub struct PyParquetColumnOptions { + options: ParquetColumnOptions, +} + +#[pymethods] +impl PyParquetColumnOptions { + #[new] + pub fn new( + bloom_filter_enabled: Option, + encoding: Option, + dictionary_enabled: Option, + compression: Option, + statistics_enabled: Option, + bloom_filter_fpp: Option, + bloom_filter_ndv: Option, + ) -> Self { + Self { + options: ParquetColumnOptions { + bloom_filter_enabled, + encoding, + dictionary_enabled, + compression, + statistics_enabled, + bloom_filter_fpp, + bloom_filter_ndv, + ..Default::default() + }, + } + } +} + /// A PyDataFrame is a representation of a logical plan and an API to compose statements. 
/// Use it to build a plan and `.collect()` to execute the plan and collect the result. /// The actual execution of a plan runs natively on Rust and Arrow on a multi-threaded environment. @@ -613,61 +708,28 @@ impl PyDataFrame { } /// Write a `DataFrame` to a Parquet file. - #[pyo3(signature = ( - path, - compression="zstd", - compression_level=None - ))] fn write_parquet( &self, path: &str, - compression: &str, - compression_level: Option, + options: PyParquetWriterOptions, + column_specific_options: HashMap, py: Python, ) -> PyDataFusionResult<()> { - fn verify_compression_level(cl: Option) -> Result { - cl.ok_or(PyValueError::new_err("compression_level is not defined")) - } - - let _validated = match compression.to_lowercase().as_str() { - "snappy" => Compression::SNAPPY, - "gzip" => Compression::GZIP( - GzipLevel::try_new(compression_level.unwrap_or(6)) - .map_err(|e| PyValueError::new_err(format!("{e}")))?, - ), - "brotli" => Compression::BROTLI( - BrotliLevel::try_new(verify_compression_level(compression_level)?) - .map_err(|e| PyValueError::new_err(format!("{e}")))?, - ), - "zstd" => Compression::ZSTD( - ZstdLevel::try_new(verify_compression_level(compression_level)? as i32) - .map_err(|e| PyValueError::new_err(format!("{e}")))?, - ), - "lzo" => Compression::LZO, - "lz4" => Compression::LZ4, - "lz4_raw" => Compression::LZ4_RAW, - "uncompressed" => Compression::UNCOMPRESSED, - _ => { - return Err(PyDataFusionError::Common(format!( - "Unrecognized compression type {compression}" - ))); - } + let table_options = TableParquetOptions { + global: options.options, + column_specific_options: column_specific_options + .into_iter() + .map(|(k, v)| (k, v.options)) + .collect(), + ..Default::default() }; - let mut compression_string = compression.to_string(); - if let Some(level) = compression_level { - compression_string.push_str(&format!("({level})")); - } - - let mut options = TableParquetOptions::default(); - options.global.compression = Some(compression_string); - wait_for_future( py, self.df.as_ref().clone().write_parquet( path, DataFrameWriteOptions::new(), - Option::from(options), + Option::from(table_options), ), )?; Ok(()) diff --git a/src/lib.rs b/src/lib.rs index 6eeda0878..990231c66 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -85,6 +85,8 @@ fn _internal(py: Python, m: Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; From 5d5b1baf3f7bd6681f4f3346c09d560e791db14f Mon Sep 17 00:00:00 2001 From: nuno-faria Date: Sat, 14 Jun 2025 20:03:33 +0100 Subject: [PATCH 2/5] Create dedicated write_parquet_options function --- python/datafusion/__init__.py | 4 +- python/datafusion/dataframe.py | 338 +++++++++++++++++++++++---------- python/tests/test_dataframe.py | 159 +++++++++++----- src/dataframe.rs | 62 ++++++ 4 files changed, 411 insertions(+), 152 deletions(-) diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index 273abbadb..fd7cd000a 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -31,7 +31,7 @@ from . import functions, object_store, substrait, unparser # The following imports are okay to remain as opaque to the user. 
-from ._internal import Config, ParquetWriterOptions +from ._internal import Config from .catalog import Catalog, Database, Table from .common import ( DFSchema, @@ -42,7 +42,7 @@ SessionContext, SQLOptions, ) -from .dataframe import DataFrame, ParquetColumnOptions +from .dataframe import DataFrame, ParquetColumnOptions, ParquetWriterOptions from .expr import ( Expr, WindowFrame, diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 96f939e70..3c8c09b38 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -28,6 +28,7 @@ Iterable, Literal, Optional, + Union, overload, ) @@ -50,16 +51,194 @@ from datafusion._internal import DataFrame as DataFrameInternal from datafusion._internal import expr as expr_internal +from enum import Enum + from datafusion._internal import ParquetColumnOptions as ParquetColumnOptionsInternal from datafusion._internal import ParquetWriterOptions as ParquetWriterOptionsInternal from datafusion.expr import Expr, SortExpr, sort_or_default +# excerpt from deltalake +# https://github.com/apache/datafusion-python/pull/981#discussion_r1905619163 +class Compression(Enum): + """Enum representing the available compression types for Parquet files.""" + + UNCOMPRESSED = "uncompressed" + SNAPPY = "snappy" + GZIP = "gzip" + BROTLI = "brotli" + LZ4 = "lz4" + # lzo is not implemented yet + # https://github.com/apache/arrow-rs/issues/6970 + # LZO = "lzo" + ZSTD = "zstd" + LZ4_RAW = "lz4_raw" + + @classmethod + def from_str(cls: type[Compression], value: str) -> Compression: + """Convert a string to a Compression enum value. + + Args: + value: The string representation of the compression type. + + Returns: + The Compression enum lowercase value. + + Raises: + ValueError: If the string does not match any Compression enum value. + """ + try: + return cls(value.lower()) + except ValueError as err: + valid_values = str([item.value for item in Compression]) + error_msg = f""" + {value} is not a valid Compression. + Valid values are: {valid_values} + """ + raise ValueError(error_msg) from err + + def get_default_level(self) -> Optional[int]: + """Get the default compression level for the compression type. + + Returns: + The default compression level for the compression type. + """ + # GZIP, BROTLI default values from deltalake repo + # https://github.com/apache/datafusion-python/pull/981#discussion_r1905619163 + # ZSTD default value from delta-rs + # https://github.com/apache/datafusion-python/pull/981#discussion_r1904789223 + if self == Compression.GZIP: + return 6 + if self == Compression.BROTLI: + return 1 + if self == Compression.ZSTD: + return 4 + return None + + +class ParquetWriterOptions: + """Advanced parquet writer options. + + Allows settings the writer options that apply to the entire file. Some options can + also be set on a column by column basis, with the field `column_specific_options` + (see `ParquetColumnOptions`). + + Attributes: + data_pagesize_limit: Sets best effort maximum size of data page in bytes. + write_batch_size: Sets write_batch_size in bytes. + writer_version: Sets parquet writer version. Valid values are `1.0` and + `2.0`. + skip_arrow_metadata: Skip encoding the embedded arrow metadata in the + KV_meta. + compression: Compression type to use. Default is "zstd(3)". + Available compression types are + - "uncompressed": No compression. + - "snappy": Snappy compression. + - "gzip(n)": Gzip compression with level n. + - "brotli(n)": Brotli compression with level n. + - "lz4": LZ4 compression. 
+ - "lz4_raw": LZ4_RAW compression. + - "zstd(n)": Zstandard compression with level n. + dictionary_enabled: Sets if dictionary encoding is enabled. If None, uses + the default parquet writer setting. + dictionary_page_size_limit: Sets best effort maximum dictionary page size, + in bytes. + statistics_enabled: Sets if statistics are enabled for any column Valid + values are `none`, `chunk`, and `page`. If None, uses the default + parquet writer setting. + max_row_group_size: Target maximum number of rows in each row group + (defaults to 1M rows). Writing larger row groups requires more memory to + write, but can get better compression and be faster to read. + created_by: Sets "created by" property. + column_index_truncate_length: Sets column index truncate length. + statistics_truncate_length: Sets statistics truncate length. If None, uses + the default parquet writer setting. + data_page_row_count_limit: Sets best effort maximum number of rows in a data + page. + encoding: Sets default encoding for any column. Valid values are `plain`, + `plain_dictionary`, `rle`, `bit_packed`, `delta_binary_packed`, + `delta_length_byte_array`, `delta_byte_array`, `rle_dictionary`, and + `byte_stream_split`. If None, uses the default parquet writer setting. + bloom_filter_on_write: Write bloom filters for all columns when creating + parquet files. + bloom_filter_fpp: Sets bloom filter false positive probability. If None, + uses the default parquet writer setting + bloom_filter_ndv: Sets bloom filter number of distinct values. If None, uses + the default parquet writer setting. + allow_single_file_parallelism: Controls whether DataFusion will attempt to + speed up writing parquet files by serializing them in parallel. Each + column in each row group in each output file are serialized in parallel + leveraging a maximum possible core count of n_files * n_row_groups * + n_columns. + maximum_parallel_row_group_writers: By default parallel parquet writer is + tuned for minimum memory usage in a streaming execution plan. You may + see a performance benefit when writing large parquet files by increasing + `maximum_parallel_row_group_writers` and + `maximum_buffered_record_batches_per_stream` if your system has idle + cores and can tolerate additional memory usage. Boosting these values is + likely worthwhile when writing out already in-memory data, such as from + a cached data frame. + maximum_buffered_record_batches_per_stream: See + `maximum_parallel_row_group_writers`. + column_specific_options: Overrides options for specific columns. If a column + is not a part of this dictionary, it will use the parameters provided here. 
+ """ + + def __init__( + self, + data_pagesize_limit: int = 1024 * 1024, + write_batch_size: int = 1024, + writer_version: str = "1.0", + skip_arrow_metadata: bool = False, + compression: Optional[str] = "zstd(3)", + dictionary_enabled: Optional[bool] = True, + dictionary_page_size_limit: int = 1024 * 1024, + statistics_enabled: Optional[str] = "page", + max_row_group_size: int = 1024 * 1024, + created_by: str = "datafusion-python", + column_index_truncate_length: Optional[int] = 64, + statistics_truncate_length: Optional[int] = None, + data_page_row_count_limit: int = 20_000, + encoding: Optional[str] = None, + bloom_filter_on_write: bool = False, + bloom_filter_fpp: Optional[float] = None, + bloom_filter_ndv: Optional[int] = None, + allow_single_file_parallelism: bool = True, + maximum_parallel_row_group_writers: int = 1, + maximum_buffered_record_batches_per_stream: int = 2, + column_specific_options: Optional[dict[str, ParquetColumnOptions]] = None, + ) -> None: + """Initialize the ParquetWriterOptions.""" + self.data_pagesize_limit = data_pagesize_limit + self.write_batch_size = write_batch_size + self.writer_version = writer_version + self.skip_arrow_metadata = skip_arrow_metadata + self.compression = compression + self.dictionary_enabled = dictionary_enabled + self.dictionary_page_size_limit = dictionary_page_size_limit + self.statistics_enabled = statistics_enabled + self.max_row_group_size = max_row_group_size + self.created_by = created_by + self.column_index_truncate_length = column_index_truncate_length + self.statistics_truncate_length = statistics_truncate_length + self.data_page_row_count_limit = data_page_row_count_limit + self.encoding = encoding + self.bloom_filter_on_write = bloom_filter_on_write + self.bloom_filter_fpp = bloom_filter_fpp + self.bloom_filter_ndv = bloom_filter_ndv + self.allow_single_file_parallelism = allow_single_file_parallelism + self.maximum_parallel_row_group_writers = maximum_parallel_row_group_writers + self.maximum_buffered_record_batches_per_stream = ( + maximum_buffered_record_batches_per_stream + ) + self.column_specific_options = column_specific_options + + class ParquetColumnOptions: """Parquet options for individual columns. Contains the available options that can be applied for an individual Parquet column, - replacing the provided options in the `write_parquet`. + replacing the global options in `ParquetWriterOptions`. Attributes: encoding: Sets encoding for the column path. 
Valid values are: `plain`, @@ -694,120 +873,75 @@ def write_csv(self, path: str | pathlib.Path, with_header: bool = False) -> None def write_parquet( self, path: str | pathlib.Path, - data_pagesize_limit: int = 1024 * 1024, - write_batch_size: int = 1024, - writer_version: str = "1.0", - skip_arrow_metadata: bool = False, - compression: Optional[str] = "zstd(3)", - dictionary_enabled: Optional[bool] = True, - dictionary_page_size_limit: int = 1024 * 1024, - statistics_enabled: Optional[str] = "page", - max_row_group_size: int = 1024 * 1024, - created_by: str = "datafusion-python", - column_index_truncate_length: Optional[int] = 64, - statistics_truncate_length: Optional[int] = None, - data_page_row_count_limit: int = 20_000, - encoding: Optional[str] = None, - bloom_filter_on_write: bool = False, - bloom_filter_fpp: Optional[float] = None, - bloom_filter_ndv: Optional[int] = None, - allow_single_file_parallelism: bool = True, - maximum_parallel_row_group_writers: int = 1, - maximum_buffered_record_batches_per_stream: int = 2, - column_specific_options: Optional[dict[str, ParquetColumnOptions]] = None, + compression: Union[str, Compression] = Compression.ZSTD, + compression_level: int | None = None, ) -> None: """Execute the :py:class:`DataFrame` and write the results to a Parquet file. Args: path: Path of the Parquet file to write. - data_pagesize_limit: Sets best effort maximum size of data page in bytes. - write_batch_size: Sets write_batch_size in bytes. - writer_version: Sets parquet writer version. Valid values are `1.0` and - `2.0`. - skip_arrow_metadata: Skip encoding the embedded arrow metadata in the - KV_meta. - compression: Compression type to use. Default is "zstd(3)". - Available compression types are + compression: Compression type to use. Default is "ZSTD". + Available compression types are: - "uncompressed": No compression. - "snappy": Snappy compression. - - "gzip(n)": Gzip compression with level n. - - "brotli(n)": Brotli compression with level n. + - "gzip": Gzip compression. + - "brotli": Brotli compression. - "lz4": LZ4 compression. - "lz4_raw": LZ4_RAW compression. - - "zstd(n)": Zstandard compression with level n. - dictionary_enabled: Sets if dictionary encoding is enabled. If None, uses - the default parquet writer setting. - dictionary_page_size_limit: Sets best effort maximum dictionary page size, - in bytes. - statistics_enabled: Sets if statistics are enabled for any column Valid - values are `none`, `chunk`, and `page`. If None, uses the default - parquet writer setting. - max_row_group_size: Target maximum number of rows in each row group - (defaults to 1M rows). Writing larger row groups requires more memory to - write, but can get better compression and be faster to read. - created_by: Sets "created by" property. - column_index_truncate_length: Sets column index truncate length. - statistics_truncate_length: Sets statistics truncate length. If None, uses - the default parquet writer setting. - data_page_row_count_limit: Sets best effort maximum number of rows in a data - page. - encoding: Sets default encoding for any column. Valid values are `plain`, - `plain_dictionary`, `rle`, `bit_packed`, `delta_binary_packed`, - `delta_length_byte_array`, `delta_byte_array`, `rle_dictionary`, and - `byte_stream_split`. If None, uses the default parquet writer setting. - bloom_filter_on_write: Write bloom filters for all columns when creating - parquet files. - bloom_filter_fpp: Sets bloom filter false positive probability. 
If None, - uses the default parquet writer setting - bloom_filter_ndv: Sets bloom filter number of distinct values. If None, uses - the default parquet writer setting. - allow_single_file_parallelism: Controls whether DataFusion will attempt to - speed up writing parquet files by serializing them in parallel. Each - column in each row group in each output file are serialized in parallel - leveraging a maximum possible core count of n_files * n_row_groups * - n_columns. - maximum_parallel_row_group_writers: By default parallel parquet writer is - tuned for minimum memory usage in a streaming execution plan. You may - see a performance benefit when writing large parquet files by increasing - `maximum_parallel_row_group_writers` and - `maximum_buffered_record_batches_per_stream` if your system has idle - cores and can tolerate additional memory usage. Boosting these values is - likely worthwhile when writing out already in-memory data, such as from - a cached data frame. - maximum_buffered_record_batches_per_stream: See - `maximum_parallel_row_group_writers`. - column_specific_options: Overrides options for specific columns. If a column - is not a part of this dictionary, it will use the parameters provided in - the `write_parquet`. + - "zstd": Zstandard compression. + Note: LZO is not yet implemented in arrow-rs and is therefore excluded. + compression_level: Compression level to use. For ZSTD, the + recommended range is 1 to 22, with the default being 4. Higher levels + provide better compression but slower speed. + """ + # Convert string to Compression enum if necessary + if isinstance(compression, str): + compression = Compression.from_str(compression) + + if ( + compression in {Compression.GZIP, Compression.BROTLI, Compression.ZSTD} + and compression_level is None + ): + compression_level = compression.get_default_level() + + self.df.write_parquet(str(path), compression.value, compression_level) + + def write_parquet_options( + self, path: str | pathlib.Path, options: ParquetWriterOptions + ) -> None: + """Execute the :py:class:`DataFrame` and write the results to a Parquet file. + + Allows advanced writer options to be set with `ParquetWriterOptions`. + + Args: + path: Path of the Parquet file to write. + options: Sets the writer parquet options (see `ParquetWriterOptions`). 
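+
+        Example:
+            A minimal illustrative call (the path and option values are
+            arbitrary)::
+
+                df.write_parquet_options(
+                    "out/",
+                    ParquetWriterOptions(compression="snappy", created_by="my-app"),
+                )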
""" options_internal = ParquetWriterOptionsInternal( - data_pagesize_limit, - write_batch_size, - writer_version, - skip_arrow_metadata, - compression, - dictionary_enabled, - dictionary_page_size_limit, - statistics_enabled, - max_row_group_size, - created_by, - column_index_truncate_length, - statistics_truncate_length, - data_page_row_count_limit, - encoding, - bloom_filter_on_write, - bloom_filter_fpp, - bloom_filter_ndv, - allow_single_file_parallelism, - maximum_parallel_row_group_writers, - maximum_buffered_record_batches_per_stream, + options.data_pagesize_limit, + options.write_batch_size, + options.writer_version, + options.skip_arrow_metadata, + options.compression, + options.dictionary_enabled, + options.dictionary_page_size_limit, + options.statistics_enabled, + options.max_row_group_size, + options.created_by, + options.column_index_truncate_length, + options.statistics_truncate_length, + options.data_page_row_count_limit, + options.encoding, + options.bloom_filter_on_write, + options.bloom_filter_fpp, + options.bloom_filter_ndv, + options.allow_single_file_parallelism, + options.maximum_parallel_row_group_writers, + options.maximum_buffered_record_batches_per_stream, ) - if column_specific_options is None: - column_specific_options = {} - column_specific_options_internal = {} - for column, opts in column_specific_options.items(): + for column, opts in (options.column_specific_options or {}).items(): column_specific_options_internal[column] = ParquetColumnOptionsInternal( bloom_filter_enabled=opts.bloom_filter_enabled, encoding=opts.encoding, @@ -818,7 +952,7 @@ def write_parquet( bloom_filter_ndv=opts.bloom_filter_ndv, ) - self.df.write_parquet( + self.df.write_parquet_options( str(path), options_internal, column_specific_options_internal, diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index e1e29c45c..8ad62f79f 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -23,6 +23,7 @@ import pytest from datafusion import ( DataFrame, + ParquetWriterOptions, ParquetColumnOptions, SessionContext, WindowFrame, @@ -1549,7 +1550,70 @@ def test_write_parquet(df, tmp_path, path_to_str): assert result == expected -def test_write_parquet_default_compression(df, tmp_path): +@pytest.mark.parametrize( + ("compression", "compression_level"), + [("gzip", 6), ("brotli", 7), ("zstd", 15)], +) +def test_write_compressed_parquet(df, tmp_path, compression, compression_level): + path = tmp_path + + df.write_parquet( + str(path), compression=compression, compression_level=compression_level + ) + + # test that the actual compression scheme is the one written + for _root, _dirs, files in os.walk(path): + for file in files: + if file.endswith(".parquet"): + metadata = pq.ParquetFile(tmp_path / file).metadata.to_dict() + for row_group in metadata["row_groups"]: + for columns in row_group["columns"]: + assert columns["compression"].lower() == compression + + result = pq.read_table(str(path)).to_pydict() + expected = df.to_pydict() + + assert result == expected + + +@pytest.mark.parametrize( + ("compression", "compression_level"), + [("gzip", 12), ("brotli", 15), ("zstd", 23), ("wrong", 12)], +) +def test_write_compressed_parquet_wrong_compression_level( + df, tmp_path, compression, compression_level +): + path = tmp_path + + with pytest.raises(ValueError): + df.write_parquet( + str(path), + compression=compression, + compression_level=compression_level, + ) + + +@pytest.mark.parametrize("compression", ["wrong"]) +def 
test_write_compressed_parquet_invalid_compression(df, tmp_path, compression): + path = tmp_path + + with pytest.raises(ValueError): + df.write_parquet(str(path), compression=compression) + + +# not testing lzo because it it not implemented yet +# https://github.com/apache/arrow-rs/issues/6970 +@pytest.mark.parametrize("compression", ["zstd", "brotli", "gzip"]) +def test_write_compressed_parquet_default_compression_level(df, tmp_path, compression): + # Test write_parquet with zstd, brotli, gzip default compression level, + # ie don't specify compression level + # should complete without error + path = tmp_path + + df.write_parquet(str(path), compression=compression) + + +def test_write_parquet_options_default_compression(df, tmp_path): """Test that the default compression is ZSTD.""" df.write_parquet(tmp_path) @@ -1564,11 +1628,11 @@ def test_write_parquet_default_compression(df, tmp_path): "compression", ["gzip(6)", "brotli(7)", "zstd(15)", "snappy", "uncompressed"], ) -def test_write_compressed_parquet(df, tmp_path, compression): +def test_write_parquet_options_compression(df, tmp_path, compression): import re path = tmp_path - df.write_parquet(str(path), compression=compression) + df.write_parquet_options(str(path), ParquetWriterOptions(compression=compression)) # test that the actual compression scheme is the one written for _root, _dirs, files in os.walk(path): @@ -1591,32 +1655,32 @@ def test_write_compressed_parquet(df, tmp_path, compression): "compression", ["gzip(12)", "brotli(15)", "zstd(23)"], ) -def test_write_compressed_parquet_wrong_compression_level(df, tmp_path, compression): +def test_write_parquet_options_wrong_compression_level(df, tmp_path, compression): path = tmp_path with pytest.raises(Exception, match=r"valid compression range .*? exceeded."): - df.write_parquet(str(path), compression=compression) + df.write_parquet_options(str(path), ParquetWriterOptions(compression=compression)) @pytest.mark.parametrize("compression", ["wrong", "wrong(12)"]) -def test_write_compressed_parquet_invalid_compression(df, tmp_path, compression): +def test_write_parquet_options_invalid_compression(df, tmp_path, compression): path = tmp_path with pytest.raises(Exception, match="Unknown or unsupported parquet compression"): - df.write_parquet(str(path), compression=compression) + df.write_parquet_options(str(path), ParquetWriterOptions(compression=compression)) @pytest.mark.parametrize( ("writer_version", "format_version"), [("1.0", "1.0"), ("2.0", "2.6"), (None, "1.0")], ) -def test_write_parquet_writer_version(df, tmp_path, writer_version, format_version): +def test_write_parquet_options_writer_version(df, tmp_path, writer_version, format_version): """Test the Parquet writer version. 
Note that writer_version=2.0 results in format_version=2.6""" if writer_version is None: - df.write_parquet(tmp_path) + df.write_parquet_options(tmp_path, ParquetWriterOptions()) else: - df.write_parquet(tmp_path, writer_version=writer_version) + df.write_parquet_options(tmp_path, ParquetWriterOptions(writer_version=writer_version)) for file in tmp_path.rglob("*.parquet"): parquet = pq.ParquetFile(file) @@ -1625,18 +1689,18 @@ def test_write_parquet_writer_version(df, tmp_path, writer_version, format_versi @pytest.mark.parametrize("writer_version", ["1.2.3", "custom-version", "0"]) -def test_write_parquet_wrong_writer_version(df, tmp_path, writer_version): +def test_write_parquet_options_wrong_writer_version(df, tmp_path, writer_version): """Test that invalid writer versions in Parquet throw an exception.""" with pytest.raises( Exception, match="Unknown or unsupported parquet writer version" ): - df.write_parquet(tmp_path, writer_version=writer_version) + df.write_parquet_options(tmp_path, ParquetWriterOptions(writer_version=writer_version)) @pytest.mark.parametrize("dictionary_enabled", [True, False, None]) -def test_write_parquet_dictionary_enabled(df, tmp_path, dictionary_enabled): +def test_write_parquet_options_dictionary_enabled(df, tmp_path, dictionary_enabled): """Test enabling/disabling the dictionaries in Parquet.""" - df.write_parquet(tmp_path, dictionary_enabled=dictionary_enabled) + df.write_parquet_options(tmp_path, ParquetWriterOptions(dictionary_enabled=dictionary_enabled)) # by default, the dictionary is enabled, so None results in True result = dictionary_enabled if dictionary_enabled is not None else True @@ -1653,12 +1717,12 @@ def test_write_parquet_dictionary_enabled(df, tmp_path, dictionary_enabled): ("statistics_enabled", "has_statistics"), [("page", True), ("chunk", True), ("none", False), (None, True)], ) -def test_write_parquet_statistics_enabled( +def test_write_parquet_options_statistics_enabled( df, tmp_path, statistics_enabled, has_statistics ): """Test configuring the statistics in Parquet. In pyarrow we can only check for column-level statistics, so "page" and "chunk" are tested in the same way.""" - df.write_parquet(tmp_path, statistics_enabled=statistics_enabled) + df.write_parquet_options(tmp_path, ParquetWriterOptions(statistics_enabled=statistics_enabled)) for file in tmp_path.rglob("*.parquet"): parquet = pq.ParquetFile(file) @@ -1673,11 +1737,11 @@ def test_write_parquet_statistics_enabled( @pytest.mark.parametrize("max_row_group_size", [1000, 5000, 10000, 100000]) -def test_write_parquet_max_row_group_size(large_df, tmp_path, max_row_group_size): +def test_write_parquet_options_max_row_group_size(large_df, tmp_path, max_row_group_size): """Test configuring the max number of rows per group in Parquet. 
These test cases guarantee that the number of rows for each row group is max_row_group_size, given the total number of rows is a multiple of max_row_group_size.""" - large_df.write_parquet(tmp_path, max_row_group_size=max_row_group_size) + large_df.write_parquet_options(tmp_path, ParquetWriterOptions(max_row_group_size=max_row_group_size)) for file in tmp_path.rglob("*.parquet"): parquet = pq.ParquetFile(file) @@ -1687,9 +1751,9 @@ def test_write_parquet_max_row_group_size(large_df, tmp_path, max_row_group_size @pytest.mark.parametrize("created_by", ["datafusion", "datafusion-python", "custom"]) -def test_write_parquet_created_by(df, tmp_path, created_by): +def test_write_parquet_options_created_by(df, tmp_path, created_by): """Test configuring the created by metadata in Parquet.""" - df.write_parquet(tmp_path, created_by=created_by) + df.write_parquet_options(tmp_path, ParquetWriterOptions(created_by=created_by)) for file in tmp_path.rglob("*.parquet"): parquet = pq.ParquetFile(file) @@ -1698,7 +1762,7 @@ def test_write_parquet_created_by(df, tmp_path, created_by): @pytest.mark.parametrize("statistics_truncate_length", [5, 25, 50]) -def test_write_parquet_statistics_truncate_length( +def test_write_parquet_options_statistics_truncate_length( df, tmp_path, statistics_truncate_length ): """Test configuring the truncate limit in Parquet's row-group-level statistics.""" @@ -1712,7 +1776,7 @@ def test_write_parquet_statistics_truncate_length( "b": ["a_smaller", "m_smaller", "z_smaller"], } df = ctx.from_arrow(pa.record_batch(data)) - df.write_parquet(tmp_path, statistics_truncate_length=statistics_truncate_length) + df.write_parquet_options(tmp_path, ParquetWriterOptions(statistics_truncate_length=statistics_truncate_length)) for file in tmp_path.rglob("*.parquet"): parquet = pq.ParquetFile(file) @@ -1725,7 +1789,7 @@ def test_write_parquet_statistics_truncate_length( assert len(statistics["max"]) <= statistics_truncate_length -def test_write_parquet_default_encoding(tmp_path): +def test_write_parquet_options_default_encoding(tmp_path): """Test that, by default, Parquet files are written with dictionary encoding. 
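    The encodings actually written can be read back with pyarrow, e.g.
    pq.ParquetFile(file).metadata.row_group(0).column(0).encodings.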
Note that dictionary encoding is not used for boolean values, so it is not tested here."""
 
@@ -1736,7 +1800,7 @@ def test_write_parquet_default_encoding(tmp_path):
         "c": [1.01, 2.02, 3.03],
     }
     df = ctx.from_arrow(pa.record_batch(data))
-    df.write_parquet(tmp_path)
+    df.write_parquet_options(tmp_path, ParquetWriterOptions())
 
     for file in tmp_path.rglob("*.parquet"):
         parquet = pq.ParquetFile(file)
@@ -1758,24 +1822,23 @@ def test_write_parquet_default_encoding(tmp_path):
         ("byte_stream_split", ["int", "float"], ("RLE", "BYTE_STREAM_SPLIT")),
     ],
 )
-def test_write_parquet_encoding(tmp_path, encoding, data_types, result):
+def test_write_parquet_options_encoding(tmp_path, encoding, data_types, result):
     """Test different encodings in Parquet in their respective supported column types."""
     ctx = SessionContext()
 
     data = {}
     for data_type in data_types:
-        match data_type:
-            case "int":
-                data["int"] = [1, 2, 3]
-            case "float":
-                data["float"] = [1.01, 2.02, 3.03]
-            case "str":
-                data["str"] = ["a", "b", "c"]
-            case "bool":
-                data["bool"] = [True, False, True]
+        if data_type == "int":
+            data["int"] = [1, 2, 3]
+        elif data_type == "float":
+            data["float"] = [1.01, 2.02, 3.03]
+        elif data_type == "str":
+            data["str"] = ["a", "b", "c"]
+        elif data_type == "bool":
+            data["bool"] = [True, False, True]
 
     df = ctx.from_arrow(pa.record_batch(data))
-    df.write_parquet(tmp_path, encoding=encoding, dictionary_enabled=False)
+    df.write_parquet_options(tmp_path, ParquetWriterOptions(encoding=encoding, dictionary_enabled=False))
 
     for file in tmp_path.rglob("*.parquet"):
         parquet = pq.ParquetFile(file)
@@ -1787,39 +1850,39 @@ def test_write_parquet_encoding(tmp_path, encoding, data_types, result):
 
 @pytest.mark.parametrize("encoding", ["bit_packed"])
-def test_write_parquet_unsupported_encoding(df, tmp_path, encoding):
+def test_write_parquet_options_unsupported_encoding(df, tmp_path, encoding):
     """Test that unsupported Parquet encodings do not work."""
     # BaseException is used since this throws a Rust panic: https://github.com/PyO3/pyo3/issues/3519
     with pytest.raises(BaseException, match="Encoding .*? is not supported"):
-        df.write_parquet(tmp_path, encoding=encoding)
+        df.write_parquet_options(tmp_path, ParquetWriterOptions(encoding=encoding))
 
 
 @pytest.mark.parametrize("encoding", ["non_existent", "unknown", "plain123"])
-def test_write_parquet_invalid_encoding(df, tmp_path, encoding):
+def test_write_parquet_options_invalid_encoding(df, tmp_path, encoding):
     """Test that invalid Parquet encodings do not work."""
     with pytest.raises(Exception, match="Unknown or unsupported parquet encoding"):
-        df.write_parquet(tmp_path, encoding=encoding)
+        df.write_parquet_options(tmp_path, ParquetWriterOptions(encoding=encoding))
 
 
 @pytest.mark.parametrize("encoding", ["plain_dictionary", "rle_dictionary"])
-def test_write_parquet_dictionary_encoding_fallback(df, tmp_path, encoding):
+def test_write_parquet_options_dictionary_encoding_fallback(df, tmp_path, encoding):
     """Test that the dictionary encoding cannot be used as fallback in Parquet."""
     # BaseException is used since this throws a Rust panic: https://github.com/PyO3/pyo3/issues/3519
     with pytest.raises(
         BaseException, match="Dictionary encoding can not be used as fallback encoding"
     ):
-        df.write_parquet(tmp_path, encoding=encoding)
+        df.write_parquet_options(tmp_path, ParquetWriterOptions(encoding=encoding))
 
 
-def test_write_parquet_bloom_filter(df, tmp_path):
+def test_write_parquet_options_bloom_filter(df, tmp_path):
     """Test Parquet files with and without (default) bloom filters.
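    Bloom filters are requested here via
    ParquetWriterOptions(bloom_filter_on_write=True); the bloom_filter_fpp and
    bloom_filter_ndv options could tune them further (not exercised in this test).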
Since pyarrow does not expose any information about bloom filters, the easiest way to confirm that they are actually written is to compare the file size."""
     path_no_bloom_filter = tmp_path / "1"
     path_bloom_filter = tmp_path / "2"
 
-    df.write_parquet(path_no_bloom_filter)
-    df.write_parquet(path_bloom_filter, bloom_filter_on_write=True)
+    df.write_parquet_options(path_no_bloom_filter, ParquetWriterOptions())
+    df.write_parquet_options(path_bloom_filter, ParquetWriterOptions(bloom_filter_on_write=True))
 
     size_no_bloom_filter = 0
     for file in path_no_bloom_filter.rglob("*.parquet"):
@@ -1832,7 +1895,7 @@ def test_write_parquet_bloom_filter(df, tmp_path):
 
     assert size_no_bloom_filter < size_bloom_filter
 
 
-def test_write_parquet_column_options(df, tmp_path):
+def test_write_parquet_options_column_options(df, tmp_path):
     """Test writing Parquet files with different options for each column, which replace
     the global configs (when provided)."""
     data = {
@@ -1888,10 +1951,10 @@ def test_write_parquet_column_options(df, tmp_path):
     ctx = SessionContext()
     df = ctx.from_arrow(pa.record_batch(data))
 
-    df.write_parquet(
+    df.write_parquet_options(
         tmp_path,
-        compression="brotli(8)",
-        column_specific_options=column_specific_options,
+        ParquetWriterOptions(compression="brotli(8)",
+        column_specific_options=column_specific_options),
     )
 
     for file in tmp_path.rglob("*.parquet"):
diff --git a/src/dataframe.rs b/src/dataframe.rs
index ffb3f36cf..eca039c44 100644
--- a/src/dataframe.rs
+++ b/src/dataframe.rs
@@ -33,6 +33,7 @@ use datafusion::dataframe::{DataFrame, DataFrameWriteOptions};
 use datafusion::datasource::TableProvider;
 use datafusion::error::DataFusionError;
 use datafusion::execution::SendableRecordBatchStream;
+use datafusion::parquet::basic::{BrotliLevel, Compression, GzipLevel, ZstdLevel};
 use datafusion::prelude::*;
 use futures::{StreamExt, TryStreamExt};
 use pyo3::exceptions::PyValueError;
@@ -708,7 +709,68 @@ impl PyDataFrame {
     }
 
     /// Write a `DataFrame` to a Parquet file.
+    #[pyo3(signature = (
+        path,
+        compression="zstd",
+        compression_level=None
+        ))]
     fn write_parquet(
+        &self,
+        path: &str,
+        compression: &str,
+        compression_level: Option<u32>,
+        py: Python,
+    ) -> PyDataFusionResult<()> {
+        fn verify_compression_level(cl: Option<u32>) -> Result<u32, PyErr> {
+            cl.ok_or(PyValueError::new_err("compression_level is not defined"))
+        }
+
+        let _validated = match compression.to_lowercase().as_str() {
+            "snappy" => Compression::SNAPPY,
+            "gzip" => Compression::GZIP(
+                GzipLevel::try_new(compression_level.unwrap_or(6))
+                    .map_err(|e| PyValueError::new_err(format!("{e}")))?,
+            ),
+            "brotli" => Compression::BROTLI(
+                BrotliLevel::try_new(verify_compression_level(compression_level)?)
+                    .map_err(|e| PyValueError::new_err(format!("{e}")))?,
+            ),
+            "zstd" => Compression::ZSTD(
+                ZstdLevel::try_new(verify_compression_level(compression_level)?
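/* ZstdLevel::try_new takes an i32 level (zstd also defines negative levels), unlike the u32-based gzip and brotli levels above, hence the cast that follows */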
as i32) + .map_err(|e| PyValueError::new_err(format!("{e}")))?, + ), + "lzo" => Compression::LZO, + "lz4" => Compression::LZ4, + "lz4_raw" => Compression::LZ4_RAW, + "uncompressed" => Compression::UNCOMPRESSED, + _ => { + return Err(PyDataFusionError::Common(format!( + "Unrecognized compression type {compression}" + ))); + } + }; + + let mut compression_string = compression.to_string(); + if let Some(level) = compression_level { + compression_string.push_str(&format!("({level})")); + } + + let mut options = TableParquetOptions::default(); + options.global.compression = Some(compression_string); + + wait_for_future( + py, + self.df.as_ref().clone().write_parquet( + path, + DataFrameWriteOptions::new(), + Option::from(options), + ), + )?; + Ok(()) + } + + /// Write a `DataFrame` to a Parquet file, using advanced options. + fn write_parquet_options( &self, path: &str, options: PyParquetWriterOptions, From b738b19a18c2485cd0ad7de3c205cf6b98788396 Mon Sep 17 00:00:00 2001 From: nuno-faria Date: Fri, 20 Jun 2025 08:58:58 +0100 Subject: [PATCH 3/5] Rename write_parquet_options to write_parquet_with_options --- python/datafusion/dataframe.py | 4 +- python/tests/test_dataframe.py | 74 +++++++++++++++++----------------- src/dataframe.rs | 2 +- 3 files changed, 40 insertions(+), 40 deletions(-) diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 3c8c09b38..014331541 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -906,7 +906,7 @@ def write_parquet( self.df.write_parquet(str(path), compression.value, compression_level) - def write_parquet_options( + def write_parquet_with_options( self, path: str | pathlib.Path, options: ParquetWriterOptions ) -> None: """Execute the :py:class:`DataFrame` and write the results to a Parquet file. 
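    An illustrative call (hypothetical output path and column name "a"; any
    DataFrame produced by a SessionContext works)::

        from datafusion import ParquetColumnOptions, ParquetWriterOptions

        df.write_parquet_with_options(
            "out_tuned",
            ParquetWriterOptions(
                compression="zstd(3)",
                writer_version="2.0",
                max_row_group_size=100_000,
                bloom_filter_on_write=True,
                # per-column overrides replace the global settings where provided
                column_specific_options={
                    "a": ParquetColumnOptions(compression="snappy")
                },
            ),
        )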
@@ -952,7 +952,7 @@ def write_parquet_options( bloom_filter_ndv=opts.bloom_filter_ndv, ) - self.df.write_parquet_options( + self.df.write_parquet_with_options( str(path), options_internal, column_specific_options_internal, diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 8ad62f79f..daa4331df 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -1613,7 +1613,7 @@ def test_write_compressed_parquet_default_compression_level(df, tmp_path, compre df.write_parquet(str(path), compression=compression) -def test_write_parquet_options_default_compression(df, tmp_path): +def test_write_parquet_with_options_default_compression(df, tmp_path): """Test that the default compression is ZSTD.""" df.write_parquet(tmp_path) @@ -1628,11 +1628,11 @@ def test_write_parquet_options_default_compression(df, tmp_path): "compression", ["gzip(6)", "brotli(7)", "zstd(15)", "snappy", "uncompressed"], ) -def test_write_parquet_options_compression(df, tmp_path, compression): +def test_write_parquet_with_options_compression(df, tmp_path, compression): import re path = tmp_path - df.write_parquet_options(str(path), ParquetWriterOptions(compression=compression)) + df.write_parquet_with_options(str(path), ParquetWriterOptions(compression=compression)) # test that the actual compression scheme is the one written for _root, _dirs, files in os.walk(path): @@ -1655,32 +1655,32 @@ def test_write_parquet_options_compression(df, tmp_path, compression): "compression", ["gzip(12)", "brotli(15)", "zstd(23)"], ) -def test_write_parquet_options_wrong_compression_level(df, tmp_path, compression): +def test_write_parquet_with_options_wrong_compression_level(df, tmp_path, compression): path = tmp_path with pytest.raises(Exception, match=r"valid compression range .*? exceeded."): - df.write_parquet_options(str(path), ParquetWriterOptions(compression=compression)) + df.write_parquet_with_options(str(path), ParquetWriterOptions(compression=compression)) @pytest.mark.parametrize("compression", ["wrong", "wrong(12)"]) -def test_write_parquet_options_invalid_compression(df, tmp_path, compression): +def test_write_parquet_with_options_invalid_compression(df, tmp_path, compression): path = tmp_path with pytest.raises(Exception, match="Unknown or unsupported parquet compression"): - df.write_parquet_options(str(path), ParquetWriterOptions(compression=compression)) + df.write_parquet_with_options(str(path), ParquetWriterOptions(compression=compression)) @pytest.mark.parametrize( ("writer_version", "format_version"), [("1.0", "1.0"), ("2.0", "2.6"), (None, "1.0")], ) -def test_write_parquet_options_writer_version(df, tmp_path, writer_version, format_version): +def test_write_parquet_with_options_writer_version(df, tmp_path, writer_version, format_version): """Test the Parquet writer version. 
Note that writer_version=2.0 results in format_version=2.6""" if writer_version is None: - df.write_parquet_options(tmp_path, ParquetWriterOptions()) + df.write_parquet_with_options(tmp_path, ParquetWriterOptions()) else: - df.write_parquet_options(tmp_path, ParquetWriterOptions(writer_version=writer_version)) + df.write_parquet_with_options(tmp_path, ParquetWriterOptions(writer_version=writer_version)) for file in tmp_path.rglob("*.parquet"): parquet = pq.ParquetFile(file) @@ -1689,18 +1689,18 @@ def test_write_parquet_options_writer_version(df, tmp_path, writer_version, form @pytest.mark.parametrize("writer_version", ["1.2.3", "custom-version", "0"]) -def test_write_parquet_options_wrong_writer_version(df, tmp_path, writer_version): +def test_write_parquet_with_options_wrong_writer_version(df, tmp_path, writer_version): """Test that invalid writer versions in Parquet throw an exception.""" with pytest.raises( Exception, match="Unknown or unsupported parquet writer version" ): - df.write_parquet_options(tmp_path, ParquetWriterOptions(writer_version=writer_version)) + df.write_parquet_with_options(tmp_path, ParquetWriterOptions(writer_version=writer_version)) @pytest.mark.parametrize("dictionary_enabled", [True, False, None]) -def test_write_parquet_options_dictionary_enabled(df, tmp_path, dictionary_enabled): +def test_write_parquet_with_options_dictionary_enabled(df, tmp_path, dictionary_enabled): """Test enabling/disabling the dictionaries in Parquet.""" - df.write_parquet_options(tmp_path, ParquetWriterOptions(dictionary_enabled=dictionary_enabled)) + df.write_parquet_with_options(tmp_path, ParquetWriterOptions(dictionary_enabled=dictionary_enabled)) # by default, the dictionary is enabled, so None results in True result = dictionary_enabled if dictionary_enabled is not None else True @@ -1717,12 +1717,12 @@ def test_write_parquet_options_dictionary_enabled(df, tmp_path, dictionary_enabl ("statistics_enabled", "has_statistics"), [("page", True), ("chunk", True), ("none", False), (None, True)], ) -def test_write_parquet_options_statistics_enabled( +def test_write_parquet_with_options_statistics_enabled( df, tmp_path, statistics_enabled, has_statistics ): """Test configuring the statistics in Parquet. In pyarrow we can only check for column-level statistics, so "page" and "chunk" are tested in the same way.""" - df.write_parquet_options(tmp_path, ParquetWriterOptions(statistics_enabled=statistics_enabled)) + df.write_parquet_with_options(tmp_path, ParquetWriterOptions(statistics_enabled=statistics_enabled)) for file in tmp_path.rglob("*.parquet"): parquet = pq.ParquetFile(file) @@ -1737,11 +1737,11 @@ def test_write_parquet_options_statistics_enabled( @pytest.mark.parametrize("max_row_group_size", [1000, 5000, 10000, 100000]) -def test_write_parquet_options_max_row_group_size(large_df, tmp_path, max_row_group_size): +def test_write_parquet_with_options_max_row_group_size(large_df, tmp_path, max_row_group_size): """Test configuring the max number of rows per group in Parquet. 
These test cases guarantee that the number of rows for each row group is max_row_group_size, given the total number of rows is a multiple of max_row_group_size.""" - large_df.write_parquet_options(tmp_path, ParquetWriterOptions(max_row_group_size=max_row_group_size)) + large_df.write_parquet_with_options(tmp_path, ParquetWriterOptions(max_row_group_size=max_row_group_size)) for file in tmp_path.rglob("*.parquet"): parquet = pq.ParquetFile(file) @@ -1751,9 +1751,9 @@ def test_write_parquet_options_max_row_group_size(large_df, tmp_path, max_row_gr @pytest.mark.parametrize("created_by", ["datafusion", "datafusion-python", "custom"]) -def test_write_parquet_options_created_by(df, tmp_path, created_by): +def test_write_parquet_with_options_created_by(df, tmp_path, created_by): """Test configuring the created by metadata in Parquet.""" - df.write_parquet_options(tmp_path, ParquetWriterOptions(created_by=created_by)) + df.write_parquet_with_options(tmp_path, ParquetWriterOptions(created_by=created_by)) for file in tmp_path.rglob("*.parquet"): parquet = pq.ParquetFile(file) @@ -1762,7 +1762,7 @@ def test_write_parquet_options_created_by(df, tmp_path, created_by): @pytest.mark.parametrize("statistics_truncate_length", [5, 25, 50]) -def test_write_parquet_options_statistics_truncate_length( +def test_write_parquet_with_options_statistics_truncate_length( df, tmp_path, statistics_truncate_length ): """Test configuring the truncate limit in Parquet's row-group-level statistics.""" @@ -1776,7 +1776,7 @@ def test_write_parquet_options_statistics_truncate_length( "b": ["a_smaller", "m_smaller", "z_smaller"], } df = ctx.from_arrow(pa.record_batch(data)) - df.write_parquet_options(tmp_path, ParquetWriterOptions(statistics_truncate_length=statistics_truncate_length)) + df.write_parquet_with_options(tmp_path, ParquetWriterOptions(statistics_truncate_length=statistics_truncate_length)) for file in tmp_path.rglob("*.parquet"): parquet = pq.ParquetFile(file) @@ -1789,7 +1789,7 @@ def test_write_parquet_options_statistics_truncate_length( assert len(statistics["max"]) <= statistics_truncate_length -def test_write_parquet_options_default_encoding(tmp_path): +def test_write_parquet_with_options_default_encoding(tmp_path): """Test that, by default, Parquet files are written with dictionary encoding. 
Note that dictionary encoding is not used for boolean values, so it is not tested here."""
 
@@ -1800,7 +1800,7 @@ def test_write_parquet_options_default_encoding(tmp_path):
         "c": [1.01, 2.02, 3.03],
     }
     df = ctx.from_arrow(pa.record_batch(data))
-    df.write_parquet_options(tmp_path, ParquetWriterOptions())
+    df.write_parquet_with_options(tmp_path, ParquetWriterOptions())
 
     for file in tmp_path.rglob("*.parquet"):
         parquet = pq.ParquetFile(file)
@@ -1822,7 +1822,7 @@
         ("byte_stream_split", ["int", "float"], ("RLE", "BYTE_STREAM_SPLIT")),
     ],
 )
-def test_write_parquet_options_encoding(tmp_path, encoding, data_types, result):
+def test_write_parquet_with_options_encoding(tmp_path, encoding, data_types, result):
     """Test different encodings in Parquet in their respective supported column types."""
     ctx = SessionContext()
 
@@ -1838,7 +1838,7 @@ def test_write_parquet_options_encoding(tmp_path, encoding, data_types, result):
         data["bool"] = [True, False, True]
 
     df = ctx.from_arrow(pa.record_batch(data))
-    df.write_parquet_options(tmp_path, ParquetWriterOptions(encoding=encoding, dictionary_enabled=False))
+    df.write_parquet_with_options(tmp_path, ParquetWriterOptions(encoding=encoding, dictionary_enabled=False))
 
     for file in tmp_path.rglob("*.parquet"):
         parquet = pq.ParquetFile(file)
@@ -1850,39 +1850,39 @@ def test_write_parquet_options_encoding(tmp_path, encoding, data_types, result):
 
 @pytest.mark.parametrize("encoding", ["bit_packed"])
-def test_write_parquet_options_unsupported_encoding(df, tmp_path, encoding):
+def test_write_parquet_with_options_unsupported_encoding(df, tmp_path, encoding):
     """Test that unsupported Parquet encodings do not work."""
     # BaseException is used since this throws a Rust panic: https://github.com/PyO3/pyo3/issues/3519
     with pytest.raises(BaseException, match="Encoding .*? is not supported"):
-        df.write_parquet_options(tmp_path, ParquetWriterOptions(encoding=encoding))
+        df.write_parquet_with_options(tmp_path, ParquetWriterOptions(encoding=encoding))
 
 
 @pytest.mark.parametrize("encoding", ["non_existent", "unknown", "plain123"])
-def test_write_parquet_options_invalid_encoding(df, tmp_path, encoding):
+def test_write_parquet_with_options_invalid_encoding(df, tmp_path, encoding):
     """Test that invalid Parquet encodings do not work."""
     with pytest.raises(Exception, match="Unknown or unsupported parquet encoding"):
-        df.write_parquet_options(tmp_path, ParquetWriterOptions(encoding=encoding))
+        df.write_parquet_with_options(tmp_path, ParquetWriterOptions(encoding=encoding))
 
 
 @pytest.mark.parametrize("encoding", ["plain_dictionary", "rle_dictionary"])
-def test_write_parquet_options_dictionary_encoding_fallback(df, tmp_path, encoding):
+def test_write_parquet_with_options_dictionary_encoding_fallback(df, tmp_path, encoding):
     """Test that the dictionary encoding cannot be used as fallback in Parquet."""
     # BaseException is used since this throws a Rust panic: https://github.com/PyO3/pyo3/issues/3519
     with pytest.raises(
         BaseException, match="Dictionary encoding can not be used as fallback encoding"
     ):
-        df.write_parquet_options(tmp_path, ParquetWriterOptions(encoding=encoding))
+        df.write_parquet_with_options(tmp_path, ParquetWriterOptions(encoding=encoding))
 
 
-def test_write_parquet_options_bloom_filter(df, tmp_path):
+def test_write_parquet_with_options_bloom_filter(df, tmp_path):
     """Test Parquet files with and without (default) bloom filters.
Since pyarrow does not expose any information about bloom filters, the easiest way to confirm that they are actually written is to compare the file size.""" path_no_bloom_filter = tmp_path / "1" path_bloom_filter = tmp_path / "2" - df.write_parquet_options(path_no_bloom_filter, ParquetWriterOptions()) - df.write_parquet_options(path_bloom_filter, ParquetWriterOptions(bloom_filter_on_write=True)) + df.write_parquet_with_options(path_no_bloom_filter, ParquetWriterOptions()) + df.write_parquet_with_options(path_bloom_filter, ParquetWriterOptions(bloom_filter_on_write=True)) size_no_bloom_filter = 0 for file in path_no_bloom_filter.rglob("*.parquet"): @@ -1895,7 +1895,7 @@ def test_write_parquet_options_bloom_filter(df, tmp_path): assert size_no_bloom_filter < size_bloom_filter -def test_write_parquet_options_column_options(df, tmp_path): +def test_write_parquet_with_options_column_options(df, tmp_path): """Test writing Parquet files with different options for each column, which replace the global configs (when provided).""" data = { @@ -1951,7 +1951,7 @@ def test_write_parquet_options_column_options(df, tmp_path): ctx = SessionContext() df = ctx.from_arrow(pa.record_batch(data)) - df.write_parquet_options( + df.write_parquet_with_options( tmp_path, ParquetWriterOptions(compression="brotli(8)", column_specific_options=column_specific_options), diff --git a/src/dataframe.rs b/src/dataframe.rs index eca039c44..afe25ea7c 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -770,7 +770,7 @@ impl PyDataFrame { } /// Write a `DataFrame` to a Parquet file, using advanced options. - fn write_parquet_options( + fn write_parquet_with_options( &self, path: &str, options: PyParquetWriterOptions, From 567955f8b10eb841f3d7bd21aec8de699862c51d Mon Sep 17 00:00:00 2001 From: nuno-faria Date: Fri, 20 Jun 2025 09:14:53 +0100 Subject: [PATCH 4/5] Merge remote-tracking branch 'origin/main' into write_parquet_options --- .github/workflows/build.yml | 8 +- .github/workflows/docs.yaml | 2 +- .github/workflows/test.yaml | 13 +- Cargo.lock | 1064 +++++++------ Cargo.toml | 16 +- dev/changelog/47.0.0.md | 64 + dev/release/README.md | 20 +- docs/source/api/dataframe.rst | 387 +++++ docs/source/api/index.rst | 27 + docs/source/conf.py | 4 + docs/source/index.rst | 2 + .../common-operations/functions.rst | 21 + .../common-operations/udf-and-udfa.rst | 44 +- docs/source/user-guide/dataframe.rst | 3 +- .../.cargo/config.toml | 0 .../Cargo.lock | 1333 ++++++++++------- .../Cargo.toml | 12 +- .../build.rs | 0 .../pyproject.toml | 2 +- .../python/tests/_test_table_function.py | 134 ++ .../python/tests/_test_table_provider.py | 4 +- examples/datafusion-ffi-example/src/lib.rs | 30 + .../src/table_function.rs | 56 + .../src/table_provider.rs} | 48 +- examples/python-udwf.py | 2 +- pyproject.toml | 4 + python/datafusion/__init__.py | 33 +- python/datafusion/catalog.py | 12 + python/datafusion/context.py | 76 +- python/datafusion/dataframe.py | 30 +- python/datafusion/expr.py | 18 + python/datafusion/io.py | 8 +- python/datafusion/udf.py | 756 +--------- python/datafusion/user_defined.py | 845 +++++++++++ python/tests/test_dataframe.py | 387 +++++ python/tests/test_expr.py | 523 ++++++- python/tests/test_functions.py | 61 + python/tests/test_imports.py | 2 +- python/tests/test_sql.py | 26 +- python/tests/test_udwf.py | 2 +- python/tests/test_wrapper_coverage.py | 7 +- src/catalog.rs | 2 +- src/config.rs | 21 +- src/context.rs | 195 ++- src/dataframe.rs | 90 +- src/errors.rs | 4 +- src/expr.rs | 48 +- src/expr/literal.rs | 16 +- 
src/expr/window.rs | 29 +- src/functions.rs | 6 +- src/lib.rs | 2 + src/pyarrow_filter_expression.rs | 4 +- src/record_batch.rs | 2 +- src/substrait.rs | 11 +- src/udtf.rs | 127 ++ src/udwf.rs | 8 +- src/utils.rs | 60 +- 57 files changed, 4659 insertions(+), 2052 deletions(-) create mode 100644 dev/changelog/47.0.0.md create mode 100644 docs/source/api/dataframe.rst create mode 100644 docs/source/api/index.rst rename examples/{ffi-table-provider => datafusion-ffi-example}/.cargo/config.toml (100%) rename examples/{ffi-table-provider => datafusion-ffi-example}/Cargo.lock (71%) rename examples/{ffi-table-provider => datafusion-ffi-example}/Cargo.toml (83%) rename examples/{ffi-table-provider => datafusion-ffi-example}/build.rs (100%) rename examples/{ffi-table-provider => datafusion-ffi-example}/pyproject.toml (97%) create mode 100644 examples/datafusion-ffi-example/python/tests/_test_table_function.py rename examples/{ffi-table-provider => datafusion-ffi-example}/python/tests/_test_table_provider.py (94%) create mode 100644 examples/datafusion-ffi-example/src/lib.rs create mode 100644 examples/datafusion-ffi-example/src/table_function.rs rename examples/{ffi-table-provider/src/lib.rs => datafusion-ffi-example/src/table_provider.rs} (71%) create mode 100644 python/datafusion/user_defined.py create mode 100644 src/udtf.rs diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index acabad3ca..61896e43d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -33,7 +33,7 @@ jobs: with: python-version: "3.12" - - uses: astral-sh/setup-uv@v5 + - uses: astral-sh/setup-uv@v6 with: enable-cache: true @@ -52,7 +52,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: astral-sh/setup-uv@v5 + - uses: astral-sh/setup-uv@v6 with: enable-cache: true @@ -94,7 +94,7 @@ jobs: version: "27.4" repo-token: ${{ secrets.GITHUB_TOKEN }} - - uses: astral-sh/setup-uv@v5 + - uses: astral-sh/setup-uv@v6 with: enable-cache: true @@ -150,7 +150,7 @@ jobs: version: "27.4" repo-token: ${{ secrets.GITHUB_TOKEN }} - - uses: astral-sh/setup-uv@v5 + - uses: astral-sh/setup-uv@v6 with: enable-cache: true diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 9037e0a5c..9341488a0 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -58,7 +58,7 @@ jobs: repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Install dependencies and build - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@v6 with: enable-cache: true diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index da3582766..4ae081406 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -76,10 +76,17 @@ jobs: run: cargo clippy --all-targets --all-features -- -D clippy::all -D warnings -A clippy::redundant_closure - name: Install dependencies and build - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@v6 with: enable-cache: true + - name: Check documentation + if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }} + run: | + uv sync --dev --group docs --no-install-package datafusion + uv run --no-project maturin develop --uv + uv run --no-project docs/build.sh + - name: Run tests env: RUST_BACKTRACE: 1 @@ -91,9 +98,9 @@ jobs: - name: FFI unit tests run: | - cd examples/ffi-table-provider + cd examples/datafusion-ffi-example uv run --no-project maturin develop --uv - uv run --no-project pytest python/tests/_test_table_provider.py + uv run --no-project pytest python/tests/_test*.py - name: Cache 
the generated dataset id: cache-tpch-dataset diff --git a/Cargo.lock b/Cargo.lock index b32d19d4d..112167cb4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -73,13 +73,13 @@ checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" [[package]] name = "ahash" -version = "0.8.11" +version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", "const-random", - "getrandom 0.2.15", + "getrandom 0.3.3", "once_cell", "version_check", "zerocopy", @@ -132,9 +132,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.95" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" [[package]] name = "apache-avro" @@ -150,7 +150,7 @@ dependencies = [ "log", "num-bigint", "quad-rand", - "rand", + "rand 0.8.5", "regex-lite", "serde", "serde_bytes", @@ -179,9 +179,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "55.0.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3095aaf545942ff5abd46654534f15b03a90fba78299d661e045e5d587222f0d" +checksum = "b1bb018b6960c87fd9d025009820406f74e83281185a8bdcb44880d2aa5c9a87" dependencies = [ "arrow-arith", "arrow-array", @@ -201,9 +201,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "55.0.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00752064ff47cee746e816ddb8450520c3a52cbad1e256f6fa861a35f86c45e7" +checksum = "44de76b51473aa888ecd6ad93ceb262fb8d40d1f1154a4df2f069b3590aa7575" dependencies = [ "arrow-array", "arrow-buffer", @@ -215,9 +215,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "55.0.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cebfe926794fbc1f49ddd0cdaf898956ca9f6e79541efce62dabccfd81380472" +checksum = "29ed77e22744475a9a53d00026cf8e166fe73cf42d89c4c4ae63607ee1cfcc3f" dependencies = [ "ahash", "arrow-buffer", @@ -226,15 +226,15 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.15.2", + "hashbrown 0.15.3", "num", ] [[package]] name = "arrow-buffer" -version = "55.0.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0303c7ec4cf1a2c60310fc4d6bbc3350cd051a17bf9e9c0a8e47b4db79277824" +checksum = "b0391c96eb58bf7389171d1e103112d3fc3e5625ca6b372d606f2688f1ea4cce" dependencies = [ "bytes", "half", @@ -243,9 +243,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "55.0.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335f769c5a218ea823d3760a743feba1ef7857cba114c01399a891c2fff34285" +checksum = "f39e1d774ece9292697fcbe06b5584401b26bd34be1bec25c33edae65c2420ff" dependencies = [ "arrow-array", "arrow-buffer", @@ -264,9 +264,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "55.0.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "510db7dfbb4d5761826516cc611d97b3a68835d0ece95b034a052601109c0b1b" +checksum = "9055c972a07bf12c2a827debfd34f88d3b93da1941d36e1d9fee85eebe38a12a" dependencies = [ "arrow-array", 
"arrow-cast", @@ -280,9 +280,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "55.0.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8affacf3351a24039ea24adab06f316ded523b6f8c3dbe28fbac5f18743451b" +checksum = "cf75ac27a08c7f48b88e5c923f267e980f27070147ab74615ad85b5c5f90473d" dependencies = [ "arrow-buffer", "arrow-schema", @@ -292,9 +292,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "55.0.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69880a9e6934d9cba2b8630dd08a3463a91db8693b16b499d54026b6137af284" +checksum = "a222f0d93772bd058d1268f4c28ea421a603d66f7979479048c429292fac7b2e" dependencies = [ "arrow-array", "arrow-buffer", @@ -306,9 +306,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "55.0.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8dafd17a05449e31e0114d740530e0ada7379d7cb9c338fd65b09a8130960b0" +checksum = "9085342bbca0f75e8cb70513c0807cc7351f1fbf5cb98192a67d5e3044acb033" dependencies = [ "arrow-array", "arrow-buffer", @@ -328,9 +328,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "55.0.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "895644523af4e17502d42c3cb6b27cb820f0cb77954c22d75c23a85247c849e1" +checksum = "ab2f1065a5cad7b9efa9e22ce5747ce826aa3855766755d4904535123ef431e7" dependencies = [ "arrow-array", "arrow-buffer", @@ -341,9 +341,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "55.0.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9be8a2a4e5e7d9c822b2b8095ecd77010576d824f654d347817640acfc97d229" +checksum = "3703a0e3e92d23c3f756df73d2dc9476873f873a76ae63ef9d3de17fda83b2d8" dependencies = [ "arrow-array", "arrow-buffer", @@ -354,18 +354,20 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "55.0.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7450c76ab7c5a6805be3440dc2e2096010da58f7cab301fdc996a4ee3ee74e49" +checksum = "73a47aa0c771b5381de2b7f16998d351a6f4eb839f1e13d48353e17e873d969b" dependencies = [ "bitflags", + "serde", + "serde_json", ] [[package]] name = "arrow-select" -version = "55.0.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa5f5a93c75f46ef48e4001535e7b6c922eeb0aa20b73cf58d09e13d057490d8" +checksum = "24b7b85575702b23b85272b01bc1c25a01c9b9852305e5d0078c79ba25d995d4" dependencies = [ "ahash", "arrow-array", @@ -377,9 +379,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "55.0.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e7005d858d84b56428ba2a98a107fe88c0132c61793cf6b8232a1f9bfc0452b" +checksum = "9260fddf1cdf2799ace2b4c2fc0356a9789fa7551e0953e35435536fecefebbd" dependencies = [ "arrow-array", "arrow-buffer", @@ -438,7 +440,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] @@ -449,7 +451,7 @@ checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] @@ -475,9 +477,9 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "backtrace" -version = "0.3.74" 
+version = "0.3.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" dependencies = [ "addr2line", "cfg-if", @@ -485,7 +487,7 @@ dependencies = [ "miniz_oxide", "object", "rustc-demangle", - "windows-targets", + "windows-targets 0.52.6", ] [[package]] @@ -516,9 +518,9 @@ dependencies = [ [[package]] name = "bitflags" -version = "2.8.0" +version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" [[package]] name = "blake2" @@ -531,9 +533,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.8.1" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "389a099b34312839e16420d499a9cad9650541715937ffbdd40d36f49e77eeb3" +checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" dependencies = [ "arrayref", "arrayvec", @@ -553,9 +555,9 @@ dependencies = [ [[package]] name = "brotli" -version = "7.0.0" +version = "8.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" +checksum = "9991eea70ea4f293524138648e41ee89b0b2b12ddef3b255effa43c8056e0e0d" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -564,9 +566,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "4.0.2" +version = "5.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74fa05ad7d803d413eb8380983b092cbbaf9a85f151b871360e7b00cd7060b37" +checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -586,9 +588,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.10.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" [[package]] name = "bzip2" @@ -621,9 +623,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.14" +version = "1.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c3d1b2e905a3a7b00a6141adb0e4c0bb941d11caf55349d863942a1cc44e3c9" +checksum = "5f4ac86a9e5bc1e2b3449ab9d7d3a6a405e3d1bb28d7b9be8614f55846ae3766" dependencies = [ "jobserver", "libc", @@ -644,9 +646,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.40" +version = "0.4.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" dependencies = [ "android-tzdata", "iana-time-zone", @@ -657,9 +659,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.10.1" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c6ac4f2c0bf0f44e9161aec9675e1050aa4a530663c4a9e37e108fa948bca9f" +checksum = "efdce149c370f133a071ca8ef6ea340b7b88748ab0810097a9e2976eaa34b4f3" dependencies = [ "chrono", "chrono-tz-build", @@ -668,9 +670,9 @@ dependencies = [ 
[[package]] name = "chrono-tz-build" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94fea34d77a245229e7746bd2beb786cd2a896f306ff491fb8cecb3074b10a7" +checksum = "8f10f8c9340e31fc120ff885fcdb54a0b48e474bbd77cab557f0c30a3e569402" dependencies = [ "parse-zoneinfo", "phf_codegen", @@ -710,7 +712,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.2.16", "once_cell", "tiny-keccak", ] @@ -787,9 +789,9 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.14" +version = "0.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" dependencies = [ "crossbeam-utils", ] @@ -859,9 +861,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffe060b978f74ab446be722adb8a274e052e005bf6dfd171caadc3abaad10080" +checksum = "cc6cb8c2c81eada072059983657d6c9caf3fddefc43b4a65551d243253254a96" dependencies = [ "arrow", "arrow-ipc", @@ -887,7 +889,6 @@ dependencies = [ "datafusion-functions-nested", "datafusion-functions-table", "datafusion-functions-window", - "datafusion-macros", "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -902,7 +903,7 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "rand", + "rand 0.9.1", "regex", "sqlparser", "tempfile", @@ -915,9 +916,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61fe34f401bd03724a1f96d12108144f8cd495a3cdda2bf5e091822fb80b7e66" +checksum = "b7be8d1b627843af62e447396db08fe1372d882c0eb8d0ea655fd1fbc33120ee" dependencies = [ "arrow", "async-trait", @@ -941,9 +942,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4411b8e3bce5e0fc7521e44f201def2e2d5d1b5f176fb56e8cdc9942c890f00" +checksum = "38ab16c5ae43f65ee525fc493ceffbc41f40dee38b01f643dfcfc12959e92038" dependencies = [ "arrow", "async-trait", @@ -964,9 +965,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0734015d81c8375eb5d4869b7f7ecccc2ee8d6cb81948ef737cd0e7b743bd69c" +checksum = "d3d56b2ac9f476b93ca82e4ef5fb00769c8a3f248d12b4965af7e27635fa7e12" dependencies = [ "ahash", "apache-avro", @@ -989,9 +990,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5167bb1d2ccbb87c6bc36c295274d7a0519b14afcfdaf401d53cbcaa4ef4968b" +checksum = "16015071202d6133bc84d72756176467e3e46029f3ce9ad2cb788f9b1ff139b2" dependencies = [ "futures", "log", @@ -1000,9 +1001,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04e602dcdf2f50c2abf297cc2203c73531e6f48b29516af7695d338cf2a778b1" +checksum 
= "b77523c95c89d2a7eb99df14ed31390e04ab29b43ff793e562bdc1716b07e17b" dependencies = [ "arrow", "async-compression", @@ -1025,7 +1026,7 @@ dependencies = [ "log", "object_store", "parquet", - "rand", + "rand 0.9.1", "tempfile", "tokio", "tokio-util", @@ -1036,9 +1037,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4ea5111aab9d3f2a8bff570343cccb03ce4c203875ef5a566b7d6f1eb72559e" +checksum = "1371cb4ef13c2e3a15685d37a07398cf13e3b0a85e705024b769fc4c511f5fef" dependencies = [ "apache-avro", "arrow", @@ -1061,9 +1062,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3bb2253952dc32296ed5b84077cb2e0257fea4be6373e1c376426e17ead4ef6" +checksum = "40d25c5e2c0ebe8434beeea997b8e88d55b3ccc0d19344293f2373f65bc524fc" dependencies = [ "arrow", "async-trait", @@ -1086,9 +1087,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8c7f47a5d2fe03bfa521ec9bafdb8a5c82de8377f60967c3663f00c8790352" +checksum = "3dc6959e1155741ab35369e1dc7673ba30fc45ed568fad34c01b7cb1daeb4d4c" dependencies = [ "arrow", "async-trait", @@ -1111,9 +1112,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27d15868ea39ed2dc266728b554f6304acd473de2142281ecfa1294bb7415923" +checksum = "b7a6afdfe358d70f4237f60eaef26ae5a1ce7cb2c469d02d5fc6c7fd5d84e58b" dependencies = [ "arrow", "async-trait", @@ -1136,21 +1137,21 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "rand", + "rand 0.9.1", "tokio", ] [[package]] name = "datafusion-doc" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a91f8c2c5788ef32f48ff56c68e5b545527b744822a284373ac79bba1ba47292" +checksum = "9bcd8a3e3e3d02ea642541be23d44376b5d5c37c2938cce39b3873cdf7186eea" [[package]] name = "datafusion-execution" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06f004d100f49a3658c9da6fb0c3a9b760062d96cd4ad82ccc3b7b69a9fb2f84" +checksum = "670da1d45d045eee4c2319b8c7ea57b26cf48ab77b630aaa50b779e406da476a" dependencies = [ "arrow", "dashmap", @@ -1160,16 +1161,16 @@ dependencies = [ "log", "object_store", "parking_lot", - "rand", + "rand 0.9.1", "tempfile", "url", ] [[package]] name = "datafusion-expr" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a4e4ce3802609be38eeb607ee72f6fe86c3091460de9dbfae9e18db423b3964" +checksum = "b3a577f64bdb7e2cc4043cd97f8901d8c504711fde2dbcb0887645b00d7c660b" dependencies = [ "arrow", "chrono", @@ -1188,9 +1189,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "422ac9cf3b22bbbae8cdf8ceb33039107fde1b5492693168f13bd566b1bcc839" +checksum = "51b7916806ace3e9f41884f230f7f38ebf0e955dfbd88266da1826f29a0b9a6a" dependencies = [ "arrow", "datafusion-common", @@ -1201,9 +1202,9 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "47.0.0" +version = "48.0.0" source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cf3fe9ab492c56daeb7beed526690d33622d388b8870472e0b7b7f55490338c" +checksum = "980cca31de37f5dadf7ea18e4ffc2b6833611f45bed5ef9de0831d2abb50f1ef" dependencies = [ "abi_stable", "arrow", @@ -1211,7 +1212,9 @@ dependencies = [ "async-ffi", "async-trait", "datafusion", + "datafusion-functions-aggregate-common", "datafusion-proto", + "datafusion-proto-common", "futures", "log", "prost", @@ -1221,9 +1224,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ddf0a0a2db5d2918349c978d42d80926c6aa2459cd8a3c533a84ec4bb63479e" +checksum = "7fb31c9dc73d3e0c365063f91139dc273308f8a8e124adda9898db8085d68357" dependencies = [ "arrow", "arrow-buffer", @@ -1241,7 +1244,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", - "rand", + "rand 0.9.1", "regex", "sha2", "unicode-segmentation", @@ -1250,9 +1253,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "408a05dafdc70d05a38a29005b8b15e21b0238734dab1e98483fcb58038c5aba" +checksum = "ebb72c6940697eaaba9bd1f746a697a07819de952b817e3fb841fb75331ad5d4" dependencies = [ "ahash", "arrow", @@ -1271,9 +1274,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "756d21da2dd6c9bef97af1504970ff56cbf35d03fbd4ffd62827f02f4d2279d4" +checksum = "d7fdc54656659e5ecd49bf341061f4156ab230052611f4f3609612a0da259696" dependencies = [ "ahash", "arrow", @@ -1284,9 +1287,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d8d50f6334b378930d992d801a10ac5b3e93b846b39e4a05085742572844537" +checksum = "fad94598e3374938ca43bca6b675febe557e7a14eb627d617db427d70d65118b" dependencies = [ "arrow", "arrow-ord", @@ -1305,9 +1308,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc9a97220736c8fff1446e936be90d57216c06f28969f9ffd3b72ac93c958c8a" +checksum = "de2fc6c2946da5cab8364fb28b5cac3115f0f3a87960b235ed031c3f7e2e639b" dependencies = [ "arrow", "async-trait", @@ -1321,10 +1324,11 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cefc2d77646e1aadd1d6a9c40088937aedec04e68c5f0465939912e1291f8193" +checksum = "3e5746548a8544870a119f556543adcd88fe0ba6b93723fe78ad0439e0fbb8b4" dependencies = [ + "arrow", "datafusion-common", "datafusion-doc", "datafusion-expr", @@ -1338,9 +1342,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd4aff082c42fa6da99ce0698c85addd5252928c908eb087ca3cfa64ff16b313" +checksum = "dcbe9404382cda257c434f22e13577bee7047031dfdb6216dd5e841b9465e6fe" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1348,20 +1352,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "47.0.0" +version = "48.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "df6f88d7ee27daf8b108ba910f9015176b36fbc72902b1ca5c2a5f1d1717e1a1" +checksum = "8dce50e3b637dab0d25d04d2fe79dfdca2b257eabd76790bffd22c7f90d700c8" dependencies = [ "datafusion-expr", "quote", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] name = "datafusion-optimizer" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "084d9f979c4b155346d3c34b18f4256e6904ded508e9554d90fed416415c3515" +checksum = "03cfaacf06445dc3bbc1e901242d2a44f2cae99a744f49f3fefddcee46240058" dependencies = [ "arrow", "chrono", @@ -1378,9 +1382,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64c536062b0076f4e30084065d805f389f9fe38af0ca75bcbac86bc5e9fbab65" +checksum = "1908034a89d7b2630898e06863583ae4c00a0dd310c1589ca284195ee3f7f8a6" dependencies = [ "ahash", "arrow", @@ -1395,14 +1399,14 @@ dependencies = [ "itertools 0.14.0", "log", "paste", - "petgraph", + "petgraph 0.8.2", ] [[package]] name = "datafusion-physical-expr-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8a92b53b3193fac1916a1c5b8e3f4347c526f6822e56b71faa5fb372327a863" +checksum = "47b7a12dd59ea07614b67dbb01d85254fbd93df45bcffa63495e11d3bdf847df" dependencies = [ "ahash", "arrow", @@ -1414,9 +1418,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fa0a5ac94c7cf3da97bedabd69d6bbca12aef84b9b37e6e9e8c25286511b5e2" +checksum = "4371cc4ad33978cc2a8be93bd54a232d3f2857b50401a14631c0705f3f910aae" dependencies = [ "arrow", "datafusion-common", @@ -1433,9 +1437,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "690c615db468c2e5fe5085b232d8b1c088299a6c63d87fd960a354a71f7acb55" +checksum = "dc47bc33025757a5c11f2cd094c5b6b5ed87f46fa33c023e6fdfa25fcbfade23" dependencies = [ "ahash", "arrow", @@ -1463,9 +1467,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a1afb2bdb05de7ff65be6883ebfd4ec027bd9f1f21c46aa3afd01927160a83" +checksum = "d8f5d9acd7d96e3bf2a7bb04818373cab6e51de0356e3694b94905fee7b4e8b6" dependencies = [ "arrow", "chrono", @@ -1479,9 +1483,9 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35b7a5876ebd6b564fb9a1fd2c3a2a9686b787071a256b47e4708f0916f9e46f" +checksum = "09ecb5ec152c4353b60f7a5635489834391f7a291d2b39a4820cd469e318b78e" dependencies = [ "arrow", "datafusion-common", @@ -1490,7 +1494,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "46.0.0" +version = "47.0.0" dependencies = [ "arrow", "async-trait", @@ -1513,9 +1517,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad229a134c7406c057ece00c8743c0c34b97f4e72f78b475fe17b66c5e14fa4f" +checksum = "d7485da32283985d6b45bd7d13a65169dcbe8c869e25d01b2cfbc425254b4b49" dependencies = [ 
"arrow", "async-trait", @@ -1537,9 +1541,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64f6ab28b72b664c21a27b22a2ff815fd390ed224c26e89a93b5a8154a4e8607" +checksum = "a466b15632befddfeac68c125f0260f569ff315c6831538cbb40db754134e0df" dependencies = [ "arrow", "bigdecimal", @@ -1554,9 +1558,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "061efc0937f0ce3abb37ed0d56cfa01dd0e654b90e408656d05e846c8b7599fe" +checksum = "f2f3973b1a4f6e9ee7fd99a22d58e1c06e6723a28dc911a60df575974c8339aa" dependencies = [ "async-recursion", "async-trait", @@ -1590,20 +1594,20 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] name = "dyn-clone" -version = "1.0.18" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "feeef44e73baff3a26d371801df019877a9866a8c493d315ab00177843314f35" +checksum = "1c7a8fb8a9fbf66c1f703fe16184d10ca0ee9d23be5b4436400408ba54a95005" [[package]] name = "either" -version = "1.13.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "equivalent" @@ -1613,9 +1617,9 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.10" +version = "0.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" +checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" dependencies = [ "libc", "windows-sys 0.59.0", @@ -1662,9 +1666,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "foldhash" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" [[package]] name = "form_urlencoded" @@ -1731,7 +1735,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] @@ -1785,9 +1789,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", "js-sys", @@ -1798,14 +1802,16 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.3.1" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ "cfg-if", + "js-sys", "libc", - "wasi 0.13.3+wasi-0.2.2", - "windows-targets", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", + "wasm-bindgen", ] [[package]] @@ -1822,9 +1828,9 @@ checksum = 
"a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" [[package]] name = "h2" -version = "0.4.7" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" +checksum = "a9421a676d1b147b16b82c9225157dc629087ef8ec4d5e2960f9437a90dac0a5" dependencies = [ "atomic-waker", "bytes", @@ -1862,9 +1868,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.2" +version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" dependencies = [ "allocator-api2", "equivalent", @@ -1885,9 +1891,9 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "http" -version = "1.2.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" +checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" dependencies = [ "bytes", "fnv", @@ -1906,12 +1912,12 @@ dependencies = [ [[package]] name = "http-body-util" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", - "futures-util", + "futures-core", "http", "http-body", "pin-project-lite", @@ -1919,15 +1925,15 @@ dependencies = [ [[package]] name = "httparse" -version = "1.10.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2d708df4e7140240a16cd6ab0ab65c972d7433ab77819ea693fde9c43811e2a" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" [[package]] name = "humantime" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" [[package]] name = "hyper" @@ -1969,9 +1975,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" +checksum = "497bbc33a26fdd4af9ed9c70d63f61cf56a938375fbb32df34db9b1cd6d643f2" dependencies = [ "bytes", "futures-channel", @@ -1979,6 +1985,7 @@ dependencies = [ "http", "http-body", "hyper", + "libc", "pin-project-lite", "socket2", "tokio", @@ -1988,14 +1995,15 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.61" +version = "0.1.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", + "log", "wasm-bindgen", "windows-core", ] @@ -2011,21 +2019,22 @@ dependencies = [ [[package]] name = "icu_collections" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +checksum = 
"200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" dependencies = [ "displaydoc", + "potential_utf", "yoke", "zerofrom", "zerovec", ] [[package]] -name = "icu_locid" -version = "1.5.0" +name = "icu_locale_core" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" dependencies = [ "displaydoc", "litemap", @@ -2034,31 +2043,11 @@ dependencies = [ "zerovec", ] -[[package]] -name = "icu_locid_transform" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_locid_transform_data", - "icu_provider", - "tinystr", - "zerovec", -] - -[[package]] -name = "icu_locid_transform_data" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" - [[package]] name = "icu_normalizer" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" dependencies = [ "displaydoc", "icu_collections", @@ -2066,67 +2055,54 @@ dependencies = [ "icu_properties", "icu_provider", "smallvec", - "utf16_iter", - "utf8_iter", - "write16", "zerovec", ] [[package]] name = "icu_normalizer_data" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" +checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" [[package]] name = "icu_properties" -version = "1.5.1" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +checksum = "2549ca8c7241c82f59c80ba2a6f415d931c5b58d24fb8412caa1a1f02c49139a" dependencies = [ "displaydoc", "icu_collections", - "icu_locid_transform", + "icu_locale_core", "icu_properties_data", "icu_provider", - "tinystr", + "potential_utf", + "zerotrie", "zerovec", ] [[package]] name = "icu_properties_data" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" +checksum = "8197e866e47b68f8f7d95249e172903bec06004b18b2937f1095d40a0c57de04" [[package]] name = "icu_provider" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" dependencies = [ "displaydoc", - "icu_locid", - "icu_provider_macros", + "icu_locale_core", "stable_deref_trait", "tinystr", "writeable", "yoke", "zerofrom", + "zerotrie", "zerovec", ] -[[package]] -name = "icu_provider_macros" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.100", -] - [[package]] name = "idna" version = "1.0.3" @@ -2140,9 +2116,9 @@ dependencies = [ [[package]] name 
= "idna_adapter" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" dependencies = [ "icu_normalizer", "icu_properties", @@ -2155,14 +2131,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", - "hashbrown 0.15.2", + "hashbrown 0.15.3", ] [[package]] name = "indoc" -version = "2.0.5" +version = "2.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" +checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" [[package]] name = "integer-encoding" @@ -2196,16 +2172,17 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.14" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "jobserver" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" dependencies = [ + "getrandom 0.3.3", "libc", ] @@ -2291,9 +2268,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.171" +version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" [[package]] name = "libflate" @@ -2331,15 +2308,15 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.11" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" [[package]] name = "libmimalloc-sys" -version = "0.1.39" +version = "0.1.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23aa6811d3bd4deb8a84dde645f943476d13b248d818edcf8ce0b2f37f036b44" +checksum = "ec9d6fac27761dabcd4ee73571cdb06b7022dc99089acbe5435691edffaac0f4" dependencies = [ "cc", "libc", @@ -2356,15 +2333,15 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.15" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" [[package]] name = "litemap" -version = "0.7.4" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" +checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" [[package]] name = "lock_api" @@ -2378,9 +2355,15 @@ dependencies = [ [[package]] name = "log" -version = "0.4.25" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" +checksum = 
"13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "lru-slab" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "lz4_flex" @@ -2429,9 +2412,9 @@ dependencies = [ [[package]] name = "mimalloc" -version = "0.1.43" +version = "0.1.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68914350ae34959d83f732418d51e2427a794055d0b9529f48259ac07af65633" +checksum = "995942f432bbb4822a7e9c3faa87a695185b0d09273ba85f097b54f4e458f2af" dependencies = [ "libmimalloc-sys", ] @@ -2464,9 +2447,9 @@ dependencies = [ [[package]] name = "multimap" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03" +checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" [[package]] name = "num" @@ -2554,9 +2537,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9ce831b09395f933addbc56d894d889e4b226eba304d4e7adbab591e26daf1e" +checksum = "d94ac16b433c0ccf75326388c893d2835ab7457ea35ab8ba5d745c053ef5fa16" dependencies = [ "async-trait", "base64 0.22.1", @@ -2574,25 +2557,27 @@ dependencies = [ "parking_lot", "percent-encoding", "quick-xml", - "rand", + "rand 0.9.1", "reqwest", "ring", "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", - "thiserror 2.0.11", + "thiserror 2.0.12", "tokio", "tracing", "url", "walkdir", + "wasm-bindgen-futures", + "web-time", ] [[package]] name = "once_cell" -version = "1.20.3" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "openssl-probe" @@ -2629,14 +2614,14 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-targets", + "windows-targets 0.52.6", ] [[package]] name = "parquet" -version = "55.0.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd31a8290ac5b19f09ad77ee7a1e6a541f1be7674ad410547d5f1eef6eef4a9c" +checksum = "be7b2d778f6b841d37083ebdf32e33a524acde1266b5884a8ca29bf00dfa1231" dependencies = [ "ahash", "arrow-array", @@ -2653,7 +2638,7 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.15.2", + "hashbrown 0.15.3", "lz4_flex", "num", "num-bigint", @@ -2736,6 +2721,18 @@ dependencies = [ "indexmap", ] +[[package]] +name = "petgraph" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54acf3a685220b533e437e264e4d932cfbdc4cc7ec0cd232ed73c08d03b8a7ca" +dependencies = [ + "fixedbitset", + "hashbrown 0.15.3", + "indexmap", + "serde", +] + [[package]] name = "phf" version = "0.11.3" @@ -2762,7 +2759,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ "phf_shared", - "rand", + "rand 0.8.5", ] [[package]] @@ -2788,21 +2785,30 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "portable-atomic" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" +checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" + +[[package]] +name = "potential_utf" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585" +dependencies = [ + "zerovec", +] [[package]] name = "ppv-lite86" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ "zerocopy", ] @@ -2814,14 +2820,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "664ec5419c51e34154eec046ebcba56312d5a2fc3b09a06da188e1ad21afadf6" dependencies = [ "proc-macro2", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] name = "proc-macro2" -version = "1.0.93" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] @@ -2843,16 +2849,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ "heck", - "itertools 0.13.0", + "itertools 0.14.0", "log", "multimap", "once_cell", - "petgraph", + "petgraph 0.7.1", "prettyplease", "prost", "prost-types", "regex", - "syn 2.0.100", + "syn 2.0.101", "tempfile", ] @@ -2863,10 +2869,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.13.0", + "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] @@ -2889,18 +2895,18 @@ dependencies = [ [[package]] name = "psm" -version = "0.1.25" +version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f58e5423e24c18cc840e1c98370b3993c6649cd1678b4d24318bcf0a083cbe88" +checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f" dependencies = [ "cc", ] [[package]] name = "pyo3" -version = "0.24.1" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17da310086b068fbdcefbba30aeb3721d5bb9af8db4987d6735b2183ca567229" +checksum = "e5203598f366b11a02b13aa20cab591229ff0a89fd121a308a5df751d5fc9219" dependencies = [ "cfg-if", "indoc", @@ -2929,9 +2935,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.24.1" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e27165889bd793000a098bb966adc4300c312497ea25cf7a690a9f0ac5aa5fc1" +checksum = "99636d423fa2ca130fa5acde3059308006d46f98caac629418e53f7ebb1e9999" dependencies = [ "once_cell", "target-lexicon", @@ -2939,9 +2945,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.24.1" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"05280526e1dbf6b420062f3ef228b78c0c54ba94e157f5cb724a609d0f2faabc" +checksum = "78f9cf92ba9c409279bc3305b5409d90db2d2c22392d443a87df3a1adad59e33" dependencies = [ "libc", "pyo3-build-config", @@ -2949,27 +2955,27 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.24.1" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c3ce5686aa4d3f63359a5100c62a127c9f15e8398e5fdeb5deef1fed5cd5f44" +checksum = "0b999cb1a6ce21f9a6b147dcf1be9ffedf02e0043aec74dc390f3007047cecd9" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] name = "pyo3-macros-backend" -version = "0.24.1" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4cf6faa0cbfb0ed08e89beb8103ae9724eb4750e3a78084ba4017cbe94f3855" +checksum = "822ece1c7e1012745607d5cf0bcb2874769f0f7cb34c4cde03b9358eb9ef911a" dependencies = [ "heck", "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] @@ -2980,9 +2986,9 @@ checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" [[package]] name = "quick-xml" -version = "0.37.2" +version = "0.37.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "165859e9e55f79d67b96c5d96f4e88b6f2695a1972849c15a6a3f5c59fc2c003" +checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" dependencies = [ "memchr", "serde", @@ -2990,37 +2996,40 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.6" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef" +checksum = "626214629cda6781b6dc1d316ba307189c85ba657213ce642d9c77670f8202c8" dependencies = [ "bytes", + "cfg_aliases", "pin-project-lite", "quinn-proto", "quinn-udp", "rustc-hash", "rustls", "socket2", - "thiserror 2.0.11", + "thiserror 2.0.12", "tokio", "tracing", + "web-time", ] [[package]] name = "quinn-proto" -version = "0.11.9" +version = "0.11.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" +checksum = "49df843a9161c85bb8aae55f101bc0bac8bcafd637a620d9122fd7e0b2f7422e" dependencies = [ "bytes", - "getrandom 0.2.15", - "rand", + "getrandom 0.3.3", + "lru-slab", + "rand 0.9.1", "ring", "rustc-hash", "rustls", "rustls-pki-types", "slab", - "thiserror 2.0.11", + "thiserror 2.0.12", "tinyvec", "tracing", "web-time", @@ -3028,9 +3037,9 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.10" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e46f3055866785f6b92bc6164b76be02ca8f2eb4b002c0354b28cf4c119e5944" +checksum = "ee4e529991f949c5e25755532370b8af5d114acae52326361d68d47af64aa842" dependencies = [ "cfg_aliases", "libc", @@ -3049,6 +3058,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + [[package]] name = "rand" version = "0.8.5" @@ -3056,8 +3071,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha", - "rand_core", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.3", ] [[package]] @@ -3067,7 +3092,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.3", ] [[package]] @@ -3076,7 +3111,16 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.2.16", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom 0.3.3", ] [[package]] @@ -3096,14 +3140,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] name = "redox_syscall" -version = "0.5.8" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" +checksum = "928fca9cf2aa042393a8325b9ead81d2f0df4cb12e1e24cef072922ccd99c5af" dependencies = [ "bitflags", ] @@ -3149,7 +3193,7 @@ version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ef7fa9ed0256d64a688a3747d0fef7a88851c18a5e1d57f115f38ec2e09366" dependencies = [ - "hashbrown 0.15.2", + "hashbrown 0.15.3", "memchr", ] @@ -3164,9 +3208,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.12" +version = "0.12.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da" +checksum = "d19c46a6fdd48bc4dab94b6103fccc55d34c67cc0ad04653aad4ea2a07cd7bbb" dependencies = [ "base64 0.22.1", "bytes", @@ -3210,13 +3254,13 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.9" +version = "0.17.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e75ec5e92c4d8aede845126adc388046234541629e76029599ed35a003c7ed24" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.15", + "getrandom 0.2.16", "libc", "untrusted", "windows-sys 0.52.0", @@ -3251,9 +3295,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.44" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" dependencies = [ "bitflags", "errno", @@ -3264,9 +3308,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.23" +version = "0.23.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47796c98c480fce5406ef69d1c76378375492c3b0a0de587be0c1d9feb12f395" +checksum = 
"730944ca083c1c233a75c09f199e973ca499344a2b7ba9e755c457e86fb4a321" dependencies = [ "once_cell", "ring", @@ -3299,18 +3343,19 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c" +checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" dependencies = [ "web-time", + "zeroize", ] [[package]] name = "rustls-webpki" -version = "0.102.8" +version = "0.103.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +checksum = "e4a72fe2bcf7a6ac6fd7d0b9e5cb68aeb7d4c0a0271730218b3e92d43b4eb435" dependencies = [ "ring", "rustls-pki-types", @@ -3319,15 +3364,15 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" +checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" [[package]] name = "ryu" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "same-file" @@ -3368,7 +3413,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] @@ -3411,9 +3456,9 @@ dependencies = [ [[package]] name = "seq-macro" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" [[package]] name = "serde" @@ -3426,9 +3471,9 @@ dependencies = [ [[package]] name = "serde_bytes" -version = "0.11.15" +version = "0.11.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "387cc504cb06bb40a96c8e04e951fe01854cf6bc921053c954e4a606d9675c6a" +checksum = "8437fd221bde2d4ca316d61b90e337e9e702b3820b87d63caa9ba6c02bd06d96" dependencies = [ "serde", ] @@ -3441,7 +3486,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] @@ -3452,7 +3497,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] @@ -3476,7 +3521,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] @@ -3506,9 +3551,9 @@ dependencies = [ [[package]] name = "sha2" -version = "0.10.8" +version = "0.10.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if", "cpufeatures", @@ -3544,9 +3589,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.14.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" +checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" 
[[package]] name = "snap" @@ -3556,9 +3601,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" -version = "0.5.8" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" +checksum = "4f5fd57c80058a56cf5c777ab8a126398ece8e442983605d280a44ce79d0edef" dependencies = [ "libc", "windows-sys 0.52.0", @@ -3583,7 +3628,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] @@ -3594,9 +3639,9 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "stacker" -version = "0.1.18" +version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d08feb8f695b465baed819b03c128dc23f57a694510ab1f06c77f763975685e" +checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b" dependencies = [ "cc", "cfg-if", @@ -3627,14 +3672,14 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] name = "substrait" -version = "0.55.1" +version = "0.56.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "048fe52a3664881ccdfdc9bdb0f4e8805f3444ee64abf299d365c54f6a2ffabb" +checksum = "13de2e20128f2a018dab1cfa30be83ae069219a65968c6f89df66ad124de2397" dependencies = [ "heck", "pbjson", @@ -3651,7 +3696,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.100", + "syn 2.0.101", "typify", "walkdir", ] @@ -3675,9 +3720,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.100" +version = "2.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" +checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" dependencies = [ "proc-macro2", "quote", @@ -3695,13 +3740,13 @@ dependencies = [ [[package]] name = "synstructure" -version = "0.13.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] @@ -3712,13 +3757,12 @@ checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" [[package]] name = "tempfile" -version = "3.16.0" +version = "3.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38c246215d7d24f48ae091a2902398798e05d978b24315d6efbc00ede9a8bb91" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" dependencies = [ - "cfg-if", "fastrand", - "getrandom 0.3.1", + "getrandom 0.3.3", "once_cell", "rustix", "windows-sys 0.59.0", @@ -3735,11 +3779,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.11" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" dependencies = [ - "thiserror-impl 2.0.11", + "thiserror-impl 2.0.12", ] [[package]] @@ -3750,18 +3794,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.100", 
+ "syn 2.0.101", ] [[package]] name = "thiserror-impl" -version = "2.0.11" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] @@ -3786,9 +3830,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.7.6" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" dependencies = [ "displaydoc", "zerovec", @@ -3796,9 +3840,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "022db8904dfa342efe721985167e9fcd16c29b226db4397ed752a761cfce81e8" +checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71" dependencies = [ "tinyvec_macros", ] @@ -3811,9 +3855,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.44.2" +version = "1.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48" +checksum = "2513ca694ef9ede0fb23fe71a4ee4107cb102b9dc1930f6d0fd77aae068ae165" dependencies = [ "backtrace", "bytes", @@ -3833,14 +3877,14 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] name = "tokio-rustls" -version = "0.26.1" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37" +checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" dependencies = [ "rustls", "tokio", @@ -3848,9 +3892,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.14" +version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b9590b93e6fcc1739458317cccd391ad3955e2bde8913edf6f95f9e65a8f034" +checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df" dependencies = [ "bytes", "futures-core", @@ -3905,7 +3949,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] @@ -3977,20 +4021,20 @@ checksum = "f9534daa9fd3ed0bd911d462a37f172228077e7abf18c18a5f67199d959205f8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] name = "typenum" -version = "1.17.0" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" +checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" [[package]] name = "typify" -version = "0.3.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e03ba3643450cfd95a1aca2e1938fef63c1c1994489337998aff4ad771f21ef8" +checksum = "6c6c647a34e851cf0260ccc14687f17cdcb8302ff1a8a687a24b97ca0f82406f" dependencies = [ "typify-impl", "typify-macro", @@ -3998,9 +4042,9 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.3.0" 
+version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bce48219a2f3154aaa2c56cbf027728b24a3c8fe0a47ed6399781de2b3f3eeaf" +checksum = "741b7f1e2e1338c0bee5ad5a7d3a9bbd4e24c33765c08b7691810e68d879365d" dependencies = [ "heck", "log", @@ -4011,16 +4055,16 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.100", - "thiserror 2.0.11", + "syn 2.0.101", + "thiserror 2.0.12", "unicode-ident", ] [[package]] name = "typify-macro" -version = "0.3.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68b5780d745920ed73c5b7447496a9b5c42ed2681a9b70859377aec423ecf02b" +checksum = "7560adf816a1e8dad7c63d8845ef6e31e673e39eab310d225636779230cbedeb" dependencies = [ "proc-macro2", "quote", @@ -4029,15 +4073,15 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.100", + "syn 2.0.101", "typify-impl", ] [[package]] name = "unicode-ident" -version = "1.0.16" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "unicode-segmentation" @@ -4053,9 +4097,9 @@ checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" [[package]] name = "unindent" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" [[package]] name = "unsafe-libyaml" @@ -4080,12 +4124,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "utf16_iter" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" - [[package]] name = "utf8_iter" version = "1.0.4" @@ -4094,11 +4132,11 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" +checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" dependencies = [ - "getrandom 0.3.1", + "getrandom 0.3.3", "js-sys", "serde", "wasm-bindgen", @@ -4137,9 +4175,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasi" -version = "0.13.3+wasi-0.2.2" +version = "0.14.2+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" dependencies = [ "wit-bindgen-rt", ] @@ -4166,7 +4204,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.100", + "syn 2.0.101", "wasm-bindgen-shared", ] @@ -4201,7 +4239,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.100", + "syn 2.0.101", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4281,11 +4319,37 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-core" -version = "0.52.0" +version = "0.61.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"46ec44dc15085cea82cf9c78f85a9114c463a369786585ad2882d1ff0b0acf40" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings 0.4.1", +] + +[[package]] +name = "windows-implement" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "windows-interface" +version = "0.59.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" dependencies = [ - "windows-targets", + "proc-macro2", + "quote", + "syn 2.0.101", ] [[package]] @@ -4296,32 +4360,40 @@ checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" [[package]] name = "windows-registry" -version = "0.2.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +checksum = "4286ad90ddb45071efd1a66dfa43eb02dd0dfbae1545ad6cc3c51cf34d7e8ba3" dependencies = [ "windows-result", - "windows-strings", - "windows-targets", + "windows-strings 0.3.1", + "windows-targets 0.53.0", ] [[package]] name = "windows-result" -version = "0.2.0" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +checksum = "4b895b5356fc36103d0f64dd1e94dfa7ac5633f1c9dd6e80fe9ec4adef69e09d" dependencies = [ - "windows-targets", + "windows-link", ] [[package]] name = "windows-strings" -version = "0.1.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319" dependencies = [ - "windows-result", - "windows-targets", + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a7ab927b2637c19b3dbe0965e75d8f2d30bdd697a1516191cad2ec4df8fb28a" +dependencies = [ + "windows-link", ] [[package]] @@ -4330,7 +4402,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", ] [[package]] @@ -4339,7 +4411,7 @@ version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", ] [[package]] @@ -4348,14 +4420,30 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + 
"windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1e4c7e8ceaaf9cb7d7507c974735728ab453b67ef8f18febdd7c11fe59dca8b" +dependencies = [ + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", ] [[package]] @@ -4364,68 +4452,110 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + [[package]] name = "wit-bindgen-rt" -version = "0.33.0" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ "bitflags", ] -[[package]] -name = "write16" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" - [[package]] name = "writeable" -version = "0.5.5" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" +checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" [[package]] name = "xz2" @@ -4438,9 +4568,9 @@ dependencies = [ [[package]] name = "yoke" -version = "0.7.5" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" dependencies = [ "serde", "stable_deref_trait", @@ -4450,55 +4580,54 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.7.5" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.100", + "syn 2.0.101", "synstructure", ] [[package]] name = "zerocopy" -version = "0.7.35" +version = "0.8.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" dependencies = [ - "byteorder", "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.35" +version = "0.8.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" dependencies = [ "proc-macro2", "quote", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] name = "zerofrom" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.100", + "syn 2.0.101", "synstructure", ] @@ -4508,11 +4637,22 @@ version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" 
+[[package]] +name = "zerotrie" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + [[package]] name = "zerovec" -version = "0.10.4" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +checksum = "4a05eb080e015ba39cc9e23bbe5e7fb04d5fb040350f99f34e338d5fdd294428" dependencies = [ "yoke", "zerofrom", @@ -4521,13 +4661,13 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.10.3" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.100", + "syn 2.0.101", ] [[package]] @@ -4538,27 +4678,27 @@ checksum = "868b928d7949e09af2f6086dfc1e01936064cc7a819253bce650d4e2a2d63ba8" [[package]] name = "zstd" -version = "0.13.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "7.2.1" +version = "7.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" dependencies = [ "zstd-sys", ] [[package]] name = "zstd-sys" -version = "2.0.13+zstd.1.5.6" +version = "2.0.15+zstd.1.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" +checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237" dependencies = [ "cc", "pkg-config", diff --git a/Cargo.toml b/Cargo.toml index 2c4188bb0..4135e64e2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "46.0.0" +version = "47.0.0" homepage = "https://datafusion.apache.org/python" repository = "https://github.com/apache/datafusion-python" authors = ["Apache DataFusion "] @@ -34,20 +34,20 @@ protoc = [ "datafusion-substrait/protoc" ] substrait = ["dep:datafusion-substrait"] [dependencies] -tokio = { version = "1.44", features = ["macros", "rt", "rt-multi-thread", "sync"] } +tokio = { version = "1.45", features = ["macros", "rt", "rt-multi-thread", "sync"] } pyo3 = { version = "0.24", features = ["extension-module", "abi3", "abi3-py39"] } pyo3-async-runtimes = { version = "0.24", features = ["tokio-runtime"]} -arrow = { version = "55.0.0", features = ["pyarrow"] } -datafusion = { version = "47.0.0", features = ["avro", "unicode_expressions"] } -datafusion-substrait = { version = "47.0.0", optional = true } -datafusion-proto = { version = "47.0.0" } -datafusion-ffi = { version = "47.0.0" } +arrow = { version = "55.1.0", features = ["pyarrow"] } +datafusion = { version = "48.0.0", features = ["avro", "unicode_expressions"] } +datafusion-substrait = { version = "48.0.0", optional = true } +datafusion-proto = { version = "48.0.0" } +datafusion-ffi = { version = "48.0.0" } prost = "0.13.1" # keep in line with `datafusion-substrait` uuid = { version = 
"1.16", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } async-trait = "0.1.88" futures = "0.3" -object_store = { version = "0.12.0", features = ["aws", "gcp", "azure", "http"] } +object_store = { version = "0.12.1", features = ["aws", "gcp", "azure", "http"] } url = "2" [build-dependencies] diff --git a/dev/changelog/47.0.0.md b/dev/changelog/47.0.0.md new file mode 100644 index 000000000..a7ed90313 --- /dev/null +++ b/dev/changelog/47.0.0.md @@ -0,0 +1,64 @@ + + +# Apache DataFusion Python 47.0.0 Changelog + +This release consists of 23 commits from 5 contributors. See credits at the end of this changelog for more information. + +**Implemented enhancements:** + +- feat: support unparser [#1088](https://github.com/apache/datafusion-python/pull/1088) (chenkovsky) +- feat: update datafusion dependency 47 [#1107](https://github.com/apache/datafusion-python/pull/1107) (timsaucer) +- feat: alias with metadata [#1111](https://github.com/apache/datafusion-python/pull/1111) (chenkovsky) +- feat: add missing PyLogicalPlan to_variant [#1085](https://github.com/apache/datafusion-python/pull/1085) (chenkovsky) +- feat: add user defined table function support [#1113](https://github.com/apache/datafusion-python/pull/1113) (timsaucer) + +**Fixed bugs:** + +- fix: recursive import [#1117](https://github.com/apache/datafusion-python/pull/1117) (chenkovsky) + +**Other:** + +- Update changelog and version number [#1089](https://github.com/apache/datafusion-python/pull/1089) (timsaucer) +- Documentation updates: mention correct dataset on basics page [#1081](https://github.com/apache/datafusion-python/pull/1081) (floscha) +- Add Configurable HTML Table Formatter for DataFusion DataFrames in Python [#1100](https://github.com/apache/datafusion-python/pull/1100) (kosiew) +- Add DataFrame usage guide with HTML rendering customization options [#1108](https://github.com/apache/datafusion-python/pull/1108) (kosiew) +- 1075/enhancement/Make col class with __getattr__ [#1076](https://github.com/apache/datafusion-python/pull/1076) (deanm0000) +- 1064/enhancement/add functions to Expr class [#1074](https://github.com/apache/datafusion-python/pull/1074) (deanm0000) +- ci: require approving review [#1122](https://github.com/apache/datafusion-python/pull/1122) (timsaucer) +- Partial fix for 1078: Enhance DataFrame Formatter Configuration with Memory and Display Controls [#1119](https://github.com/apache/datafusion-python/pull/1119) (kosiew) +- Add fill_null method to DataFrame API for handling missing values [#1019](https://github.com/apache/datafusion-python/pull/1019) (kosiew) +- minor: reduce error size [#1126](https://github.com/apache/datafusion-python/pull/1126) (timsaucer) +- Move the udf module to user_defined [#1112](https://github.com/apache/datafusion-python/pull/1112) (timsaucer) +- add unit tests for expression functions [#1121](https://github.com/apache/datafusion-python/pull/1121) (timsaucer) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. + +``` + 12 Tim Saucer + 4 Chen Chongchen + 4 kosiew + 2 deanm0000 + 1 Florian Schäfer +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. 
+ diff --git a/dev/release/README.md b/dev/release/README.md index f0b333999..692473930 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -56,6 +56,8 @@ Before creating a new release: - a PR should be created and merged to update the major version number of the project - A new release branch should be created, such as `branch-0.8` +## Preparing a Release Candidate + ### Change Log We maintain a `CHANGELOG.md` so our users know what has been changed between releases. @@ -76,21 +78,17 @@ Categorizing pull requests Generating changelog content ``` -This process is not fully automated, so there are some additional manual steps: - -- Add the ASF header to the generated file -- Add a link to this changelog from the top-level `/datafusion/CHANGELOG.md` -- Add the following content (copy from the previous version's changelog and update as appropriate: +### Update the version number -``` -## [24.0.0](https://github.com/apache/datafusion-python/tree/24.0.0) (2023-05-06) +The only place you should need to update the version is in the root `Cargo.toml`. -[Full Changelog](https://github.com/apache/datafusion-python/compare/23.0.0...24.0.0) -``` +### Tag the Repository -### Preparing a Release Candidate +Commit the changes to the changelog and version. -### Tag the Repository +Assuming you have set up a remote to the `apache` repository rather than your personal fork, +you need to push a tag to start the CI process for release candidates. The following assumes +the upstream repository is called `apache`. ```bash git tag 0.8.0-rc1 diff --git a/docs/source/api/dataframe.rst b/docs/source/api/dataframe.rst new file mode 100644 index 000000000..a9e9e47c8 --- /dev/null +++ b/docs/source/api/dataframe.rst @@ -0,0 +1,387 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +================= +DataFrame API +================= + +Overview +-------- + +The ``DataFrame`` class is the core abstraction in DataFusion that represents tabular data and operations +on that data. DataFrames provide a flexible API for transforming data through various operations such as +filtering, projection, aggregation, joining, and more. + +A DataFrame represents a logical plan that is lazily evaluated. The actual execution occurs only when +terminal operations like ``collect()``, ``show()``, or ``to_pandas()`` are called. + +Creating DataFrames +------------------- + +DataFrames can be created in several ways: + +* From SQL queries via a ``SessionContext``: + + .. code-block:: python + + from datafusion import SessionContext + + ctx = SessionContext() + df = ctx.sql("SELECT * FROM your_table") + +* From registered tables: + + .. code-block:: python + + df = ctx.table("your_table") + +* From various data sources: + + .. 
code-block:: python + + # From CSV files (see :ref:`io_csv` for detailed options) + df = ctx.read_csv("path/to/data.csv") + + # From Parquet files (see :ref:`io_parquet` for detailed options) + df = ctx.read_parquet("path/to/data.parquet") + + # From JSON files (see :ref:`io_json` for detailed options) + df = ctx.read_json("path/to/data.json") + + # From Avro files (see :ref:`io_avro` for detailed options) + df = ctx.read_avro("path/to/data.avro") + + # From Pandas DataFrame + import pandas as pd + pandas_df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df = ctx.from_pandas(pandas_df) + + # From Arrow data + import pyarrow as pa + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], + names=["a", "b"] + ) + df = ctx.from_arrow(batch) + + For detailed information about reading from different data sources, see the :doc:`I/O Guide <../user-guide/io/index>`. + For custom data sources, see :ref:`io_custom_table_provider`. + +Common DataFrame Operations +--------------------------- + +DataFusion's DataFrame API offers a wide range of operations: + +.. code-block:: python + + from datafusion import column, literal + + # Select specific columns + df = df.select("col1", "col2") + + # Select with expressions + df = df.select(column("a") + column("b"), column("a") - column("b")) + + # Filter rows + df = df.filter(column("age") > literal(25)) + + # Add computed columns + df = df.with_column("full_name", column("first_name") + literal(" ") + column("last_name")) + + # Multiple column additions + df = df.with_columns( + (column("a") + column("b")).alias("sum"), + (column("a") * column("b")).alias("product") + ) + + # Sort data + df = df.sort(column("age").sort(ascending=False)) + + # Join DataFrames + df = df1.join(df2, on="user_id", how="inner") + + # Aggregate data + from datafusion import functions as f + df = df.aggregate( + [], # Group by columns (empty for global aggregation) + [f.sum(column("amount")).alias("total_amount")] + ) + + # Limit rows + df = df.limit(100) + + # Drop columns + df = df.drop("temporary_column") + +Terminal Operations +------------------- + +To materialize the results of your DataFrame operations: + +.. code-block:: python + + # Collect all data as PyArrow RecordBatches + result_batches = df.collect() + + # Convert to various formats + pandas_df = df.to_pandas() # Pandas DataFrame + polars_df = df.to_polars() # Polars DataFrame + arrow_table = df.to_arrow_table() # PyArrow Table + py_dict = df.to_pydict() # Python dictionary + py_list = df.to_pylist() # Python list of dictionaries + + # Display results + df.show() # Print tabular format to console + + # Count rows + count = df.count() + +HTML Rendering in Jupyter +------------------------- + +When working in Jupyter notebooks or other environments that support rich HTML display, +DataFusion DataFrames automatically render as nicely formatted HTML tables. This functionality +is provided by the ``_repr_html_`` method, which is automatically called by Jupyter. + +Basic HTML Rendering +~~~~~~~~~~~~~~~~~~~~ + +In a Jupyter environment, simply displaying a DataFrame object will trigger HTML rendering: + +.. code-block:: python + + # Will display as HTML table in Jupyter + df + + # Explicit display also uses HTML rendering + display(df) + +HTML Rendering Customization +---------------------------- + +DataFusion provides extensive customization options for HTML table rendering through the +``datafusion.html_formatter`` module. 
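+Because display goes through ``_repr_html_``, you can also capture the
+(customized) HTML directly. The following is a minimal sketch, assuming the
+standard IPython display protocol in which ``_repr_html_`` returns the HTML
+as a string; the output file name is only illustrative:
+
+.. code-block:: python
+
+    # Grab the HTML that Jupyter would render for this DataFrame
+    html = df._repr_html_()
+
+    # For example, write it out to inspect the generated markup
+    with open("preview.html", "w") as f:
+        f.write(html)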
+
+Configuring the HTML Formatter
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You can customize how DataFrames are rendered by configuring the formatter:
+
+.. code-block:: python
+
+    from datafusion.html_formatter import configure_formatter
+
+    configure_formatter(
+        max_cell_length=30,               # Maximum length of cell content before truncation
+        max_width=800,                    # Maximum width of table in pixels
+        max_height=400,                   # Maximum height of table in pixels
+        max_memory_bytes=2 * 1024 * 1024, # Maximum memory used for rendering (2MB)
+        min_rows_display=10,              # Minimum rows to display
+        repr_rows=20,                     # Number of rows to display in representation
+        enable_cell_expansion=True,       # Allow cells to be expandable on click
+        custom_css=None,                  # Custom CSS to apply
+        show_truncation_message=True,     # Show message when data is truncated
+        style_provider=None,              # Custom style provider class
+        use_shared_styles=True            # Share styles across tables to reduce duplication
+    )
+
+Custom Style Providers
+~~~~~~~~~~~~~~~~~~~~~~
+
+For advanced styling needs, you can create a custom style provider class:
+
+.. code-block:: python
+
+    from datafusion.html_formatter import configure_formatter
+
+    class CustomStyleProvider:
+        def get_cell_style(self) -> str:
+            return "background-color: #f5f5f5; color: #333; padding: 8px; border: 1px solid #ddd;"
+
+        def get_header_style(self) -> str:
+            return "background-color: #4285f4; color: white; font-weight: bold; padding: 10px;"
+
+    # Apply custom styling
+    configure_formatter(style_provider=CustomStyleProvider())
+
+Custom Type Formatters
+~~~~~~~~~~~~~~~~~~~~~~
+
+You can register custom formatters for specific data types:
+
+.. code-block:: python
+
+    import datetime
+
+    from datafusion.html_formatter import get_formatter
+
+    formatter = get_formatter()
+
+    # Format integers with color based on value
+    def format_int(value):
+        return f'<span style="color: {"red" if value > 100 else "blue"}">{value}</span>'
+
+    formatter.register_formatter(int, format_int)
+
+    # Format date values (the wrapping markup shown here is illustrative)
+    def format_date(value):
+        return f'<span class="date-value">{value.isoformat()}</span>'
+
+    formatter.register_formatter(datetime.date, format_date)
+
+Custom Cell Builders
+~~~~~~~~~~~~~~~~~~~~
+
+For complete control over cell rendering:
+
+.. code-block:: python
+
+    formatter = get_formatter()
+
+    def custom_cell_builder(value, row, col, table_id):
+        try:
+            num_value = float(value)
+            if num_value > 0:  # Positive values get green
+                return f'<td style="color: green">{value}</td>'
+            if num_value < 0:  # Negative values get red
+                return f'<td style="color: red">{value}</td>'
+        except (ValueError, TypeError):
+            pass
+
+        # Default styling for non-numeric or zero values
+        return f'<td>{value}</td>'
+
+    formatter.set_custom_cell_builder(custom_cell_builder)
+
+Custom Header Builders
+~~~~~~~~~~~~~~~~~~~~~~
+
+Similarly, you can customize the rendering of table headers:
+
+.. code-block:: python
+
+    def custom_header_builder(field):
+        tooltip = f"Type: {field.type}"
+        return f'<th title="{tooltip}">{field.name}</th>'
+
+    formatter.set_custom_header_builder(custom_header_builder)
+
+Managing Formatter State
+------------------------
+
+The HTML formatter maintains global state that can be managed:
+
+.. code-block:: python
+
+    from datafusion.html_formatter import reset_formatter, reset_styles_loaded_state, get_formatter
+
+    # Reset the formatter to default settings
+    reset_formatter()
+
+    # Reset only the styles loaded state (useful when styles were loaded but need reloading)
+    reset_styles_loaded_state()
+
+    # Get the current formatter instance to make changes
+    formatter = get_formatter()
+
+Advanced Example: Dashboard-Style Formatting
+--------------------------------------------
+
+This example shows how to create dashboard-like styling for your DataFrames:
+
+.. code-block:: python
+
+    from datafusion.html_formatter import configure_formatter, get_formatter
+
+    # Define custom CSS
+    custom_css = """
+    .datafusion-table {
+        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+        border-collapse: collapse;
+        width: 100%;
+        box-shadow: 0 2px 3px rgba(0,0,0,0.1);
+    }
+    .datafusion-table th {
+        position: sticky;
+        top: 0;
+        z-index: 10;
+    }
+    .datafusion-table tr:hover td {
+        background-color: #f1f7fa !important;
+    }
+    .datafusion-table .numeric-positive {
+        color: #0a7c00;
+    }
+    .datafusion-table .numeric-negative {
+        color: #d13438;
+    }
+    """
+
+    class DashboardStyleProvider:
+        def get_cell_style(self) -> str:
+            return "padding: 8px 12px; border-bottom: 1px solid #e0e0e0;"
+
+        def get_header_style(self) -> str:
+            return ("background-color: #0078d4; color: white; font-weight: 600; "
+                    "padding: 12px; text-align: left; border-bottom: 2px solid #005a9e;")
+
+    # Apply configuration
+    configure_formatter(
+        max_height=500,
+        enable_cell_expansion=True,
+        custom_css=custom_css,
+        style_provider=DashboardStyleProvider(),
+        max_cell_length=50
+    )
+
+    # Add custom formatters for numbers
+    formatter = get_formatter()
+
+    def format_number(value):
+        try:
+            num = float(value)
+            cls = "numeric-positive" if num > 0 else "numeric-negative" if num < 0 else ""
+            return f'<span class="{cls}">{value:,}</span>' if cls else f'{value:,}'
+        except (ValueError, TypeError):
+            return str(value)
+
+    formatter.register_formatter(int, format_number)
+    formatter.register_formatter(float, format_number)
+
+Best Practices
+--------------
+
+1. **Memory Management**: For large datasets, use ``max_memory_bytes`` to limit memory usage.
+
+2. **Responsive Design**: Set reasonable ``max_width`` and ``max_height`` values to ensure tables display well on different screens.
+
+3. **Style Optimization**: Use ``use_shared_styles=True`` to avoid duplicate style definitions when displaying multiple tables.
+
+4. **Reset When Needed**: Call ``reset_formatter()`` when you want to start fresh with default settings.
+
+5. **Cell Expansion**: Use ``enable_cell_expansion=True`` when cells might contain longer content that users may want to see in full.
+
+Additional Resources
+--------------------
+
+* :doc:`../user-guide/dataframe` - Complete guide to using DataFrames
+* :doc:`../user-guide/io/index` - I/O Guide for reading data from various sources
+* :doc:`../user-guide/data-sources` - Comprehensive data sources guide
+* :ref:`io_csv` - CSV file reading
+* :ref:`io_parquet` - Parquet file reading
+* :ref:`io_json` - JSON file reading
+* :ref:`io_avro` - Avro file reading
+* :ref:`io_custom_table_provider` - Custom table providers
+* `API Reference `_ - Full API reference
diff --git a/docs/source/api/index.rst b/docs/source/api/index.rst
new file mode 100644
index 000000000..7f58227ca
--- /dev/null
+++ b/docs/source/api/index.rst
@@ -0,0 +1,27 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. 
or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +============= +API Reference +============= + +This section provides detailed API documentation for the DataFusion Python library. + +.. toctree:: + :maxdepth: 2 + + dataframe diff --git a/docs/source/conf.py b/docs/source/conf.py index 0be03d81d..28db17d35 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -71,6 +71,7 @@ autoapi_member_order = "groupwise" suppress_warnings = ["autoapi.python_import_resolution"] autoapi_python_class_content = "both" +autoapi_keep_files = False # set to True for debugging generated files def autoapi_skip_member_fn(app, what, name, obj, skip, options) -> bool: # noqa: ARG001 @@ -79,6 +80,9 @@ def autoapi_skip_member_fn(app, what, name, obj, skip, options) -> bool: # noqa ("class", "datafusion.DataFrame"), ("class", "datafusion.SessionContext"), ("module", "datafusion.common"), + # Duplicate modules (skip module-level docs to avoid duplication) + ("module", "datafusion.col"), + ("module", "datafusion.udf"), # Deprecated ("class", "datafusion.substrait.serde"), ("class", "datafusion.substrait.plan"), diff --git a/docs/source/index.rst b/docs/source/index.rst index c18793822..ff1e47280 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -93,3 +93,5 @@ Example :hidden: :maxdepth: 1 :caption: API + + api/index diff --git a/docs/source/user-guide/common-operations/functions.rst b/docs/source/user-guide/common-operations/functions.rst index 12097be8f..ccb47a4e7 100644 --- a/docs/source/user-guide/common-operations/functions.rst +++ b/docs/source/user-guide/common-operations/functions.rst @@ -129,3 +129,24 @@ The function :py:func:`~datafusion.functions.in_list` allows to check a column f .limit(20) .to_pandas() ) + + +Handling Missing Values +======================= + +DataFusion provides methods to handle missing values in DataFrames: + +fill_null +--------- + +The ``fill_null()`` method replaces NULL values in specified columns with a provided value: + +.. code-block:: python + + # Fill all NULL values with 0 where possible + df = df.fill_null(0) + + # Fill NULL values only in specific string columns + df = df.fill_null("missing", subset=["name", "category"]) + +The fill value will be cast to match each column's type. If casting fails for a column, that column remains unchanged. diff --git a/docs/source/user-guide/common-operations/udf-and-udfa.rst b/docs/source/user-guide/common-operations/udf-and-udfa.rst index ffd7a05cb..0830fa81c 100644 --- a/docs/source/user-guide/common-operations/udf-and-udfa.rst +++ b/docs/source/user-guide/common-operations/udf-and-udfa.rst @@ -26,7 +26,7 @@ Scalar Functions When writing a user-defined function that can operate on a row by row basis, these are called Scalar Functions. You can define your own scalar function by calling -:py:func:`~datafusion.udf.ScalarUDF.udf` . 
+:py:func:`~datafusion.user_defined.ScalarUDF.udf` .
 
 The basic definition of a scalar UDF is a python function that takes one or more
 `pyarrow `_ arrays and returns a single array as
@@ -93,9 +93,9 @@ converting to Python objects to do the evaluation.
 Aggregate Functions
 -------------------
 
-The :py:func:`~datafusion.udf.AggregateUDF.udaf` function allows you to define User-Defined
+The :py:func:`~datafusion.user_defined.AggregateUDF.udaf` function allows you to define User-Defined
 Aggregate Functions (UDAFs). To use this you must implement an
-:py:class:`~datafusion.udf.Accumulator` that determines how the aggregation is performed.
+:py:class:`~datafusion.user_defined.Accumulator` that determines how the aggregation is performed.
 
 When defining a UDAF there are four methods you need to implement. The ``update`` function takes the
 array(s) of input and updates the internal state of the accumulator. You should define this function
@@ -153,8 +153,8 @@ Window Functions
 ----------------
 
 To implement a User-Defined Window Function (UDWF) you must call the
-:py:func:`~datafusion.udf.WindowUDF.udwf` function using a class that implements the abstract
-class :py:class:`~datafusion.udf.WindowEvaluator`.
+:py:func:`~datafusion.user_defined.WindowUDF.udwf` function using a class that implements the abstract
+class :py:class:`~datafusion.user_defined.WindowEvaluator`.
 
 There are three methods of evaluation of UDWFs.
 
@@ -207,7 +207,7 @@ determine which evaluate functions are called.
 
     import pyarrow as pa
     from datafusion import udwf, col, SessionContext
-    from datafusion.udf import WindowEvaluator
+    from datafusion.user_defined import WindowEvaluator
 
     class ExponentialSmooth(WindowEvaluator):
         def __init__(self, alpha: float) -> None:
@@ -242,3 +242,35 @@ determine which evaluate functions are called.
     })
 
     df.select("a", exp_smooth(col("a")).alias("smooth_a")).show()
+
+Table Functions
+---------------
+
+User-Defined Table Functions are slightly different from the other functions
+described here. These functions take any number of ``Expr`` arguments, but only
+literal expressions are supported. Table functions must return a Table
+Provider as described in the :ref:`io_custom_table_provider` page.
+
+Once you have a table function, you can register it with the session context
+by using :py:func:`datafusion.context.SessionContext.register_udtf`.
+
+There are examples of both Rust-backed and Python-based table functions in the
+examples folder of the repository. If you have a Rust-backed table function
+that you wish to make available via PyO3, you need to expose it as a ``PyCapsule``:
+
+.. code-block:: rust
+
+    #[pymethods]
+    impl MyTableFunction {
+        fn __datafusion_table_function__<'py>(
+            &self,
+            py: Python<'py>,
+        ) -> PyResult<Bound<'py, PyCapsule>> {
+            let name = cr"datafusion_table_function".into();
+
+            let func = self.clone();
+            let provider = FFI_TableFunction::new(Arc::new(func), None);
+
+            PyCapsule::new(py, provider, Some(name))
+        }
+    }
diff --git a/docs/source/user-guide/dataframe.rst b/docs/source/user-guide/dataframe.rst
index 11e3d7e72..23c65b5f6 100644
--- a/docs/source/user-guide/dataframe.rst
+++ b/docs/source/user-guide/dataframe.rst
@@ -122,7 +122,8 @@ Performance Optimization with Shared Styles
 The ``use_shared_styles`` parameter (enabled by default) optimizes performance when displaying
 multiple DataFrames in notebook environments:
- .. code-block:: python
+.. 
code-block:: python + from datafusion.html_formatter import StyleProvider, configure_formatter # Default: Use shared styles (recommended for notebooks) configure_formatter(use_shared_styles=True) diff --git a/examples/ffi-table-provider/.cargo/config.toml b/examples/datafusion-ffi-example/.cargo/config.toml similarity index 100% rename from examples/ffi-table-provider/.cargo/config.toml rename to examples/datafusion-ffi-example/.cargo/config.toml diff --git a/examples/ffi-table-provider/Cargo.lock b/examples/datafusion-ffi-example/Cargo.lock similarity index 71% rename from examples/ffi-table-provider/Cargo.lock rename to examples/datafusion-ffi-example/Cargo.lock index 8d0edd515..075ebd5a1 100644 --- a/examples/ffi-table-provider/Cargo.lock +++ b/examples/datafusion-ffi-example/Cargo.lock @@ -67,13 +67,13 @@ checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" [[package]] name = "ahash" -version = "0.8.11" +version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", "const-random", - "getrandom", + "getrandom 0.3.3", "once_cell", "version_check", "zerocopy", @@ -105,9 +105,9 @@ dependencies = [ [[package]] name = "allocator-api2" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45862d1c77f2228b9e10bc609d5bc203d86ebc9b87ad8d5d5167a6c9abf739d9" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" [[package]] name = "android-tzdata" @@ -126,9 +126,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.93" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" [[package]] name = "arrayref" @@ -144,9 +144,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "54.1.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6422e12ac345a0678d7a17e316238e3a40547ae7f92052b77bd86d5e0239f3fc" +checksum = "b1bb018b6960c87fd9d025009820406f74e83281185a8bdcb44880d2aa5c9a87" dependencies = [ "arrow-arith", "arrow-array", @@ -165,9 +165,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "54.1.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23cf34bb1f48c41d3475927bcc7be498665b8e80b379b88f62a840337f8b8248" +checksum = "44de76b51473aa888ecd6ad93ceb262fb8d40d1f1154a4df2f069b3590aa7575" dependencies = [ "arrow-array", "arrow-buffer", @@ -179,9 +179,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "54.1.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb4a06d507f54b70a277be22a127c8ffe0cec6cd98c0ad8a48e77779bbda8223" +checksum = "29ed77e22744475a9a53d00026cf8e166fe73cf42d89c4c4ae63607ee1cfcc3f" dependencies = [ "ahash", "arrow-buffer", @@ -190,15 +190,15 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.15.1", + "hashbrown 0.15.3", "num", ] [[package]] name = "arrow-buffer" -version = "54.1.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d69d326d5ad1cb82dcefa9ede3fee8fdca98f9982756b16f9cb142f4aa6edc89" +checksum = "b0391c96eb58bf7389171d1e103112d3fc3e5625ca6b372d606f2688f1ea4cce" dependencies = [ "bytes", "half", @@ -207,9 +207,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "54.1.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626e65bd42636a84a238bed49d09c8777e3d825bf81f5087a70111c2831d9870" +checksum = "f39e1d774ece9292697fcbe06b5584401b26bd34be1bec25c33edae65c2420ff" dependencies = [ "arrow-array", "arrow-buffer", @@ -228,9 +228,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "54.1.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71c8f959f7a1389b1dbd883cdcd37c3ed12475329c111912f7f69dad8195d8c6" +checksum = "9055c972a07bf12c2a827debfd34f88d3b93da1941d36e1d9fee85eebe38a12a" dependencies = [ "arrow-array", "arrow-cast", @@ -244,9 +244,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "54.1.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1858e7c7d01c44cf71c21a85534fd1a54501e8d60d1195d0d6fbcc00f4b10754" +checksum = "cf75ac27a08c7f48b88e5c923f267e980f27070147ab74615ad85b5c5f90473d" dependencies = [ "arrow-buffer", "arrow-schema", @@ -256,9 +256,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "54.1.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6bb3f727f049884c7603f0364bc9315363f356b59e9f605ea76541847e06a1e" +checksum = "a222f0d93772bd058d1268f4c28ea421a603d66f7979479048c429292fac7b2e" dependencies = [ "arrow-array", "arrow-buffer", @@ -270,9 +270,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "54.1.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35de94f165ed8830aede72c35f238763794f0d49c69d30c44d49c9834267ff8c" +checksum = "9085342bbca0f75e8cb70513c0807cc7351f1fbf5cb98192a67d5e3044acb033" dependencies = [ "arrow-array", "arrow-buffer", @@ -283,16 +283,18 @@ dependencies = [ "half", "indexmap", "lexical-core", + "memchr", "num", "serde", "serde_json", + "simdutf8", ] [[package]] name = "arrow-ord" -version = "54.1.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8aa06e5f267dc53efbacb933485c79b6fc1685d3ffbe870a16ce4e696fb429da" +checksum = "ab2f1065a5cad7b9efa9e22ce5747ce826aa3855766755d4904535123ef431e7" dependencies = [ "arrow-array", "arrow-buffer", @@ -303,9 +305,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "54.1.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66f1144bb456a2f9d82677bd3abcea019217e572fc8f07de5a7bac4b2c56eb2c" +checksum = "3703a0e3e92d23c3f756df73d2dc9476873f873a76ae63ef9d3de17fda83b2d8" dependencies = [ "arrow-array", "arrow-buffer", @@ -316,18 +318,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "54.1.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "105f01ec0090259e9a33a9263ec18ff223ab91a0ea9fbc18042f7e38005142f6" +checksum = "73a47aa0c771b5381de2b7f16998d351a6f4eb839f1e13d48353e17e873d969b" dependencies = [ - "bitflags 2.6.0", + "bitflags", ] [[package]] name = "arrow-select" -version = "54.1.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f690752fdbd2dee278b5f1636fefad8f2f7134c85e20fd59c4199e15a39a6807" +checksum = 
"24b7b85575702b23b85272b01bc1c25a01c9b9852305e5d0078c79ba25d995d4" dependencies = [ "ahash", "arrow-array", @@ -339,9 +341,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "54.1.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0fff9cd745a7039b66c47ecaf5954460f9fa12eed628f65170117ea93e64ee0" +checksum = "9260fddf1cdf2799ace2b4c2fc0356a9789fa7551e0953e35435536fecefebbd" dependencies = [ "arrow-array", "arrow-buffer", @@ -368,11 +370,11 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.17" +version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cb8f1d480b0ea3783ab015936d2a55c87e219676f0c0b7dec61494043f21857" +checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" dependencies = [ - "bzip2 0.4.4", + "bzip2", "flate2", "futures-core", "memchr", @@ -394,13 +396,13 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.83" +version = "0.1.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" +checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.101", ] [[package]] @@ -420,9 +422,9 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "backtrace" -version = "0.3.74" +version = "0.3.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" dependencies = [ "addr2line", "cfg-if", @@ -441,9 +443,9 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "bigdecimal" -version = "0.4.7" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f31f3af01c5c65a07985c804d3366560e6fa7883d640a122819b14ec327482c" +checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" dependencies = [ "autocfg", "libm", @@ -454,15 +456,9 @@ dependencies = [ [[package]] name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "bitflags" -version = "2.6.0" +version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" [[package]] name = "blake2" @@ -475,9 +471,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.4" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7" +checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" dependencies = [ "arrayref", "arrayvec", @@ -497,9 +493,9 @@ dependencies = [ [[package]] name = "brotli" -version = "7.0.0" +version = "8.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" +checksum = "9991eea70ea4f293524138648e41ee89b0b2b12ddef3b255effa43c8056e0e0d" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -508,9 +504,9 @@ dependencies = [ [[package]] name = 
"brotli-decompressor" -version = "4.0.1" +version = "5.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362" +checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -518,9 +514,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.16.0" +version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" [[package]] name = "byteorder" @@ -530,46 +526,34 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.8.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" - -[[package]] -name = "bzip2" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" -dependencies = [ - "bzip2-sys", - "libc", -] +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" [[package]] name = "bzip2" -version = "0.5.0" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bafdbf26611df8c14810e268ddceda071c297570a5fb360ceddf617fe417ef58" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" dependencies = [ "bzip2-sys", - "libc", ] [[package]] name = "bzip2-sys" -version = "0.1.11+1.0.8" +version = "0.1.13+1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" dependencies = [ "cc", - "libc", "pkg-config", ] [[package]] name = "cc" -version = "1.1.37" +version = "1.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40545c26d092346d8a8dab71ee48e7685a7a9cba76e634790c215b41a4a7b4cf" +checksum = "5f4ac86a9e5bc1e2b3449ab9d7d3a6a405e3d1bb28d7b9be8614f55846ae3766" dependencies = [ "jobserver", "libc", @@ -584,21 +568,21 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.38" +version = "0.4.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" dependencies = [ "android-tzdata", "iana-time-zone", "num-traits", - "windows-targets", + "windows-link", ] [[package]] name = "chrono-tz" -version = "0.10.0" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd6dd8046d00723a59a2f8c5f295c515b9bb9a331ee4f8f3d4dd49e428acd3b6" +checksum = "efdce149c370f133a071ca8ef6ea340b7b88748ab0810097a9e2976eaa34b4f3" dependencies = [ "chrono", "chrono-tz-build", @@ -607,9 +591,9 @@ dependencies = [ [[package]] name = "chrono-tz-build" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94fea34d77a245229e7746bd2beb786cd2a896f306ff491fb8cecb3074b10a7" +checksum = "8f10f8c9340e31fc120ff885fcdb54a0b48e474bbd77cab557f0c30a3e569402" dependencies = [ "parse-zoneinfo", 
"phf_codegen", @@ -617,12 +601,11 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.1.1" +version = "7.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" +checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" dependencies = [ - "strum", - "strum_macros", + "unicode-segmentation", "unicode-width", ] @@ -641,16 +624,16 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom", + "getrandom 0.2.16", "once_cell", "tiny-keccak", ] [[package]] name = "const_panic" -version = "0.2.10" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "013b6c2c3a14d678f38cd23994b02da3a1a1b6a5d1eedddfe63a5a5f11b13a81" +checksum = "2459fc9262a1aa204eb4b5764ad4f189caec88aea9634389c0a25f8be7f6265e" [[package]] name = "constant_time_eq" @@ -681,9 +664,9 @@ checksum = "69f3b219d28b6e3b4ac87bc1fc522e0803ab22e055da177bff0068c4150c61a6" [[package]] name = "cpufeatures" -version = "0.2.14" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" dependencies = [ "libc", ] @@ -699,24 +682,24 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.13" +version = "0.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" dependencies = [ "crossbeam-utils", ] [[package]] name = "crossbeam-utils" -version = "0.8.20" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crunchy" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" [[package]] name = "crypto-common" @@ -742,9 +725,9 @@ dependencies = [ [[package]] name = "csv-core" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" dependencies = [ "memchr", ] @@ -765,39 +748,44 @@ dependencies = [ [[package]] name = "datafusion" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a" +checksum = "ffe060b978f74ab446be722adb8a274e052e005bf6dfd171caadc3abaad10080" dependencies = [ "arrow", - "arrow-array", "arrow-ipc", "arrow-schema", - "async-compression", "async-trait", "bytes", - "bzip2 0.5.0", + "bzip2", "chrono", "datafusion-catalog", + "datafusion-catalog-listing", "datafusion-common", "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-datasource-csv", + "datafusion-datasource-json", + "datafusion-datasource-parquet", 
"datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", "datafusion-functions-table", "datafusion-functions-window", + "datafusion-macros", "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-optimizer", "datafusion-physical-plan", + "datafusion-session", "datafusion-sql", "flate2", "futures", - "glob", - "itertools 0.14.0", + "itertools", "log", "object_store", "parking_lot", @@ -807,7 +795,6 @@ dependencies = [ "sqlparser", "tempfile", "tokio", - "tokio-util", "url", "uuid", "xz2", @@ -816,37 +803,62 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f27987bc22b810939e8dfecc55571e9d50355d6ea8ec1c47af8383a76a6d0e1" +checksum = "61fe34f401bd03724a1f96d12108144f8cd495a3cdda2bf5e091822fb80b7e66" dependencies = [ "arrow", "async-trait", "dashmap", "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", "datafusion-execution", "datafusion-expr", + "datafusion-physical-expr", "datafusion-physical-plan", + "datafusion-session", "datafusion-sql", "futures", - "itertools 0.14.0", + "itertools", "log", + "object_store", "parking_lot", - "sqlparser", + "tokio", +] + +[[package]] +name = "datafusion-catalog-listing" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4411b8e3bce5e0fc7521e44f201def2e2d5d1b5f176fb56e8cdc9942c890f00" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "log", + "object_store", + "tokio", ] [[package]] name = "datafusion-common" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602" +checksum = "0734015d81c8375eb5d4869b7f7ecccc2ee8d6cb81948ef737cd0e7b743bd69c" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", "arrow-ipc", - "arrow-schema", "base64", "half", "hashbrown 0.14.5", @@ -864,25 +876,143 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "45.0.0" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5167bb1d2ccbb87c6bc36c295274d7a0519b14afcfdaf401d53cbcaa4ef4968b" +dependencies = [ + "futures", + "log", + "tokio", +] + +[[package]] +name = "datafusion-datasource" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04e602dcdf2f50c2abf297cc2203c73531e6f48b29516af7695d338cf2a778b1" +dependencies = [ + "arrow", + "async-compression", + "async-trait", + "bytes", + "bzip2", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "flate2", + "futures", + "glob", + "itertools", + "log", + "object_store", + "parquet", + "rand", + "tempfile", + "tokio", + "tokio-util", + "url", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-datasource-csv" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "e3bb2253952dc32296ed5b84077cb2e0257fea4be6373e1c376426e17ead4ef6" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "regex", + "tokio", +] + +[[package]] +name = "datafusion-datasource-json" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b8c7f47a5d2fe03bfa521ec9bafdb8a5c82de8377f60967c3663f00c8790352" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "serde_json", + "tokio", +] + +[[package]] +name = "datafusion-datasource-parquet" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d4603c8e8a4baf77660ab7074cc66fc15cc8a18f2ce9dfadb755fc6ee294e48" +checksum = "27d15868ea39ed2dc266728b554f6304acd473de2142281ecfa1294bb7415923" dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "itertools", "log", + "object_store", + "parking_lot", + "parquet", + "rand", "tokio", ] [[package]] name = "datafusion-doc" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5bf4bc68623a5cf231eed601ed6eb41f46a37c4d15d11a0bff24cbc8396cd66" +checksum = "a91f8c2c5788ef32f48ff56c68e5b545527b744822a284373ac79bba1ba47292" [[package]] name = "datafusion-execution" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88b491c012cdf8e051053426013429a76f74ee3c2db68496c79c323ca1084d27" +checksum = "06f004d100f49a3658c9da6fb0c3a9b760062d96cd4ad82ccc3b7b69a9fb2f84" dependencies = [ "arrow", "dashmap", @@ -899,9 +1029,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a181408d4fc5dc22f9252781a8f39f2d0e5d1b33ec9bde242844980a2689c1" +checksum = "7a4e4ce3802609be38eeb607ee72f6fe86c3091460de9dbfae9e18db423b3964" dependencies = [ "arrow", "chrono", @@ -920,25 +1050,25 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1129b48e8534d8c03c6543bcdccef0b55c8ac0c1272a15a56c67068b6eb1885" +checksum = "422ac9cf3b22bbbae8cdf8ceb33039107fde1b5492693168f13bd566b1bcc839" dependencies = [ "arrow", "datafusion-common", - "itertools 0.14.0", + "indexmap", + "itertools", "paste", ] [[package]] name = "datafusion-ffi" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ff47a79d442207c168c6e3e1d970c248589c148e4800e5b285ac1b2cb1a230f8" +checksum = "5cf3fe9ab492c56daeb7beed526690d33622d388b8870472e0b7b7f55490338c" dependencies = [ "abi_stable", "arrow", - "arrow-array", "arrow-schema", "async-ffi", "async-trait", @@ -953,9 +1083,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6125874e4856dfb09b59886784fcb74cde5cfc5930b3a80a1a728ef7a010df6b" +checksum = "2ddf0a0a2db5d2918349c978d42d80926c6aa2459cd8a3c533a84ec4bb63479e" dependencies = [ "arrow", "arrow-buffer", @@ -969,9 +1099,8 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-macros", - "hashbrown 0.14.5", "hex", - "itertools 0.14.0", + "itertools", "log", "md-5", "rand", @@ -983,14 +1112,12 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3add7b1d3888e05e7c95f2b281af900ca69ebdcb21069ba679b33bde8b3b9d6" +checksum = "408a05dafdc70d05a38a29005b8b15e21b0238734dab1e98483fcb58038c5aba" dependencies = [ "ahash", "arrow", - "arrow-buffer", - "arrow-schema", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -1006,9 +1133,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e18baa4cfc3d2f144f74148ed68a1f92337f5072b6dde204a0dbbdf3324989c" +checksum = "756d21da2dd6c9bef97af1504970ff56cbf35d03fbd4ffd62827f02f4d2279d4" dependencies = [ "ahash", "arrow", @@ -1019,15 +1146,12 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ec5ee8cecb0dc370291279673097ddabec03a011f73f30d7f1096457127e03e" +checksum = "8d8d50f6334b378930d992d801a10ac5b3e93b846b39e4a05085742572844537" dependencies = [ "arrow", - "arrow-array", - "arrow-buffer", "arrow-ord", - "arrow-schema", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -1036,16 +1160,16 @@ dependencies = [ "datafusion-functions-aggregate", "datafusion-macros", "datafusion-physical-expr-common", - "itertools 0.14.0", + "itertools", "log", "paste", ] [[package]] name = "datafusion-functions-table" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c403ddd473bbb0952ba880008428b3c7febf0ed3ce1eec35a205db20efb2a36" +checksum = "cc9a97220736c8fff1446e936be90d57216c06f28969f9ffd3b72ac93c958c8a" dependencies = [ "arrow", "async-trait", @@ -1059,9 +1183,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ab18c2fb835614d06a75f24a9e09136d3a8c12a92d97c95a6af316a1787a9c5" +checksum = "cefc2d77646e1aadd1d6a9c40088937aedec04e68c5f0465939912e1291f8193" dependencies = [ "datafusion-common", "datafusion-doc", @@ -1076,9 +1200,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a77b73bc15e7d1967121fdc7a55d819bfb9d6c03766a6c322247dce9094a53a4" +checksum = "dd4aff082c42fa6da99ce0698c85addd5252928c908eb087ca3cfa64ff16b313" dependencies = [ 
"datafusion-common", "datafusion-physical-expr-common", @@ -1086,20 +1210,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09369b8d962291e808977cf94d495fd8b5b38647232d7ef562c27ac0f495b0af" +checksum = "df6f88d7ee27daf8b108ba910f9015176b36fbc72902b1ca5c2a5f1d1717e1a1" dependencies = [ "datafusion-expr", "quote", - "syn 2.0.87", + "syn 2.0.101", ] [[package]] name = "datafusion-optimizer" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2403a7e4a84637f3de7d8d4d7a9ccc0cc4be92d89b0161ba3ee5be82f0531c54" +checksum = "084d9f979c4b155346d3c34b18f4256e6904ded508e9554d90fed416415c3515" dependencies = [ "arrow", "chrono", @@ -1107,7 +1231,7 @@ dependencies = [ "datafusion-expr", "datafusion-physical-expr", "indexmap", - "itertools 0.14.0", + "itertools", "log", "recursive", "regex", @@ -1116,15 +1240,12 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86ff72ac702b62dbf2650c4e1d715ebd3e4aab14e3885e72e8549e250307347c" +checksum = "64c536062b0076f4e30084065d805f389f9fe38af0ca75bcbac86bc5e9fbab65" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", "datafusion-common", "datafusion-expr", "datafusion-expr-common", @@ -1133,7 +1254,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap", - "itertools 0.14.0", + "itertools", "log", "paste", "petgraph", @@ -1141,27 +1262,25 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60982b7d684e25579ee29754b4333057ed62e2cc925383c5f0bd8cab7962f435" +checksum = "f8a92b53b3193fac1916a1c5b8e3f4347c526f6822e56b71faa5fb372327a863" dependencies = [ "ahash", "arrow", - "arrow-buffer", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", - "itertools 0.14.0", + "itertools", ] [[package]] name = "datafusion-physical-optimizer" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac5e85c189d5238a5cf181a624e450c4cd4c66ac77ca551d6f3ff9080bac90bb" +checksum = "6fa0a5ac94c7cf3da97bedabd69d6bbca12aef84b9b37e6e9e8c25286511b5e2" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-expr", @@ -1169,23 +1288,19 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", - "futures", - "itertools 0.14.0", + "itertools", "log", "recursive", - "url", ] [[package]] name = "datafusion-physical-plan" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c36bf163956d7e2542657c78b3383fdc78f791317ef358a359feffcdb968106f" +checksum = "690c615db468c2e5fe5085b232d8b1c088299a6c63d87fd960a354a71f7acb55" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", "arrow-ord", "arrow-schema", "async-trait", @@ -1201,7 +1316,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap", - "itertools 0.14.0", + "itertools", "log", "parking_lot", "pin-project-lite", @@ -1210,9 +1325,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "2db5d79f0c974041787b899d24dc91bdab2ff112d1942dd71356a4ce3b407e6c" +checksum = "a4a1afb2bdb05de7ff65be6883ebfd4ec027bd9f1f21c46aa3afd01927160a83" dependencies = [ "arrow", "chrono", @@ -1226,24 +1341,46 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de21bde1603aac0ff32cf478e47081be6e3583c6861fe8f57034da911efe7578" +checksum = "35b7a5876ebd6b564fb9a1fd2c3a2a9686b787071a256b47e4708f0916f9e46f" dependencies = [ "arrow", "datafusion-common", "prost", ] +[[package]] +name = "datafusion-session" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad229a134c7406c057ece00c8743c0c34b97f4e72f78b475fe17b66c5e14fa4f" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-sql", + "futures", + "itertools", + "log", + "object_store", + "parking_lot", + "tokio", +] + [[package]] name = "datafusion-sql" -version = "45.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13caa4daede211ecec53c78b13c503b592794d125f9a3cc3afe992edf9e7f43" +checksum = "64f6ab28b72b664c21a27b22a2ff815fd390ed224c26e89a93b5a8154a4e8607" dependencies = [ "arrow", - "arrow-array", - "arrow-schema", "bigdecimal", "datafusion-common", "datafusion-expr", @@ -1273,36 +1410,36 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.101", ] [[package]] name = "either" -version = "1.13.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "equivalent" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.9" +version = "0.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys", ] [[package]] name = "fastrand" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "ffi-table-provider" @@ -1325,24 +1462,31 @@ checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "flatbuffers" -version = "24.12.23" +version = "25.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f1baf0dbf96932ec9a3038d57900329c015b0bfb7b63d904f3bc27e2b02a096" +checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" dependencies = [ - "bitflags 1.3.2", + "bitflags", "rustc_version", ] [[package]] name = "flate2" -version = "1.0.34" +version = "1.1.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" +checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" dependencies = [ "crc32fast", + "libz-rs-sys", "miniz_oxide", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -1408,7 +1552,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.101", ] [[package]] @@ -1462,13 +1606,25 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.15" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ "cfg-if", "libc", - "wasi", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", ] [[package]] @@ -1479,15 +1635,15 @@ checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "glob" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" [[package]] name = "half" -version = "2.4.1" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" dependencies = [ "cfg-if", "crunchy", @@ -1506,9 +1662,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.1" +version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" +checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" [[package]] name = "heck" @@ -1522,22 +1678,34 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "http" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "humantime" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" [[package]] name = "iana-time-zone" -version = "0.1.61" +version = "0.1.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" dependencies = [ 
"android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", + "log", "wasm-bindgen", "windows-core", ] @@ -1553,21 +1721,22 @@ dependencies = [ [[package]] name = "icu_collections" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" dependencies = [ "displaydoc", + "potential_utf", "yoke", "zerofrom", "zerovec", ] [[package]] -name = "icu_locid" -version = "1.5.0" +name = "icu_locale_core" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" dependencies = [ "displaydoc", "litemap", @@ -1576,31 +1745,11 @@ dependencies = [ "zerovec", ] -[[package]] -name = "icu_locid_transform" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_locid_transform_data", - "icu_provider", - "tinystr", - "zerovec", -] - -[[package]] -name = "icu_locid_transform_data" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" - [[package]] name = "icu_normalizer" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" dependencies = [ "displaydoc", "icu_collections", @@ -1608,67 +1757,54 @@ dependencies = [ "icu_properties", "icu_provider", "smallvec", - "utf16_iter", - "utf8_iter", - "write16", "zerovec", ] [[package]] name = "icu_normalizer_data" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" +checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" [[package]] name = "icu_properties" -version = "1.5.1" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +checksum = "2549ca8c7241c82f59c80ba2a6f415d931c5b58d24fb8412caa1a1f02c49139a" dependencies = [ "displaydoc", "icu_collections", - "icu_locid_transform", + "icu_locale_core", "icu_properties_data", "icu_provider", - "tinystr", + "potential_utf", + "zerotrie", "zerovec", ] [[package]] name = "icu_properties_data" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" +checksum = "8197e866e47b68f8f7d95249e172903bec06004b18b2937f1095d40a0c57de04" [[package]] name = "icu_provider" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" dependencies = [ "displaydoc", - "icu_locid", - "icu_provider_macros", + "icu_locale_core", "stable_deref_trait", "tinystr", "writeable", "yoke", "zerofrom", 
+ "zerotrie", "zerovec", ] -[[package]] -name = "icu_provider_macros" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.87", -] - [[package]] name = "idna" version = "1.0.3" @@ -1682,9 +1818,9 @@ dependencies = [ [[package]] name = "idna_adapter" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" dependencies = [ "icu_normalizer", "icu_properties", @@ -1692,19 +1828,19 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.7.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", - "hashbrown 0.15.1", + "hashbrown 0.15.3", ] [[package]] name = "indoc" -version = "2.0.5" +version = "2.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" +checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" [[package]] name = "integer-encoding" @@ -1712,15 +1848,6 @@ version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" -[[package]] -name = "itertools" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.14.0" @@ -1732,25 +1859,27 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.11" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "jobserver" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" dependencies = [ + "getrandom 0.3.3", "libc", ] [[package]] name = "js-sys" -version = "0.3.72" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" dependencies = [ + "once_cell", "wasm-bindgen", ] @@ -1762,9 +1891,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "lexical-core" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0431c65b318a590c1de6b8fd6e72798c92291d27762d94c9e6c37ed7a73d8458" +checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" dependencies = [ "lexical-parse-float", "lexical-parse-integer", @@ -1775,9 +1904,9 @@ dependencies = [ [[package]] name = "lexical-parse-float" -version = "1.0.2" +version = "1.0.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb17a4bdb9b418051aa59d41d65b1c9be5affab314a872e5ad7f06231fb3b4e0" +checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" dependencies = [ "lexical-parse-integer", "lexical-util", @@ -1786,9 +1915,9 @@ dependencies = [ [[package]] name = "lexical-parse-integer" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5df98f4a4ab53bf8b175b363a34c7af608fe31f93cc1fb1bf07130622ca4ef61" +checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" dependencies = [ "lexical-util", "static_assertions", @@ -1796,18 +1925,18 @@ dependencies = [ [[package]] name = "lexical-util" -version = "1.0.3" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85314db53332e5c192b6bca611fb10c114a80d1b831ddac0af1e9be1b9232ca0" +checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" dependencies = [ "static_assertions", ] [[package]] name = "lexical-write-float" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e7c3ad4e37db81c1cbe7cf34610340adc09c322871972f74877a712abc6c809" +checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" dependencies = [ "lexical-util", "lexical-write-integer", @@ -1816,9 +1945,9 @@ dependencies = [ [[package]] name = "lexical-write-integer" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb89e9f6958b83258afa3deed90b5de9ef68eef090ad5086c791cd2345610162" +checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" dependencies = [ "lexical-util", "static_assertions", @@ -1826,9 +1955,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.162" +version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" [[package]] name = "libloading" @@ -1842,21 +1971,30 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.11" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" + +[[package]] +name = "libz-rs-sys" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" +checksum = "6489ca9bd760fe9642d7644e827b0c9add07df89857b0416ee15c1cc1a3b8c5a" +dependencies = [ + "zlib-rs", +] [[package]] name = "linux-raw-sys" -version = "0.4.14" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" [[package]] name = "litemap" -version = "0.7.3" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" +checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" [[package]] name = "lock_api" @@ -1870,9 +2008,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.22" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "lz4_flex" @@ -1880,7 +2018,7 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" dependencies = [ - "twox-hash", + "twox-hash 1.6.3", ] [[package]] @@ -1921,9 +2059,9 @@ dependencies = [ [[package]] name = "miniz_oxide" -version = "0.8.0" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" dependencies = [ "adler2", ] @@ -2004,39 +2142,42 @@ dependencies = [ [[package]] name = "object" -version = "0.36.5" +version = "0.36.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" dependencies = [ "memchr", ] [[package]] name = "object_store" -version = "0.11.1" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eb4c22c6154a1e759d7099f9ffad7cc5ef8245f9efbab4a41b92623079c82f3" +checksum = "d94ac16b433c0ccf75326388c893d2835ab7457ea35ab8ba5d745c053ef5fa16" dependencies = [ "async-trait", "bytes", "chrono", "futures", + "http", "humantime", - "itertools 0.13.0", + "itertools", "parking_lot", "percent-encoding", - "snafu", + "thiserror", "tokio", "tracing", "url", "walkdir", + "wasm-bindgen-futures", + "web-time", ] [[package]] name = "once_cell" -version = "1.20.2" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "ordered-float" @@ -2072,9 +2213,9 @@ dependencies = [ [[package]] name = "parquet" -version = "54.1.0" +version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a01a0efa30bbd601ae85b375c728efdb211ade54390281628a7b16708beb235" +checksum = "be7b2d778f6b841d37083ebdf32e33a524acde1266b5884a8ca29bf00dfa1231" dependencies = [ "ahash", "arrow-array", @@ -2091,7 +2232,7 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.15.1", + "hashbrown 0.15.3", "lz4_flex", "num", "num-bigint", @@ -2102,9 +2243,8 @@ dependencies = [ "snap", "thrift", "tokio", - "twox-hash", + "twox-hash 2.1.0", "zstd", - "zstd-sys", ] [[package]] @@ -2140,18 +2280,18 @@ dependencies = [ [[package]] name = "phf" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" dependencies = [ "phf_shared", ] [[package]] name = "phf_codegen" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" dependencies = [ "phf_generator", "phf_shared", @@ -2159,9 +2299,9 @@ dependencies = [ [[package]] name = "phf_generator" -version = "0.11.2" +version = "0.11.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ "phf_shared", "rand", @@ -2169,18 +2309,18 @@ dependencies = [ [[package]] name = "phf_shared" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" dependencies = [ "siphasher", ] [[package]] name = "pin-project-lite" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" [[package]] name = "pin-utils" @@ -2190,39 +2330,48 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "portable-atomic" -version = "1.9.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2" +checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" + +[[package]] +name = "potential_utf" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585" +dependencies = [ + "zerovec", +] [[package]] name = "ppv-lite86" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ "zerocopy", ] [[package]] name = "proc-macro2" -version = "1.0.89" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] [[package]] name = "prost" -version = "0.13.3" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" dependencies = [ "bytes", "prost-derive", @@ -2230,31 +2379,31 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.3" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.13.0", + "itertools", "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.101", ] [[package]] name = "psm" -version = "0.1.24" +version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "200b9ff220857e53e184257720a14553b2f4aa02577d2ed9842d45d4b9654810" 
+checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f" dependencies = [ "cc", ] [[package]] name = "pyo3" -version = "0.23.4" +version = "0.23.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57fe09249128b3173d092de9523eaa75136bf7ba85e0d69eca241c7939c933cc" +checksum = "7778bffd85cf38175ac1f545509665d0b9b92a198ca7941f131f85f7a4f9a872" dependencies = [ "cfg-if", "indoc", @@ -2270,9 +2419,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.23.4" +version = "0.23.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd3927b5a78757a0d71aa9dff669f903b1eb64b54142a9bd9f757f8fde65fd7" +checksum = "94f6cbe86ef3bf18998d9df6e0f3fc1050a8c5efa409bf712e661a4366e010fb" dependencies = [ "once_cell", "target-lexicon", @@ -2280,9 +2429,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.23.4" +version = "0.23.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dab6bb2102bd8f991e7749f130a70d05dd557613e39ed2deeee8e9ca0c4d548d" +checksum = "e9f1b4c431c0bb1c8fb0a338709859eed0d030ff6daa34368d3b152a63dfdd8d" dependencies = [ "libc", "pyo3-build-config", @@ -2290,38 +2439,44 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.23.4" +version = "0.23.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91871864b353fd5ffcb3f91f2f703a22a9797c91b9ab497b1acac7b07ae509c7" +checksum = "fbc2201328f63c4710f68abdf653c89d8dbc2858b88c5d88b0ff38a75288a9da" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.87", + "syn 2.0.101", ] [[package]] name = "pyo3-macros-backend" -version = "0.23.4" +version = "0.23.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43abc3b80bc20f3facd86cd3c60beed58c3e2aa26213f3cda368de39c60a27e4" +checksum = "fca6726ad0f3da9c9de093d6f116a93c1a38e417ed73bf138472cf4064f72028" dependencies = [ "heck", "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.87", + "syn 2.0.101", ] [[package]] name = "quote" -version = "1.0.37" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + [[package]] name = "rand" version = "0.8.5" @@ -2349,7 +2504,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom", + "getrandom 0.2.16", ] [[package]] @@ -2369,16 +2524,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.87", + "syn 2.0.101", ] [[package]] name = "redox_syscall" -version = "0.5.7" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +checksum = "928fca9cf2aa042393a8325b9ead81d2f0df4cb12e1e24cef072922ccd99c5af" dependencies = [ - "bitflags 2.6.0", + "bitflags", ] [[package]] @@ -2395,9 +2550,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.8" +version = 
"0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", @@ -2436,28 +2591,28 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.40" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99e4ea3e1cdc4b559b8e5650f9c8e5998e3e5c1343b4eaf034565f32318d63c0" +checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" dependencies = [ - "bitflags 2.6.0", + "bitflags", "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys", ] [[package]] name = "rustversion" -version = "1.0.18" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" +checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" [[package]] name = "ryu" -version = "1.0.18" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "same-file" @@ -2476,41 +2631,41 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "semver" -version = "1.0.25" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" [[package]] name = "seq-macro" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" [[package]] name = "serde" -version = "1.0.214" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.214" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.101", ] [[package]] name = "serde_json" -version = "1.0.132" +version = "1.0.140" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" +checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" dependencies = [ "itoa", "memchr", @@ -2520,9 +2675,9 @@ dependencies = [ [[package]] name = "sha2" -version = "0.10.8" +version = "0.10.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if", "cpufeatures", @@ -2543,9 +2698,9 @@ checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" [[package]] name 
= "siphasher" -version = "0.3.11" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" [[package]] name = "slab" @@ -2558,30 +2713,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.13.2" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" - -[[package]] -name = "snafu" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "223891c85e2a29c3fe8fb900c1fae5e69c2e42415e3177752e8718475efa5019" -dependencies = [ - "snafu-derive", -] - -[[package]] -name = "snafu-derive" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn 2.0.87", -] +checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" [[package]] name = "snap" @@ -2591,11 +2725,12 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "sqlparser" -version = "0.53.0" +version = "0.55.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05a528114c392209b3264855ad491fcce534b94a38771b0a0b97a79379275ce8" +checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" dependencies = [ "log", + "recursive", "sqlparser_derive", ] @@ -2607,7 +2742,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.101", ] [[package]] @@ -2618,15 +2753,15 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "stacker" -version = "0.1.17" +version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" +checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b" dependencies = [ "cc", "cfg-if", "libc", "psm", - "windows-sys 0.59.0", + "windows-sys", ] [[package]] @@ -2635,25 +2770,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" -[[package]] -name = "strum" -version = "0.26.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" - -[[package]] -name = "strum_macros" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.87", -] - [[package]] name = "subtle" version = "2.6.1" @@ -2673,9 +2789,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.87" +version = "2.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" dependencies = [ "proc-macro2", "quote", @@ -2684,13 +2800,13 @@ dependencies = [ [[package]] name = "synstructure" -version = "0.13.1" +version = "0.13.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.101", ] [[package]] @@ -2701,15 +2817,35 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tempfile" -version = "3.14.0" +version = "3.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" dependencies = [ - "cfg-if", "fastrand", + "getrandom 0.3.3", "once_cell", "rustix", - "windows-sys 0.59.0", + "windows-sys", +] + +[[package]] +name = "thiserror" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", ] [[package]] @@ -2734,9 +2870,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.7.6" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" dependencies = [ "displaydoc", "zerovec", @@ -2744,9 +2880,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.41.1" +version = "1.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" +checksum = "2513ca694ef9ede0fb23fe71a4ee4107cb102b9dc1930f6d0fd77aae068ae165" dependencies = [ "backtrace", "bytes", @@ -2756,20 +2892,20 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.101", ] [[package]] name = "tokio-util" -version = "0.7.12" +version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" +checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df" dependencies = [ "bytes", "futures-core", @@ -2780,9 +2916,9 @@ dependencies = [ [[package]] name = "tracing" -version = "0.1.40" +version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ "pin-project-lite", "tracing-attributes", @@ -2791,20 +2927,20 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +checksum = 
"395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.101", ] [[package]] name = "tracing-core" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ "once_cell", ] @@ -2834,6 +2970,12 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "twox-hash" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7b17f197b3050ba473acf9181f7b1d3b66d1cf7356c6cc57886662276e65908" + [[package]] name = "typed-arena" version = "2.0.2" @@ -2842,15 +2984,15 @@ checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" [[package]] name = "typenum" -version = "1.17.0" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" +checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" [[package]] name = "unicode-ident" -version = "1.0.13" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "unicode-segmentation" @@ -2860,15 +3002,15 @@ checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode-width" -version = "0.1.14" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" [[package]] name = "unindent" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" [[package]] name = "url" @@ -2881,12 +3023,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "utf16_iter" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" - [[package]] name = "utf8_iter" version = "1.0.4" @@ -2895,11 +3031,13 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.11.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" dependencies = [ - "getrandom", + "getrandom 0.3.3", + "js-sys", + "wasm-bindgen", ] [[package]] @@ -2924,37 +3062,59 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasm-bindgen" -version = "0.2.95" +version = 
"0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ "cfg-if", "once_cell", + "rustversion", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.95" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" dependencies = [ "bumpalo", "log", - "once_cell", "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.101", "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" +dependencies = [ + "cfg-if", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" -version = "0.2.95" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2962,22 +3122,35 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.95" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.101", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.95" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "web-sys" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" +checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" +dependencies = [ + "js-sys", + "wasm-bindgen", +] [[package]] name = "web-time" @@ -3011,7 +3184,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.59.0", + "windows-sys", ] [[package]] @@ -3022,20 +3195,61 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-core" -version = "0.52.0" +version = "0.61.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +checksum = "46ec44dc15085cea82cf9c78f85a9114c463a369786585ad2882d1ff0b0acf40" dependencies = [ - "windows-targets", + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", ] [[package]] -name = "windows-sys" -version = "0.52.0" +name = "windows-implement" +version = "0.60.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" dependencies = [ - "windows-targets", + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "windows-link" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" + +[[package]] +name = "windows-result" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b895b5356fc36103d0f64dd1e94dfa7ac5633f1c9dd6e80fe9ec4adef69e09d" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a7ab927b2637c19b3dbe0965e75d8f2d30bdd697a1516191cad2ec4df8fb28a" +dependencies = [ + "windows-link", ] [[package]] @@ -3112,16 +3326,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] -name = "write16" -version = "1.0.0" +name = "wit-bindgen-rt" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] [[package]] name = "writeable" -version = "0.5.5" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" +checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" [[package]] name = "xz2" @@ -3134,9 +3351,9 @@ dependencies = [ [[package]] name = "yoke" -version = "0.7.4" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" dependencies = [ "serde", "stable_deref_trait", @@ -3146,63 +3363,73 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.7.4" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.101", "synstructure", ] [[package]] name = "zerocopy" -version = "0.7.35" +version = "0.8.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" dependencies = [ - "byteorder", "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.35" +version = "0.8.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 
2.0.101", ] [[package]] name = "zerofrom" -version = "0.1.4" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.4" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.101", "synstructure", ] +[[package]] +name = "zerotrie" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + [[package]] name = "zerovec" -version = "0.10.4" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +checksum = "4a05eb080e015ba39cc9e23bbe5e7fb04d5fb040350f99f34e338d5fdd294428" dependencies = [ "yoke", "zerofrom", @@ -3211,38 +3438,44 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.10.3" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.101", ] +[[package]] +name = "zlib-rs" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "868b928d7949e09af2f6086dfc1e01936064cc7a819253bce650d4e2a2d63ba8" + [[package]] name = "zstd" -version = "0.13.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "7.2.1" +version = "7.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" dependencies = [ "zstd-sys", ] [[package]] name = "zstd-sys" -version = "2.0.13+zstd.1.5.6" +version = "2.0.15+zstd.1.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" +checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237" dependencies = [ "cc", "pkg-config", diff --git a/examples/ffi-table-provider/Cargo.toml b/examples/datafusion-ffi-example/Cargo.toml similarity index 83% rename from examples/ffi-table-provider/Cargo.toml rename to examples/datafusion-ffi-example/Cargo.toml index f4e4fda79..0e17567b9 100644 --- a/examples/ffi-table-provider/Cargo.toml +++ b/examples/datafusion-ffi-example/Cargo.toml @@ -21,16 +21,16 @@ version = "0.1.0" edition = "2021" [dependencies] -datafusion = { version = "45.0.0" } -datafusion-ffi = { version = "45.0.0" } +datafusion = { version = "47.0.0" } +datafusion-ffi = { version = "47.0.0" } pyo3 = { version = "0.23", features = 
["extension-module", "abi3", "abi3-py39"] } -arrow = { version = "54" } -arrow-array = { version = "54" } -arrow-schema = { version = "54" } +arrow = { version = "55.0.0" } +arrow-array = { version = "55.0.0" } +arrow-schema = { version = "55.0.0" } [build-dependencies] pyo3-build-config = "0.23" [lib] -name = "ffi_table_provider" +name = "datafusion_ffi_example" crate-type = ["cdylib", "rlib"] diff --git a/examples/ffi-table-provider/build.rs b/examples/datafusion-ffi-example/build.rs similarity index 100% rename from examples/ffi-table-provider/build.rs rename to examples/datafusion-ffi-example/build.rs diff --git a/examples/ffi-table-provider/pyproject.toml b/examples/datafusion-ffi-example/pyproject.toml similarity index 97% rename from examples/ffi-table-provider/pyproject.toml rename to examples/datafusion-ffi-example/pyproject.toml index 9cd25b423..0c54df95c 100644 --- a/examples/ffi-table-provider/pyproject.toml +++ b/examples/datafusion-ffi-example/pyproject.toml @@ -20,7 +20,7 @@ requires = ["maturin>=1.6,<2.0"] build-backend = "maturin" [project] -name = "ffi_table_provider" +name = "datafusion_ffi_example" requires-python = ">=3.9" classifiers = [ "Programming Language :: Rust", diff --git a/examples/datafusion-ffi-example/python/tests/_test_table_function.py b/examples/datafusion-ffi-example/python/tests/_test_table_function.py new file mode 100644 index 000000000..f3c56a90a --- /dev/null +++ b/examples/datafusion-ffi-example/python/tests/_test_table_function.py @@ -0,0 +1,134 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pyarrow as pa
+from datafusion import Expr, SessionContext, udtf
+from datafusion_ffi_example import MyTableFunction, MyTableProvider
+
+if TYPE_CHECKING:
+    from datafusion.context import TableProviderExportable
+
+
+def test_ffi_table_function_register():
+    ctx = SessionContext()
+    table_func = MyTableFunction()
+    table_udtf = udtf(table_func, "my_table_func")
+    ctx.register_udtf(table_udtf)
+    result = ctx.sql("select * from my_table_func()").collect()
+
+    assert len(result) == 2
+    assert result[0].num_columns == 4
+    print(result)
+
+    result = [r.column(0) for r in result]
+    expected = [
+        pa.array([0, 1, 2], type=pa.int32()),
+        pa.array([3, 4, 5, 6], type=pa.int32()),
+    ]
+
+    assert result == expected
+
+
+def test_ffi_table_function_call_directly():
+    ctx = SessionContext()
+    table_func = MyTableFunction()
+    table_udtf = udtf(table_func, "my_table_func")
+
+    my_table = table_udtf()
+    ctx.register_table_provider("t", my_table)
+    result = ctx.table("t").collect()
+
+    assert len(result) == 2
+    assert result[0].num_columns == 4
+    print(result)
+
+    result = [r.column(0) for r in result]
+    expected = [
+        pa.array([0, 1, 2], type=pa.int32()),
+        pa.array([3, 4, 5, 6], type=pa.int32()),
+    ]
+
+    assert result == expected
+
+
+class PythonTableFunction:
+    """Python based table function.
+
+    This class is used as a Python implementation of a table function.
+    We use the existing MyTableProvider to create the underlying
+    provider; the callable takes three arguments specifying the number
+    of columns, rows, and batches to generate.
+    """
+
+    def __call__(
+        self, num_cols: Expr, num_rows: Expr, num_batches: Expr
+    ) -> TableProviderExportable:
+        args = [
+            num_cols.to_variant().value_i64(),
+            num_rows.to_variant().value_i64(),
+            num_batches.to_variant().value_i64(),
+        ]
+        return MyTableProvider(*args)
+
+
+def common_table_function_test(test_ctx: SessionContext) -> None:
+    result = test_ctx.sql("select * from my_table_func(3,2,4)").collect()
+
+    assert len(result) == 4
+    assert result[0].num_columns == 3
+    print(result)
+
+    result = [r.column(0) for r in result]
+    expected = [
+        pa.array([0, 1], type=pa.int32()),
+        pa.array([2, 3, 4], type=pa.int32()),
+        pa.array([4, 5, 6, 7], type=pa.int32()),
+        pa.array([6, 7, 8, 9, 10], type=pa.int32()),
+    ]
+
+    assert result == expected
+
+
+def test_python_table_function():
+    ctx = SessionContext()
+    table_func = PythonTableFunction()
+    table_udtf = udtf(table_func, "my_table_func")
+    ctx.register_udtf(table_udtf)
+
+    common_table_function_test(ctx)
+
+
+def test_python_table_function_decorator():
+    ctx = SessionContext()
+
+    @udtf("my_table_func")
+    def my_udtf(
+        num_cols: Expr, num_rows: Expr, num_batches: Expr
+    ) -> TableProviderExportable:
+        args = [
+            num_cols.to_variant().value_i64(),
+            num_rows.to_variant().value_i64(),
+            num_batches.to_variant().value_i64(),
+        ]
+        return MyTableProvider(*args)
+
+    ctx.register_udtf(my_udtf)
+
+    common_table_function_test(ctx)
diff --git a/examples/ffi-table-provider/python/tests/_test_table_provider.py b/examples/datafusion-ffi-example/python/tests/_test_table_provider.py
similarity index 94%
rename from examples/ffi-table-provider/python/tests/_test_table_provider.py
rename to examples/datafusion-ffi-example/python/tests/_test_table_provider.py
index 0db3ec561..6b24da06c 100644
--- a/examples/ffi-table-provider/python/tests/_test_table_provider.py
+++ b/examples/datafusion-ffi-example/python/tests/_test_table_provider.py
@@ -15,9 +15,11 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import annotations
+
 import pyarrow as pa
 from datafusion import SessionContext
-from ffi_table_provider import MyTableProvider
+from datafusion_ffi_example import MyTableProvider
 
 
 def test_table_loading():
diff --git a/examples/datafusion-ffi-example/src/lib.rs b/examples/datafusion-ffi-example/src/lib.rs
new file mode 100644
index 000000000..ae08c3b65
--- /dev/null
+++ b/examples/datafusion-ffi-example/src/lib.rs
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::table_function::MyTableFunction;
+use crate::table_provider::MyTableProvider;
+use pyo3::prelude::*;
+
+pub(crate) mod table_function;
+pub(crate) mod table_provider;
+
+#[pymodule]
+fn datafusion_ffi_example(m: &Bound<'_, PyModule>) -> PyResult<()> {
+    m.add_class::<MyTableProvider>()?;
+    m.add_class::<MyTableFunction>()?;
+    Ok(())
+}
diff --git a/examples/datafusion-ffi-example/src/table_function.rs b/examples/datafusion-ffi-example/src/table_function.rs
new file mode 100644
index 000000000..2d7b356e3
--- /dev/null
+++ b/examples/datafusion-ffi-example/src/table_function.rs
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
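+
+// This example demonstrates the two pieces a PyO3 class needs in order to act
+// as a DataFusion table function across the FFI boundary: a
+// `TableFunctionImpl` implementation on the Rust side, and a
+// `__datafusion_table_function__` method that hands Python an
+// `FFI_TableFunction` inside a PyCapsule named "datafusion_table_function",
+// the capsule name the Python-side `udtf` helper expects.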
+
+use crate::table_provider::MyTableProvider;
+use datafusion::catalog::{TableFunctionImpl, TableProvider};
+use datafusion::error::Result as DataFusionResult;
+use datafusion::prelude::Expr;
+use datafusion_ffi::udtf::FFI_TableFunction;
+use pyo3::types::PyCapsule;
+use pyo3::{pyclass, pymethods, Bound, PyResult, Python};
+use std::sync::Arc;
+
+#[pyclass(name = "MyTableFunction", module = "datafusion_ffi_example", subclass)]
+#[derive(Debug, Clone)]
+pub(crate) struct MyTableFunction {}
+
+#[pymethods]
+impl MyTableFunction {
+    #[new]
+    fn new() -> Self {
+        Self {}
+    }
+
+    fn __datafusion_table_function__<'py>(
+        &self,
+        py: Python<'py>,
+    ) -> PyResult<Bound<'py, PyCapsule>> {
+        let name = cr"datafusion_table_function".into();
+
+        let func = self.clone();
+        let provider = FFI_TableFunction::new(Arc::new(func), None);
+
+        PyCapsule::new(py, provider, Some(name))
+    }
+}
+
+impl TableFunctionImpl for MyTableFunction {
+    fn call(&self, _args: &[Expr]) -> DataFusionResult<Arc<dyn TableProvider>> {
+        let provider = MyTableProvider::new(4, 3, 2).create_table()?;
+        Ok(Arc::new(provider))
+    }
+}
diff --git a/examples/ffi-table-provider/src/lib.rs b/examples/datafusion-ffi-example/src/table_provider.rs
similarity index 71%
rename from examples/ffi-table-provider/src/lib.rs
rename to examples/datafusion-ffi-example/src/table_provider.rs
index 88deeece2..e884585b5 100644
--- a/examples/ffi-table-provider/src/lib.rs
+++ b/examples/datafusion-ffi-example/src/table_provider.rs
@@ -15,25 +15,21 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::{ffi::CString, sync::Arc};
-
-use arrow_array::ArrayRef;
-use datafusion::{
-    arrow::{
-        array::RecordBatch,
-        datatypes::{DataType, Field, Schema},
-    },
-    datasource::MemTable,
-    error::{DataFusionError, Result},
-};
+use arrow_array::{ArrayRef, RecordBatch};
+use arrow_schema::{DataType, Field, Schema};
+use datafusion::catalog::MemTable;
+use datafusion::error::{DataFusionError, Result as DataFusionResult};
 use datafusion_ffi::table_provider::FFI_TableProvider;
-use pyo3::{exceptions::PyRuntimeError, prelude::*, types::PyCapsule};
+use pyo3::exceptions::PyRuntimeError;
+use pyo3::types::PyCapsule;
+use pyo3::{pyclass, pymethods, Bound, PyResult, Python};
+use std::sync::Arc;
 
 /// In order to provide a test that demonstrates different sized record batches,
 /// the first batch will have num_rows, the second batch num_rows+1, and so on.
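+/// For example, `MyTableProvider::new(3, 2, 4)` produces four batches with
+/// 2, 3, 4, and 5 rows; `_test_table_function.py` asserts exactly these
+/// sizes when it runs `my_table_func(3,2,4)`.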
-#[pyclass(name = "MyTableProvider", module = "ffi_table_provider", subclass)]
+#[pyclass(name = "MyTableProvider", module = "datafusion_ffi_example", subclass)]
 #[derive(Clone)]
-struct MyTableProvider {
+pub(crate) struct MyTableProvider {
     num_cols: usize,
     num_rows: usize,
     num_batches: usize,
@@ -44,21 +40,19 @@ fn create_record_batch(
     num_cols: usize,
     start_value: i32,
     num_values: usize,
-) -> Result<RecordBatch> {
+) -> DataFusionResult<RecordBatch> {
     let end_value = start_value + num_values as i32;
     let row_values: Vec<i32> = (start_value..end_value).collect();
 
     let columns: Vec<_> = (0..num_cols)
-        .map(|_| {
-            std::sync::Arc::new(arrow::array::Int32Array::from(row_values.clone())) as ArrayRef
-        })
+        .map(|_| Arc::new(arrow::array::Int32Array::from(row_values.clone())) as ArrayRef)
         .collect();
 
     RecordBatch::try_new(Arc::clone(schema), columns).map_err(DataFusionError::from)
 }
 
 impl MyTableProvider {
-    fn create_table(&self) -> Result<MemTable> {
+    pub fn create_table(&self) -> DataFusionResult<MemTable> {
         let fields: Vec<_> = (0..self.num_cols)
             .map(|idx| (b'A' + idx as u8) as char)
             .map(|col_name| Field::new(col_name, DataType::Int32, true))
@@ -66,7 +60,7 @@ impl MyTableProvider {
 
         let schema = Arc::new(Schema::new(fields));
 
-        let batches: Result<Vec<RecordBatch>> = (0..self.num_batches)
+        let batches: DataFusionResult<Vec<RecordBatch>> = (0..self.num_batches)
             .map(|batch_idx| {
                 let start_value = batch_idx * self.num_rows;
                 create_record_batch(
@@ -85,7 +79,7 @@ impl MyTableProvider {
 #[pymethods]
 impl MyTableProvider {
     #[new]
-    fn new(num_cols: usize, num_rows: usize, num_batches: usize) -> Self {
+    pub fn new(num_cols: usize, num_rows: usize, num_batches: usize) -> Self {
         Self {
             num_cols,
             num_rows,
@@ -93,23 +87,17 @@ impl MyTableProvider {
         }
     }
 
-    fn __datafusion_table_provider__<'py>(
+    pub fn __datafusion_table_provider__<'py>(
         &self,
         py: Python<'py>,
     ) -> PyResult<Bound<'py, PyCapsule>> {
-        let name = CString::new("datafusion_table_provider").unwrap();
+        let name = cr"datafusion_table_provider".into();
 
         let provider = self
             .create_table()
             .map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
         let provider = FFI_TableProvider::new(Arc::new(provider), false, None);
 
-        PyCapsule::new_bound(py, provider, Some(name.clone()))
+        PyCapsule::new(py, provider, Some(name))
     }
 }
-
-#[pymodule]
-fn ffi_table_provider(m: &Bound<'_, PyModule>) -> PyResult<()> {
-    m.add_class::<MyTableProvider>()?;
-    Ok(())
-}
diff --git a/examples/python-udwf.py b/examples/python-udwf.py
index 98d118bf2..645ded188 100644
--- a/examples/python-udwf.py
+++ b/examples/python-udwf.py
@@ -22,7 +22,7 @@
 from datafusion import col, lit, udwf
 from datafusion import functions as f
 from datafusion.expr import WindowFrame
-from datafusion.udf import WindowEvaluator
+from datafusion.user_defined import WindowEvaluator
 
 # This example creates five different examples of user defined window functions in order
 # to demonstrate the variety of ways a user may need to implement.
diff --git a/pyproject.toml b/pyproject.toml
index d86b657ec..728cedb2d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -63,6 +63,10 @@ exclude = [".github/**", "ci/**", ".asf.yaml"]
 locked = true
 features = ["substrait"]
 
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
+asyncio_default_fixture_loop_scope = "function"
+
 # Enable docstring linting using the google style guide
 [tool.ruff.lint]
 select = ["ALL" ]
diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py
index fd7cd000a..16d65f685 100644
--- a/python/datafusion/__init__.py
+++ b/python/datafusion/__init__.py
@@ -21,6 +21,10 @@
 
 See https://datafusion.apache.org/python for more information.
""" +from __future__ import annotations + +from typing import Any + try: import importlib.metadata as importlib_metadata except ImportError: @@ -51,7 +55,17 @@ from .io import read_avro, read_csv, read_json, read_parquet from .plan import ExecutionPlan, LogicalPlan from .record_batch import RecordBatch, RecordBatchStream -from .udf import Accumulator, AggregateUDF, ScalarUDF, WindowUDF, udaf, udf, udwf +from .user_defined import ( + Accumulator, + AggregateUDF, + ScalarUDF, + TableFunction, + WindowUDF, + udaf, + udf, + udtf, + udwf, +) __version__ = importlib_metadata.version(__name__) @@ -76,6 +90,7 @@ "SessionConfig", "SessionContext", "Table", + "TableFunction", "WindowFrame", "WindowUDF", "col", @@ -94,6 +109,7 @@ "substrait", "udaf", "udf", + "udtf", "udwf", "unparser", ] @@ -120,3 +136,18 @@ def str_lit(value): def lit(value) -> Expr: """Create a literal expression.""" return Expr.literal(value) + + +def literal_with_metadata(value: Any, metadata: dict[str, str]) -> Expr: + """Creates a new expression representing a scalar value with metadata. + + Args: + value: A valid PyArrow scalar value or easily castable to one. + metadata: Metadata to attach to the expression. + """ + return Expr.literal_with_metadata(value, metadata) + + +def lit_with_metadata(value: Any, metadata: dict[str, str]) -> Expr: + """Alias for literal_with_metadata.""" + return literal_with_metadata(value, metadata) diff --git a/python/datafusion/catalog.py b/python/datafusion/catalog.py index 6c3f188cc..67ab3ead2 100644 --- a/python/datafusion/catalog.py +++ b/python/datafusion/catalog.py @@ -34,6 +34,10 @@ def __init__(self, catalog: df_internal.Catalog) -> None: """This constructor is not typically called by the end user.""" self.catalog = catalog + def __repr__(self) -> str: + """Print a string representation of the catalog.""" + return self.catalog.__repr__() + def names(self) -> list[str]: """Returns the list of databases in this catalog.""" return self.catalog.names() @@ -50,6 +54,10 @@ def __init__(self, db: df_internal.Database) -> None: """This constructor is not typically called by the end user.""" self.db = db + def __repr__(self) -> str: + """Print a string representation of the database.""" + return self.db.__repr__() + def names(self) -> set[str]: """Returns the list of all tables in this database.""" return self.db.names() @@ -66,6 +74,10 @@ def __init__(self, table: df_internal.Table) -> None: """This constructor is not typically called by the end user.""" self.table = table + def __repr__(self) -> str: + """Print a string representation of the table.""" + return self.table.__repr__() + @property def schema(self) -> pa.Schema: """Returns the schema associated with this table.""" diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 1429a4975..5b99b0d26 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -19,8 +19,11 @@ from __future__ import annotations +import warnings from typing import TYPE_CHECKING, Any, Protocol +import pyarrow as pa + try: from warnings import deprecated # Python 3.13+ except ImportError: @@ -30,7 +33,7 @@ from datafusion.dataframe import DataFrame from datafusion.expr import Expr, SortExpr, sort_list_to_raw_sort_list from datafusion.record_batch import RecordBatchStream -from datafusion.udf import AggregateUDF, ScalarUDF, WindowUDF +from datafusion.user_defined import AggregateUDF, ScalarUDF, TableFunction, WindowUDF from ._internal import RuntimeEnvBuilder as RuntimeEnvBuilderInternal from ._internal import SessionConfig 
as SessionConfigInternal @@ -42,7 +45,6 @@ import pandas as pd import polars as pl - import pyarrow as pa from datafusion.plan import ExecutionPlan, LogicalPlan @@ -496,6 +498,10 @@ def __init__( self.ctx = SessionContextInternal(config, runtime) + def __repr__(self) -> str: + """Print a string representation of the Session Context.""" + return self.ctx.__repr__() + @classmethod def global_ctx(cls) -> SessionContext: """Retrieve the global context as a `SessionContext` wrapper. @@ -535,7 +541,7 @@ def register_listing_table( self, name: str, path: str | pathlib.Path, - table_partition_cols: list[tuple[str, str]] | None = None, + table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None, file_extension: str = ".parquet", schema: pa.Schema | None = None, file_sort_order: list[list[Expr | SortExpr]] | None = None, @@ -556,6 +562,7 @@ def register_listing_table( """ if table_partition_cols is None: table_partition_cols = [] + table_partition_cols = self._convert_table_partition_cols(table_partition_cols) file_sort_order_raw = ( [sort_list_to_raw_sort_list(f) for f in file_sort_order] if file_sort_order is not None @@ -752,6 +759,10 @@ def register_table_provider( """ self.ctx.register_table_provider(name, provider) + def register_udtf(self, func: TableFunction) -> None: + """Register a user defined table function.""" + self.ctx.register_udtf(func._udtf) + def register_record_batches( self, name: str, partitions: list[list[pa.RecordBatch]] ) -> None: @@ -770,7 +781,7 @@ def register_parquet( self, name: str, path: str | pathlib.Path, - table_partition_cols: list[tuple[str, str]] | None = None, + table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None, parquet_pruning: bool = True, file_extension: str = ".parquet", skip_metadata: bool = True, @@ -798,6 +809,7 @@ def register_parquet( """ if table_partition_cols is None: table_partition_cols = [] + table_partition_cols = self._convert_table_partition_cols(table_partition_cols) self.ctx.register_parquet( name, str(path), @@ -861,7 +873,7 @@ def register_json( schema: pa.Schema | None = None, schema_infer_max_records: int = 1000, file_extension: str = ".json", - table_partition_cols: list[tuple[str, str]] | None = None, + table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None, file_compression_type: str | None = None, ) -> None: """Register a JSON file as a table. @@ -882,6 +894,7 @@ def register_json( """ if table_partition_cols is None: table_partition_cols = [] + table_partition_cols = self._convert_table_partition_cols(table_partition_cols) self.ctx.register_json( name, str(path), @@ -898,7 +911,7 @@ def register_avro( path: str | pathlib.Path, schema: pa.Schema | None = None, file_extension: str = ".avro", - table_partition_cols: list[tuple[str, str]] | None = None, + table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None, ) -> None: """Register an Avro file as a table. 
@@ -914,6 +927,7 @@ def register_avro(
         """
         if table_partition_cols is None:
             table_partition_cols = []
+        table_partition_cols = self._convert_table_partition_cols(table_partition_cols)
         self.ctx.register_avro(
             name, str(path), schema, file_extension, table_partition_cols
         )
@@ -973,7 +987,7 @@ def read_json(
         schema: pa.Schema | None = None,
         schema_infer_max_records: int = 1000,
         file_extension: str = ".json",
-        table_partition_cols: list[tuple[str, str]] | None = None,
+        table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
         file_compression_type: str | None = None,
     ) -> DataFrame:
         """Read a line-delimited JSON data source.
@@ -993,6 +1007,7 @@ def read_json(
         """
         if table_partition_cols is None:
             table_partition_cols = []
+        table_partition_cols = self._convert_table_partition_cols(table_partition_cols)
         return DataFrame(
             self.ctx.read_json(
                 str(path),
@@ -1012,7 +1027,7 @@ def read_csv(
         delimiter: str = ",",
         schema_infer_max_records: int = 1000,
         file_extension: str = ".csv",
-        table_partition_cols: list[tuple[str, str]] | None = None,
+        table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
         file_compression_type: str | None = None,
     ) -> DataFrame:
         """Read a CSV data source.
@@ -1037,6 +1052,7 @@ def read_csv(
         """
         if table_partition_cols is None:
             table_partition_cols = []
+        table_partition_cols = self._convert_table_partition_cols(table_partition_cols)
 
         path = [str(p) for p in path] if isinstance(path, list) else str(path)
 
@@ -1056,7 +1072,7 @@ def read_csv(
     def read_parquet(
         self,
         path: str | pathlib.Path,
-        table_partition_cols: list[tuple[str, str]] | None = None,
+        table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
         parquet_pruning: bool = True,
         file_extension: str = ".parquet",
         skip_metadata: bool = True,
@@ -1085,6 +1101,7 @@ def read_parquet(
         """
         if table_partition_cols is None:
             table_partition_cols = []
+        table_partition_cols = self._convert_table_partition_cols(table_partition_cols)
         file_sort_order = (
             [sort_list_to_raw_sort_list(f) for f in file_sort_order]
             if file_sort_order is not None
@@ -1106,7 +1123,7 @@ def read_avro(
         self,
         path: str | pathlib.Path,
         schema: pa.Schema | None = None,
-        file_partition_cols: list[tuple[str, str]] | None = None,
+        file_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
         file_extension: str = ".avro",
     ) -> DataFrame:
         """Create a :py:class:`DataFrame` for reading Avro data source.
@@ -1122,6 +1139,7 @@ def read_avro(
         """
         if file_partition_cols is None:
            file_partition_cols = []
+        file_partition_cols = self._convert_table_partition_cols(file_partition_cols)
         return DataFrame(
             self.ctx.read_avro(str(path), schema, file_partition_cols, file_extension)
         )
@@ -1138,3 +1156,41 @@ def read_table(self, table: Table) -> DataFrame:
     def execute(self, plan: ExecutionPlan, partitions: int) -> RecordBatchStream:
         """Execute the ``plan`` and return the results."""
         return RecordBatchStream(self.ctx.execute(plan._raw_plan, partitions))
+
+    @staticmethod
+    def _convert_table_partition_cols(
+        table_partition_cols: list[tuple[str, str | pa.DataType]],
+    ) -> list[tuple[str, pa.DataType]]:
+        warn = False
+        converted_table_partition_cols = []
+
+        for col, data_type in table_partition_cols:
+            if isinstance(data_type, str):
+                warn = True
+                if data_type == "string":
+                    converted_data_type = pa.string()
+                elif data_type == "int":
+                    converted_data_type = pa.int32()
+                else:
+                    message = (
+                        f"Unsupported literal data type '{data_type}' for partition "
+                        "column. Supported types are 'string' and 'int'"
+                    )
+                    raise ValueError(message)
+            else:
+                converted_data_type = data_type
+
+            converted_table_partition_cols.append((col, converted_data_type))
+
+        if warn:
+            message = (
+                "using literals for table_partition_cols data types is deprecated, "
+                "use pyarrow types instead"
+            )
+            warnings.warn(
+                message,
+                category=DeprecationWarning,
+                stacklevel=2,
+            )
+
+        return converted_table_partition_cols
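A quick illustration of the widened ``table_partition_cols`` signature (a sketch; the path and column names are invented)::

    import pyarrow as pa
    from datafusion import SessionContext

    ctx = SessionContext()
    # preferred: pass pyarrow data types for partition columns
    df = ctx.read_parquet(
        "data/",
        table_partition_cols=[("year", pa.int32()), ("country", pa.string())],
    )
    # still accepted, but now emits a DeprecationWarning
    df = ctx.read_parquet("data/", table_partition_cols=[("year", "int")])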
diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py
index 014331541..769271c7e 100644
--- a/python/datafusion/dataframe.py
+++ b/python/datafusion/dataframe.py
@@ -37,6 +37,10 @@
 except ImportError:
     from typing_extensions import deprecated  # Python 3.12
 
+from datafusion._internal import DataFrame as DataFrameInternal
+from datafusion._internal import ParquetColumnOptions as ParquetColumnOptionsInternal
+from datafusion._internal import ParquetWriterOptions as ParquetWriterOptionsInternal
+from datafusion.expr import Expr, SortExpr, sort_or_default
 from datafusion.plan import ExecutionPlan, LogicalPlan
 from datafusion.record_batch import RecordBatchStream
 
@@ -53,10 +57,6 @@
 
 from enum import Enum
 
-from datafusion._internal import ParquetColumnOptions as ParquetColumnOptionsInternal
-from datafusion._internal import ParquetWriterOptions as ParquetWriterOptionsInternal
-from datafusion.expr import Expr, SortExpr, sort_or_default
-
 # excerpt from deltalake
 # https://github.com/apache/datafusion-python/pull/981#discussion_r1905619163
 
@@ -1090,3 +1090,25 @@ def within_limit(df: DataFrame, limit: int) -> DataFrame:
             DataFrame: After applying func to the original dataframe.
         """
         return func(self, *args)
+
+    def fill_null(self, value: Any, subset: list[str] | None = None) -> DataFrame:
+        """Fill null values in specified columns with a value.
+
+        Args:
+            value: Value to replace nulls with. Will be cast to match column type.
+            subset: Optional list of column names to fill. If None, fills all columns.
+
+        Returns:
+            DataFrame with null values replaced where type casting is possible.
+
+        Examples:
+            >>> df = df.fill_null(0)  # Fill all nulls with 0 where possible
+            >>> # Fill nulls in specific string columns
+            >>> df = df.fill_null("missing", subset=["name", "category"])
+
+        Notes:
+            - Only fills nulls in columns where the value can be cast to the column type
+            - For columns where casting fails, the original column is kept unchanged
+            - For columns not in subset, the original column is kept unchanged
+        """
+        return DataFrame(self.df.fill_null(value, subset))
diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py
index 9e58873d0..e785cab06 100644
--- a/python/datafusion/expr.py
+++ b/python/datafusion/expr.py
@@ -435,6 +435,20 @@ def literal(value: Any) -> Expr:
             value = pa.scalar(value)
         return Expr(expr_internal.RawExpr.literal(value))
 
+    @staticmethod
+    def literal_with_metadata(value: Any, metadata: dict[str, str]) -> Expr:
+        """Creates a new expression representing a scalar value with metadata.
+
+        Args:
+            value: A valid PyArrow scalar value or easily castable to one.
+            metadata: Metadata to attach to the expression.
+        """
+        if isinstance(value, str):
+            value = pa.scalar(value, type=pa.string_view())
+        value = value if isinstance(value, pa.Scalar) else pa.scalar(value)
+
+        return Expr(expr_internal.RawExpr.literal_with_metadata(value, metadata))
+
     @staticmethod
     def string_literal(value: str) -> Expr:
         """Creates a new expression representing a UTF8 literal value.
@@ -1172,6 +1186,10 @@ def __init__(
             end_bound = end_bound.cast(pa.uint64())
         self.window_frame = expr_internal.WindowFrame(units, start_bound, end_bound)
 
+    def __repr__(self) -> str:
+        """Print a string representation of the window frame."""
+        return self.window_frame.__repr__()
+
     def get_frame_units(self) -> str:
         """Returns the window frame units for the bounds."""
         return self.window_frame.get_frame_units()
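An illustrative sketch combining the new metadata literals and ``fill_null`` (``from_pydict``, ``select``, and ``alias`` are existing APIs; the metadata keys are invented for the example)::

    from datafusion import SessionContext, lit_with_metadata

    ctx = SessionContext()
    df = ctx.from_pydict({"a": [1, None, 3]})
    # attach key/value metadata to a literal column, then fill the nulls in "a"
    df = df.select(
        lit_with_metadata("n/a", {"origin": "example"}).alias("tag"),
        "a",
    ).fill_null(0)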
diff --git a/python/datafusion/io.py b/python/datafusion/io.py
index ef5ebf96f..551e20a6f 100644
--- a/python/datafusion/io.py
+++ b/python/datafusion/io.py
@@ -34,7 +34,7 @@
 
 def read_parquet(
     path: str | pathlib.Path,
-    table_partition_cols: list[tuple[str, str]] | None = None,
+    table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
     parquet_pruning: bool = True,
     file_extension: str = ".parquet",
     skip_metadata: bool = True,
@@ -83,7 +83,7 @@ def read_json(
     schema: pa.Schema | None = None,
     schema_infer_max_records: int = 1000,
     file_extension: str = ".json",
-    table_partition_cols: list[tuple[str, str]] | None = None,
+    table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
     file_compression_type: str | None = None,
 ) -> DataFrame:
     """Read a line-delimited JSON data source.
@@ -124,7 +124,7 @@ def read_csv(
     delimiter: str = ",",
     schema_infer_max_records: int = 1000,
     file_extension: str = ".csv",
-    table_partition_cols: list[tuple[str, str]] | None = None,
+    table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
     file_compression_type: str | None = None,
 ) -> DataFrame:
     """Read a CSV data source.
@@ -171,7 +171,7 @@ def read_csv(
 def read_avro(
     path: str | pathlib.Path,
     schema: pa.Schema | None = None,
-    file_partition_cols: list[tuple[str, str]] | None = None,
+    file_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
     file_extension: str = ".avro",
 ) -> DataFrame:
     """Create a :py:class:`DataFrame` for reading Avro data source.
diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py
index e93a34ca5..c7265fa09 100644
--- a/python/datafusion/udf.py
+++ b/python/datafusion/udf.py
@@ -15,753 +15,15 @@
 # specific language governing permissions and limitations
 # under the License.
 
-"""Provides the user-defined functions for evaluation of dataframes."""
+"""Deprecated module for user defined functions."""
 
-from __future__ import annotations
+import warnings
 
-import functools
-from abc import ABCMeta, abstractmethod
-from enum import Enum
-from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, overload
+from datafusion.user_defined import *  # noqa: F403
 
-import pyarrow as pa
-
-import datafusion._internal as df_internal
-from datafusion.expr import Expr
-
-if TYPE_CHECKING:
-    _R = TypeVar("_R", bound=pa.DataType)
-
-
-class Volatility(Enum):
-    """Defines how stable or volatile a function is.
-
-    When setting the volatility of a function, you can either pass this
-    enumeration or a ``str``. The ``str`` equivalent is the lower case value of the
-    name (`"immutable"`, `"stable"`, or `"volatile"`).
-    """
-
-    Immutable = 1
-    """An immutable function will always return the same output when given the
-    same input.
-
-    DataFusion will attempt to inline immutable functions during planning.
-    """
-
-    Stable = 2
-    """
-    Returns the same value for a given input within a single queries.
-
-    A stable function may return different values given the same input across
-    different queries but must return the same value for a given input within a
-    query. An example of this is the ``Now`` function.
DataFusion will attempt to - inline ``Stable`` functions during planning, when possible. For query - ``select col1, now() from t1``, it might take a while to execute but ``now()`` - column will be the same for each output row, which is evaluated during - planning. - """ - - Volatile = 3 - """A volatile function may change the return value from evaluation to - evaluation. - - Multiple invocations of a volatile function may return different results - when used in the same query. An example of this is the random() function. - DataFusion can not evaluate such functions during planning. In the query - ``select col1, random() from t1``, ``random()`` function will be evaluated - for each output row, resulting in a unique random value for each row. - """ - - def __str__(self) -> str: - """Returns the string equivalent.""" - return self.name.lower() - - -class ScalarUDF: - """Class for performing scalar user-defined functions (UDF). - - Scalar UDFs operate on a row by row basis. See also :py:class:`AggregateUDF` for - operating on a group of rows. - """ - - def __init__( - self, - name: str, - func: Callable[..., _R], - input_types: pa.DataType | list[pa.DataType], - return_type: _R, - volatility: Volatility | str, - ) -> None: - """Instantiate a scalar user-defined function (UDF). - - See helper method :py:func:`udf` for argument details. - """ - if isinstance(input_types, pa.DataType): - input_types = [input_types] - self._udf = df_internal.ScalarUDF( - name, func, input_types, return_type, str(volatility) - ) - - def __call__(self, *args: Expr) -> Expr: - """Execute the UDF. - - This function is not typically called by an end user. These calls will - occur during the evaluation of the dataframe. - """ - args_raw = [arg.expr for arg in args] - return Expr(self._udf.__call__(*args_raw)) - - @overload - @staticmethod - def udf( - input_types: list[pa.DataType], - return_type: _R, - volatility: Volatility | str, - name: Optional[str] = None, - ) -> Callable[..., ScalarUDF]: ... - - @overload - @staticmethod - def udf( - func: Callable[..., _R], - input_types: list[pa.DataType], - return_type: _R, - volatility: Volatility | str, - name: Optional[str] = None, - ) -> ScalarUDF: ... - - @staticmethod - def udf(*args: Any, **kwargs: Any): # noqa: D417 - """Create a new User-Defined Function (UDF). - - This class can be used both as a **function** and as a **decorator**. - - Usage: - - **As a function**: Call `udf(func, input_types, return_type, volatility, - name)`. - - **As a decorator**: Use `@udf(input_types, return_type, volatility, - name)`. In this case, do **not** pass `func` explicitly. - - Args: - func (Callable, optional): **Only needed when calling as a function.** - Skip this argument when using `udf` as a decorator. - input_types (list[pa.DataType]): The data types of the arguments - to `func`. This list must be of the same length as the number of - arguments. - return_type (_R): The data type of the return value from the function. - volatility (Volatility | str): See `Volatility` for allowed values. - name (Optional[str]): A descriptive name for the function. - - Returns: - A user-defined function that can be used in SQL expressions, - data aggregation, or window function calls. 
- - Example: - **Using `udf` as a function:** - ``` - def double_func(x): - return x * 2 - double_udf = udf(double_func, [pa.int32()], pa.int32(), - "volatile", "double_it") - ``` - - **Using `udf` as a decorator:** - ``` - @udf([pa.int32()], pa.int32(), "volatile", "double_it") - def double_udf(x): - return x * 2 - ``` - """ - - def _function( - func: Callable[..., _R], - input_types: list[pa.DataType], - return_type: _R, - volatility: Volatility | str, - name: Optional[str] = None, - ) -> ScalarUDF: - if not callable(func): - msg = "`func` argument must be callable" - raise TypeError(msg) - if name is None: - if hasattr(func, "__qualname__"): - name = func.__qualname__.lower() - else: - name = func.__class__.__name__.lower() - return ScalarUDF( - name=name, - func=func, - input_types=input_types, - return_type=return_type, - volatility=volatility, - ) - - def _decorator( - input_types: list[pa.DataType], - return_type: _R, - volatility: Volatility | str, - name: Optional[str] = None, - ) -> Callable: - def decorator(func: Callable): - udf_caller = ScalarUDF.udf( - func, input_types, return_type, volatility, name - ) - - @functools.wraps(func) - def wrapper(*args: Any, **kwargs: Any): - return udf_caller(*args, **kwargs) - - return wrapper - - return decorator - - if args and callable(args[0]): - # Case 1: Used as a function, require the first parameter to be callable - return _function(*args, **kwargs) - # Case 2: Used as a decorator with parameters - return _decorator(*args, **kwargs) - - -class Accumulator(metaclass=ABCMeta): - """Defines how an :py:class:`AggregateUDF` accumulates values.""" - - @abstractmethod - def state(self) -> list[pa.Scalar]: - """Return the current state.""" - - @abstractmethod - def update(self, *values: pa.Array) -> None: - """Evaluate an array of values and update state.""" - - @abstractmethod - def merge(self, states: list[pa.Array]) -> None: - """Merge a set of states.""" - - @abstractmethod - def evaluate(self) -> pa.Scalar: - """Return the resultant value.""" - - -class AggregateUDF: - """Class for performing scalar user-defined functions (UDF). - - Aggregate UDFs operate on a group of rows and return a single value. See - also :py:class:`ScalarUDF` for operating on a row by row basis. - """ - - def __init__( - self, - name: str, - accumulator: Callable[[], Accumulator], - input_types: list[pa.DataType], - return_type: pa.DataType, - state_type: list[pa.DataType], - volatility: Volatility | str, - ) -> None: - """Instantiate a user-defined aggregate function (UDAF). - - See :py:func:`udaf` for a convenience function and argument - descriptions. - """ - self._udaf = df_internal.AggregateUDF( - name, - accumulator, - input_types, - return_type, - state_type, - str(volatility), - ) - - def __call__(self, *args: Expr) -> Expr: - """Execute the UDAF. - - This function is not typically called by an end user. These calls will - occur during the evaluation of the dataframe. - """ - args_raw = [arg.expr for arg in args] - return Expr(self._udaf.__call__(*args_raw)) - - @overload - @staticmethod - def udaf( - input_types: pa.DataType | list[pa.DataType], - return_type: pa.DataType, - state_type: list[pa.DataType], - volatility: Volatility | str, - name: Optional[str] = None, - ) -> Callable[..., AggregateUDF]: ... 
- - @overload - @staticmethod - def udaf( - accum: Callable[[], Accumulator], - input_types: pa.DataType | list[pa.DataType], - return_type: pa.DataType, - state_type: list[pa.DataType], - volatility: Volatility | str, - name: Optional[str] = None, - ) -> AggregateUDF: ... - - @staticmethod - def udaf(*args: Any, **kwargs: Any): # noqa: D417 - """Create a new User-Defined Aggregate Function (UDAF). - - This class allows you to define an **aggregate function** that can be used in - data aggregation or window function calls. - - Usage: - - **As a function**: Call `udaf(accum, input_types, return_type, state_type, - volatility, name)`. - - **As a decorator**: Use `@udaf(input_types, return_type, state_type, - volatility, name)`. - When using `udaf` as a decorator, **do not pass `accum` explicitly**. - - **Function example:** - - If your `:py:class:Accumulator` can be instantiated with no arguments, you - can simply pass it's type as `accum`. If you need to pass additional - arguments to it's constructor, you can define a lambda or a factory method. - During runtime the `:py:class:Accumulator` will be constructed for every - instance in which this UDAF is used. The following examples are all valid. - ``` - import pyarrow as pa - import pyarrow.compute as pc - - class Summarize(Accumulator): - def __init__(self, bias: float = 0.0): - self._sum = pa.scalar(bias) - - def state(self) -> list[pa.Scalar]: - return [self._sum] - - def update(self, values: pa.Array) -> None: - self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py()) - - def merge(self, states: list[pa.Array]) -> None: - self._sum = pa.scalar(self._sum.as_py() + pc.sum(states[0]).as_py()) - - def evaluate(self) -> pa.Scalar: - return self._sum - - def sum_bias_10() -> Summarize: - return Summarize(10.0) - - udaf1 = udaf(Summarize, pa.float64(), pa.float64(), [pa.float64()], - "immutable") - udaf2 = udaf(sum_bias_10, pa.float64(), pa.float64(), [pa.float64()], - "immutable") - udaf3 = udaf(lambda: Summarize(20.0), pa.float64(), pa.float64(), - [pa.float64()], "immutable") - ``` - - **Decorator example:** - ``` - @udaf(pa.float64(), pa.float64(), [pa.float64()], "immutable") - def udf4() -> Summarize: - return Summarize(10.0) - ``` - - Args: - accum: The accumulator python function. **Only needed when calling as a - function. Skip this argument when using `udaf` as a decorator.** - input_types: The data types of the arguments to ``accum``. - return_type: The data type of the return value. - state_type: The data types of the intermediate accumulation. - volatility: See :py:class:`Volatility` for allowed values. - name: A descriptive name for the function. - - Returns: - A user-defined aggregate function, which can be used in either data - aggregation or window function calls. - """ - - def _function( - accum: Callable[[], Accumulator], - input_types: pa.DataType | list[pa.DataType], - return_type: pa.DataType, - state_type: list[pa.DataType], - volatility: Volatility | str, - name: Optional[str] = None, - ) -> AggregateUDF: - if not callable(accum): - msg = "`func` must be callable." 
- raise TypeError(msg) - if not isinstance(accum(), Accumulator): - msg = "Accumulator must implement the abstract base class Accumulator" - raise TypeError(msg) - if name is None: - name = accum().__class__.__qualname__.lower() - if isinstance(input_types, pa.DataType): - input_types = [input_types] - return AggregateUDF( - name=name, - accumulator=accum, - input_types=input_types, - return_type=return_type, - state_type=state_type, - volatility=volatility, - ) - - def _decorator( - input_types: pa.DataType | list[pa.DataType], - return_type: pa.DataType, - state_type: list[pa.DataType], - volatility: Volatility | str, - name: Optional[str] = None, - ) -> Callable[..., Callable[..., Expr]]: - def decorator(accum: Callable[[], Accumulator]) -> Callable[..., Expr]: - udaf_caller = AggregateUDF.udaf( - accum, input_types, return_type, state_type, volatility, name - ) - - @functools.wraps(accum) - def wrapper(*args: Any, **kwargs: Any) -> Expr: - return udaf_caller(*args, **kwargs) - - return wrapper - - return decorator - - if args and callable(args[0]): - # Case 1: Used as a function, require the first parameter to be callable - return _function(*args, **kwargs) - # Case 2: Used as a decorator with parameters - return _decorator(*args, **kwargs) - - -class WindowEvaluator: - """Evaluator class for user-defined window functions (UDWF). - - It is up to the user to decide which evaluate function is appropriate. - - +------------------------+--------------------------------+------------------+---------------------------+ - | ``uses_window_frame`` | ``supports_bounded_execution`` | ``include_rank`` | function_to_implement | - +========================+================================+==================+===========================+ - | False (default) | False (default) | False (default) | ``evaluate_all`` | - +------------------------+--------------------------------+------------------+---------------------------+ - | False | True | False | ``evaluate`` | - +------------------------+--------------------------------+------------------+---------------------------+ - | False | True/False | True | ``evaluate_all_with_rank``| - +------------------------+--------------------------------+------------------+---------------------------+ - | True | True/False | True/False | ``evaluate`` | - +------------------------+--------------------------------+------------------+---------------------------+ - """ # noqa: W505, E501 - - def memoize(self) -> None: - """Perform a memoize operation to improve performance. - - When the window frame has a fixed beginning (e.g UNBOUNDED - PRECEDING), some functions such as FIRST_VALUE and - NTH_VALUE do not need the (unbounded) input once they have - seen a certain amount of input. - - `memoize` is called after each input batch is processed, and - such functions can save whatever they need - """ - - def get_range(self, idx: int, num_rows: int) -> tuple[int, int]: # noqa: ARG002 - """Return the range for the window fuction. - - If `uses_window_frame` flag is `false`. This method is used to - calculate required range for the window function during - stateful execution. - - Generally there is no required range, hence by default this - returns smallest range(current row). e.g seeing current row is - enough to calculate window result (such as row_number, rank, - etc) - - Args: - idx:: Current index - num_rows: Number of rows. 
- """ - return (idx, idx + 1) - - def is_causal(self) -> bool: - """Get whether evaluator needs future data for its result.""" - return False - - def evaluate_all(self, values: list[pa.Array], num_rows: int) -> pa.Array: - """Evaluate a window function on an entire input partition. - - This function is called once per input *partition* for window functions that - *do not use* values from the window frame, such as - :py:func:`~datafusion.functions.row_number`, - :py:func:`~datafusion.functions.rank`, - :py:func:`~datafusion.functions.dense_rank`, - :py:func:`~datafusion.functions.percent_rank`, - :py:func:`~datafusion.functions.cume_dist`, - :py:func:`~datafusion.functions.lead`, - and :py:func:`~datafusion.functions.lag`. - - It produces the result of all rows in a single pass. It - expects to receive the entire partition as the ``value`` and - must produce an output column with one output row for every - input row. - - ``num_rows`` is required to correctly compute the output in case - ``len(values) == 0`` - - Implementing this function is an optimization. Certain window - functions are not affected by the window frame definition or - the query doesn't have a frame, and ``evaluate`` skips the - (costly) window frame boundary calculation and the overhead of - calling ``evaluate`` for each output row. - - For example, the `LAG` built in window function does not use - the values of its window frame (it can be computed in one shot - on the entire partition with ``Self::evaluate_all`` regardless of the - window defined in the ``OVER`` clause) - - .. code-block:: text - - lag(x, 1) OVER (ORDER BY z ROWS BETWEEN 2 PRECEDING AND 3 FOLLOWING) - - However, ``avg()`` computes the average in the window and thus - does use its window frame. - - .. code-block:: text - - avg(x) OVER (PARTITION BY y ORDER BY z ROWS BETWEEN 2 PRECEDING AND 3 FOLLOWING) - """ # noqa: W505, E501 - - def evaluate( - self, values: list[pa.Array], eval_range: tuple[int, int] - ) -> pa.Scalar: - """Evaluate window function on a range of rows in an input partition. - - This is the simplest and most general function to implement - but also the least performant as it creates output one row at - a time. It is typically much faster to implement stateful - evaluation using one of the other specialized methods on this - trait. - - Returns a [`ScalarValue`] that is the value of the window - function within `range` for the entire partition. Argument - `values` contains the evaluation result of function arguments - and evaluation results of ORDER BY expressions. If function has a - single argument, `values[1..]` will contain ORDER BY expression results. - """ - - def evaluate_all_with_rank( - self, num_rows: int, ranks_in_partition: list[tuple[int, int]] - ) -> pa.Array: - """Called for window functions that only need the rank of a row. - - Evaluate the partition evaluator against the partition using - the row ranks. For example, ``rank(col("a"))`` produces - - .. code-block:: text - - a | rank - - + ---- - A | 1 - A | 1 - C | 3 - D | 4 - D | 4 - - For this case, `num_rows` would be `5` and the - `ranks_in_partition` would be called with - - .. code-block:: text - - [ - (0,1), - (2,2), - (3,4), - ] - - The user must implement this method if ``include_rank`` returns True. 
- """ - - def supports_bounded_execution(self) -> bool: - """Can the window function be incrementally computed using bounded memory?""" - return False - - def uses_window_frame(self) -> bool: - """Does the window function use the values from the window frame?""" - return False - - def include_rank(self) -> bool: - """Can this function be evaluated with (only) rank?""" - return False - - -class WindowUDF: - """Class for performing window user-defined functions (UDF). - - Window UDFs operate on a partition of rows. See - also :py:class:`ScalarUDF` for operating on a row by row basis. - """ - - def __init__( - self, - name: str, - func: Callable[[], WindowEvaluator], - input_types: list[pa.DataType], - return_type: pa.DataType, - volatility: Volatility | str, - ) -> None: - """Instantiate a user-defined window function (UDWF). - - See :py:func:`udwf` for a convenience function and argument - descriptions. - """ - self._udwf = df_internal.WindowUDF( - name, func, input_types, return_type, str(volatility) - ) - - def __call__(self, *args: Expr) -> Expr: - """Execute the UDWF. - - This function is not typically called by an end user. These calls will - occur during the evaluation of the dataframe. - """ - args_raw = [arg.expr for arg in args] - return Expr(self._udwf.__call__(*args_raw)) - - @overload - @staticmethod - def udwf( - input_types: pa.DataType | list[pa.DataType], - return_type: pa.DataType, - volatility: Volatility | str, - name: Optional[str] = None, - ) -> Callable[..., WindowUDF]: ... - - @overload - @staticmethod - def udwf( - func: Callable[[], WindowEvaluator], - input_types: pa.DataType | list[pa.DataType], - return_type: pa.DataType, - volatility: Volatility | str, - name: Optional[str] = None, - ) -> WindowUDF: ... - - @staticmethod - def udwf(*args: Any, **kwargs: Any): # noqa: D417 - """Create a new User-Defined Window Function (UDWF). - - This class can be used both as a **function** and as a **decorator**. - - Usage: - - **As a function**: Call `udwf(func, input_types, return_type, volatility, - name)`. - - **As a decorator**: Use `@udwf(input_types, return_type, volatility, - name)`. When using `udwf` as a decorator, **do not pass `func` - explicitly**. - - **Function example:** - ``` - import pyarrow as pa - - class BiasedNumbers(WindowEvaluator): - def __init__(self, start: int = 0) -> None: - self.start = start - - def evaluate_all(self, values: list[pa.Array], - num_rows: int) -> pa.Array: - return pa.array([self.start + i for i in range(num_rows)]) - - def bias_10() -> BiasedNumbers: - return BiasedNumbers(10) - - udwf1 = udwf(BiasedNumbers, pa.int64(), pa.int64(), "immutable") - udwf2 = udwf(bias_10, pa.int64(), pa.int64(), "immutable") - udwf3 = udwf(lambda: BiasedNumbers(20), pa.int64(), pa.int64(), "immutable") - - ``` - - **Decorator example:** - ``` - @udwf(pa.int64(), pa.int64(), "immutable") - def biased_numbers() -> BiasedNumbers: - return BiasedNumbers(10) - ``` - - Args: - func: **Only needed when calling as a function. Skip this argument when - using `udwf` as a decorator.** - input_types: The data types of the arguments. - return_type: The data type of the return value. - volatility: See :py:class:`Volatility` for allowed values. - name: A descriptive name for the function. - - Returns: - A user-defined window function that can be used in window function calls. 
- """ - if args and callable(args[0]): - # Case 1: Used as a function, require the first parameter to be callable - return WindowUDF._create_window_udf(*args, **kwargs) - # Case 2: Used as a decorator with parameters - return WindowUDF._create_window_udf_decorator(*args, **kwargs) - - @staticmethod - def _create_window_udf( - func: Callable[[], WindowEvaluator], - input_types: pa.DataType | list[pa.DataType], - return_type: pa.DataType, - volatility: Volatility | str, - name: Optional[str] = None, - ) -> WindowUDF: - """Create a WindowUDF instance from function arguments.""" - if not callable(func): - msg = "`func` must be callable." - raise TypeError(msg) - if not isinstance(func(), WindowEvaluator): - msg = "`func` must implement the abstract base class WindowEvaluator" - raise TypeError(msg) - - name = name or func.__qualname__.lower() - input_types = ( - [input_types] if isinstance(input_types, pa.DataType) else input_types - ) - - return WindowUDF(name, func, input_types, return_type, volatility) - - @staticmethod - def _get_default_name(func: Callable) -> str: - """Get the default name for a function based on its attributes.""" - if hasattr(func, "__qualname__"): - return func.__qualname__.lower() - return func.__class__.__name__.lower() - - @staticmethod - def _normalize_input_types( - input_types: pa.DataType | list[pa.DataType], - ) -> list[pa.DataType]: - """Convert a single DataType to a list if needed.""" - if isinstance(input_types, pa.DataType): - return [input_types] - return input_types - - @staticmethod - def _create_window_udf_decorator( - input_types: pa.DataType | list[pa.DataType], - return_type: pa.DataType, - volatility: Volatility | str, - name: Optional[str] = None, - ) -> Callable[[Callable[[], WindowEvaluator]], Callable[..., Expr]]: - """Create a decorator for a WindowUDF.""" - - def decorator(func: Callable[[], WindowEvaluator]) -> Callable[..., Expr]: - udwf_caller = WindowUDF._create_window_udf( - func, input_types, return_type, volatility, name - ) - - @functools.wraps(func) - def wrapper(*args: Any, **kwargs: Any) -> Expr: - return udwf_caller(*args, **kwargs) - - return wrapper - - return decorator - - -# Convenience exports so we can import instead of treating as -# variables at the package root -udf = ScalarUDF.udf -udaf = AggregateUDF.udaf -udwf = WindowUDF.udwf +warnings.warn( + "The module 'udf' is deprecated and will be removed in the next release. " + "Please use 'user_defined' instead.", + DeprecationWarning, + stacklevel=2, +) diff --git a/python/datafusion/user_defined.py b/python/datafusion/user_defined.py new file mode 100644 index 000000000..dd634c7fb --- /dev/null +++ b/python/datafusion/user_defined.py @@ -0,0 +1,845 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+"""Provides the user-defined functions for evaluation of dataframes."""
+
+from __future__ import annotations
+
+import functools
+from abc import ABCMeta, abstractmethod
+from enum import Enum
+from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, overload
+
+import pyarrow as pa
+
+import datafusion._internal as df_internal
+from datafusion.expr import Expr
+
+if TYPE_CHECKING:
+    _R = TypeVar("_R", bound=pa.DataType)
+
+
+class Volatility(Enum):
+    """Defines how stable or volatile a function is.
+
+    When setting the volatility of a function, you can either pass this
+    enumeration or a ``str``. The ``str`` equivalent is the lower case value of the
+    name (`"immutable"`, `"stable"`, or `"volatile"`).
+    """
+
+    Immutable = 1
+    """An immutable function will always return the same output when given the
+    same input.
+
+    DataFusion will attempt to inline immutable functions during planning.
+    """
+
+    Stable = 2
+    """
+    Returns the same value for a given input within a single query.
+
+    A stable function may return different values given the same input across
+    different queries but must return the same value for a given input within a
+    query. An example of this is the ``Now`` function. DataFusion will attempt to
+    inline ``Stable`` functions during planning, when possible. For query
+    ``select col1, now() from t1``, it might take a while to execute but ``now()``
+    column will be the same for each output row, which is evaluated during
+    planning.
+    """
+
+    Volatile = 3
+    """A volatile function may change the return value from evaluation to
+    evaluation.
+
+    Multiple invocations of a volatile function may return different results
+    when used in the same query. An example of this is the random() function.
+    DataFusion can not evaluate such functions during planning. In the query
+    ``select col1, random() from t1``, ``random()`` function will be evaluated
+    for each output row, resulting in a unique random value for each row.
+    """
+
+    def __str__(self) -> str:
+        """Returns the string equivalent."""
+        return self.name.lower()
+
+
+class ScalarUDF:
+    """Class for performing scalar user-defined functions (UDF).
+
+    Scalar UDFs operate on a row by row basis. See also :py:class:`AggregateUDF` for
+    operating on a group of rows.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        func: Callable[..., _R],
+        input_types: pa.DataType | list[pa.DataType],
+        return_type: _R,
+        volatility: Volatility | str,
+    ) -> None:
+        """Instantiate a scalar user-defined function (UDF).
+
+        See helper method :py:func:`udf` for argument details.
+        """
+        if isinstance(input_types, pa.DataType):
+            input_types = [input_types]
+        self._udf = df_internal.ScalarUDF(
+            name, func, input_types, return_type, str(volatility)
+        )
+
+    def __repr__(self) -> str:
+        """Print a string representation of the Scalar UDF."""
+        return self._udf.__repr__()
+
+    def __call__(self, *args: Expr) -> Expr:
+        """Execute the UDF.
+
+        This function is not typically called by an end user. These calls will
+        occur during the evaluation of the dataframe.
+        """
+        args_raw = [arg.expr for arg in args]
+        return Expr(self._udf.__call__(*args_raw))
+
+    @overload
+    @staticmethod
+    def udf(
+        input_types: list[pa.DataType],
+        return_type: _R,
+        volatility: Volatility | str,
+        name: Optional[str] = None,
+    ) -> Callable[..., ScalarUDF]: ...
+
+    @overload
+    @staticmethod
+    def udf(
+        func: Callable[..., _R],
+        input_types: list[pa.DataType],
+        return_type: _R,
+        volatility: Volatility | str,
+        name: Optional[str] = None,
+    ) -> ScalarUDF: ...
+
+    @staticmethod
+    def udf(*args: Any, **kwargs: Any):  # noqa: D417
+        """Create a new User-Defined Function (UDF).
+
+        This can be used either as a function or as a decorator.
+
+        Usage:
+        - As a function: ``udf(func, input_types, return_type, volatility, name)``.
+        - As a decorator: ``@udf(input_types, return_type, volatility, name)``.
+          When used as a decorator, do **not** pass ``func`` explicitly.
+
+        Args:
+            func (Callable, optional): Only needed when calling as a function.
+                Skip this argument when using ``udf`` as a decorator.
+            input_types (list[pa.DataType]): The data types of the arguments
+                to ``func``. This list must be of the same length as the number of
+                arguments.
+            return_type (_R): The data type of the return value from the function.
+            volatility (Volatility | str): See `Volatility` for allowed values.
+            name (Optional[str]): A descriptive name for the function.
+
+        Returns:
+            A user-defined function that can be used in SQL expressions,
+            data aggregation, or window function calls.
+
+        Example: Using ``udf`` as a function::
+
+            def double_func(x):
+                return x * 2
+            double_udf = udf(double_func, [pa.int32()], pa.int32(),
+                "volatile", "double_it")
+
+        Example: Using ``udf`` as a decorator::
+
+            @udf([pa.int32()], pa.int32(), "volatile", "double_it")
+            def double_udf(x):
+                return x * 2
+        """
+
+        def _function(
+            func: Callable[..., _R],
+            input_types: list[pa.DataType],
+            return_type: _R,
+            volatility: Volatility | str,
+            name: Optional[str] = None,
+        ) -> ScalarUDF:
+            if not callable(func):
+                msg = "`func` argument must be callable"
+                raise TypeError(msg)
+            if name is None:
+                if hasattr(func, "__qualname__"):
+                    name = func.__qualname__.lower()
+                else:
+                    name = func.__class__.__name__.lower()
+            return ScalarUDF(
+                name=name,
+                func=func,
+                input_types=input_types,
+                return_type=return_type,
+                volatility=volatility,
+            )
+
+        def _decorator(
+            input_types: list[pa.DataType],
+            return_type: _R,
+            volatility: Volatility | str,
+            name: Optional[str] = None,
+        ) -> Callable:
+            def decorator(func: Callable):
+                udf_caller = ScalarUDF.udf(
+                    func, input_types, return_type, volatility, name
+                )
+
+                @functools.wraps(func)
+                def wrapper(*args: Any, **kwargs: Any):
+                    return udf_caller(*args, **kwargs)
+
+                return wrapper
+
+            return decorator
+
+        if args and callable(args[0]):
+            # Case 1: Used as a function, require the first parameter to be callable
+            return _function(*args, **kwargs)
+        # Case 2: Used as a decorator with parameters
+        return _decorator(*args, **kwargs)
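A compact end-to-end sketch of the decorator form (``SessionContext.from_pydict`` and ``col`` are existing APIs; the column and function names are invented)::

    import pyarrow as pa
    import pyarrow.compute as pc
    from datafusion import SessionContext, col, udf

    @udf([pa.int64()], pa.int64(), "immutable", "add_one")
    def add_one(values: pa.Array) -> pa.Array:
        # scalar UDFs are vectorized: they receive and return Arrow arrays
        return pc.add(values, 1)

    ctx = SessionContext()
    df = ctx.from_pydict({"a": [1, 2, 3]})
    df.select(add_one(col("a"))).show()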
+
+
+class Accumulator(metaclass=ABCMeta):
+    """Defines how an :py:class:`AggregateUDF` accumulates values."""
+
+    @abstractmethod
+    def state(self) -> list[pa.Scalar]:
+        """Return the current state."""
+
+    @abstractmethod
+    def update(self, *values: pa.Array) -> None:
+        """Evaluate an array of values and update state."""
+
+    @abstractmethod
+    def merge(self, states: list[pa.Array]) -> None:
+        """Merge a set of states."""
+
+    @abstractmethod
+    def evaluate(self) -> pa.Scalar:
+        """Return the resultant value."""
+
+
+class AggregateUDF:
+    """Class for performing aggregate user-defined functions (UDAF).
+
+    Aggregate UDFs operate on a group of rows and return a single value. See
+    also :py:class:`ScalarUDF` for operating on a row by row basis.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        accumulator: Callable[[], Accumulator],
+        input_types: list[pa.DataType],
+        return_type: pa.DataType,
+        state_type: list[pa.DataType],
+        volatility: Volatility | str,
+    ) -> None:
+        """Instantiate a user-defined aggregate function (UDAF).
+
+        See :py:func:`udaf` for a convenience function and argument
+        descriptions.
+        """
+        self._udaf = df_internal.AggregateUDF(
+            name,
+            accumulator,
+            input_types,
+            return_type,
+            state_type,
+            str(volatility),
+        )
+
+    def __repr__(self) -> str:
+        """Print a string representation of the Aggregate UDF."""
+        return self._udaf.__repr__()
+
+    def __call__(self, *args: Expr) -> Expr:
+        """Execute the UDAF.
+
+        This function is not typically called by an end user. These calls will
+        occur during the evaluation of the dataframe.
+        """
+        args_raw = [arg.expr for arg in args]
+        return Expr(self._udaf.__call__(*args_raw))
+
+    @overload
+    @staticmethod
+    def udaf(
+        input_types: pa.DataType | list[pa.DataType],
+        return_type: pa.DataType,
+        state_type: list[pa.DataType],
+        volatility: Volatility | str,
+        name: Optional[str] = None,
+    ) -> Callable[..., AggregateUDF]: ...
+
+    @overload
+    @staticmethod
+    def udaf(
+        accum: Callable[[], Accumulator],
+        input_types: pa.DataType | list[pa.DataType],
+        return_type: pa.DataType,
+        state_type: list[pa.DataType],
+        volatility: Volatility | str,
+        name: Optional[str] = None,
+    ) -> AggregateUDF: ...
+
+    @staticmethod
+    def udaf(*args: Any, **kwargs: Any):  # noqa: D417
+        """Create a new User-Defined Aggregate Function (UDAF).
+
+        This allows you to define an aggregate function that can be used in
+        data aggregation or window function calls.
+
+        Usage:
+        - As a function: ``udaf(accum, input_types, return_type, state_type, volatility, name)``.
+        - As a decorator: ``@udaf(input_types, return_type, state_type, volatility, name)``.
+          When using ``udaf`` as a decorator, do not pass ``accum`` explicitly.
+
+        Function example:
+
+        If your :py:class:`Accumulator` can be instantiated with no arguments, you
+        can simply pass its type as ``accum``. If you need to pass additional
+        arguments to its constructor, you can define a lambda or a factory method.
+        During runtime the :py:class:`Accumulator` will be constructed for every
+        instance in which this UDAF is used. The following examples are all valid::
+
+            import pyarrow as pa
+            import pyarrow.compute as pc
+
+            class Summarize(Accumulator):
+                def __init__(self, bias: float = 0.0):
+                    self._sum = pa.scalar(bias)
+
+                def state(self) -> list[pa.Scalar]:
+                    return [self._sum]
+
+                def update(self, values: pa.Array) -> None:
+                    self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py())
+
+                def merge(self, states: list[pa.Array]) -> None:
+                    self._sum = pa.scalar(self._sum.as_py() + pc.sum(states[0]).as_py())
+
+                def evaluate(self) -> pa.Scalar:
+                    return self._sum
+
+            def sum_bias_10() -> Summarize:
+                return Summarize(10.0)
+
+            udaf1 = udaf(Summarize, pa.float64(), pa.float64(), [pa.float64()],
+                "immutable")
+            udaf2 = udaf(sum_bias_10, pa.float64(), pa.float64(), [pa.float64()],
+                "immutable")
+            udaf3 = udaf(lambda: Summarize(20.0), pa.float64(), pa.float64(),
+                [pa.float64()], "immutable")
+
+        Decorator example::
+
+            @udaf(pa.float64(), pa.float64(), [pa.float64()], "immutable")
+            def udf4() -> Summarize:
+                return Summarize(10.0)
+
+        Args:
+            accum: The accumulator python function. Only needed when calling as a
+                function. Skip this argument when using ``udaf`` as a decorator.
+            input_types: The data types of the arguments to ``accum``.
+            return_type: The data type of the return value.
+            state_type: The data types of the intermediate accumulation.
+            volatility: See :py:class:`Volatility` for allowed values.
+            name: A descriptive name for the function.
+
+        Returns:
+            A user-defined aggregate function, which can be used in either data
+            aggregation or window function calls.
+        """  # noqa: E501, W505
+
+        def _function(
+            accum: Callable[[], Accumulator],
+            input_types: pa.DataType | list[pa.DataType],
+            return_type: pa.DataType,
+            state_type: list[pa.DataType],
+            volatility: Volatility | str,
+            name: Optional[str] = None,
+        ) -> AggregateUDF:
+            if not callable(accum):
+                msg = "`func` must be callable."
+                raise TypeError(msg)
+            if not isinstance(accum(), Accumulator):
+                msg = "Accumulator must implement the abstract base class Accumulator"
+                raise TypeError(msg)
+            if name is None:
+                name = accum().__class__.__qualname__.lower()
+            if isinstance(input_types, pa.DataType):
+                input_types = [input_types]
+            return AggregateUDF(
+                name=name,
+                accumulator=accum,
+                input_types=input_types,
+                return_type=return_type,
+                state_type=state_type,
+                volatility=volatility,
+            )
+
+        def _decorator(
+            input_types: pa.DataType | list[pa.DataType],
+            return_type: pa.DataType,
+            state_type: list[pa.DataType],
+            volatility: Volatility | str,
+            name: Optional[str] = None,
+        ) -> Callable[..., Callable[..., Expr]]:
+            def decorator(accum: Callable[[], Accumulator]) -> Callable[..., Expr]:
+                udaf_caller = AggregateUDF.udaf(
+                    accum, input_types, return_type, state_type, volatility, name
+                )
+
+                @functools.wraps(accum)
+                def wrapper(*args: Any, **kwargs: Any) -> Expr:
+                    return udaf_caller(*args, **kwargs)
+
+                return wrapper
+
+            return decorator
+
+        if args and callable(args[0]):
+            # Case 1: Used as a function, require the first parameter to be callable
+            return _function(*args, **kwargs)
+        # Case 2: Used as a decorator with parameters
+        return _decorator(*args, **kwargs)
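Building on the ``Summarize`` accumulator defined in the docstring above, a hedged usage sketch (``aggregate`` and ``from_pydict`` are existing APIs; assumes ``Summarize`` is in scope)::

    import pyarrow as pa
    from datafusion import SessionContext, col, udaf

    summarize = udaf(
        Summarize, pa.float64(), pa.float64(), [pa.float64()], "immutable"
    )

    ctx = SessionContext()
    df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
    # aggregate the whole frame with the user-defined accumulator
    df.aggregate([], [summarize(col("a"))]).show()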
+
+
+class WindowEvaluator:
+    """Evaluator class for user-defined window functions (UDWF).
+
+    It is up to the user to decide which evaluate function is appropriate.
+
+    +------------------------+--------------------------------+------------------+---------------------------+
+    | ``uses_window_frame``  | ``supports_bounded_execution`` | ``include_rank`` | function_to_implement     |
+    +========================+================================+==================+===========================+
+    | False (default)        | False (default)                | False (default)  | ``evaluate_all``          |
+    +------------------------+--------------------------------+------------------+---------------------------+
+    | False                  | True                           | False            | ``evaluate``              |
+    +------------------------+--------------------------------+------------------+---------------------------+
+    | False                  | True/False                     | True             | ``evaluate_all_with_rank``|
+    +------------------------+--------------------------------+------------------+---------------------------+
+    | True                   | True/False                     | True/False       | ``evaluate``              |
+    +------------------------+--------------------------------+------------------+---------------------------+
+    """  # noqa: W505, E501
+
+    def memoize(self) -> None:
+        """Perform a memoize operation to improve performance.
+
+        When the window frame has a fixed beginning (e.g UNBOUNDED
+        PRECEDING), some functions such as FIRST_VALUE and
+        NTH_VALUE do not need the (unbounded) input once they have
+        seen a certain amount of input.
+
+        `memoize` is called after each input batch is processed, and
+        such functions can save whatever they need.
+        """
+
+    def get_range(self, idx: int, num_rows: int) -> tuple[int, int]:  # noqa: ARG002
+        """Return the range for the window function.
+
+        If the `uses_window_frame` flag is `false`, this method is used to
+        calculate the required range for the window function during
+        stateful execution.
+
+        Generally there is no required range, hence by default this
+        returns the smallest range (current row), e.g. seeing the current
+        row is enough to calculate the window result (such as row_number,
+        rank, etc.).
+
+        Args:
+            idx: Current index.
+            num_rows: Number of rows.
+        """
+        return (idx, idx + 1)
+
+    def is_causal(self) -> bool:
+        """Get whether evaluator needs future data for its result."""
+        return False
+
+    def evaluate_all(self, values: list[pa.Array], num_rows: int) -> pa.Array:
+        """Evaluate a window function on an entire input partition.
+
+        This function is called once per input *partition* for window functions that
+        *do not use* values from the window frame, such as
+        :py:func:`~datafusion.functions.row_number`,
+        :py:func:`~datafusion.functions.rank`,
+        :py:func:`~datafusion.functions.dense_rank`,
+        :py:func:`~datafusion.functions.percent_rank`,
+        :py:func:`~datafusion.functions.cume_dist`,
+        :py:func:`~datafusion.functions.lead`,
+        and :py:func:`~datafusion.functions.lag`.
+
+        It produces the result of all rows in a single pass. It
+        expects to receive the entire partition as the ``value`` and
+        must produce an output column with one output row for every
+        input row.
+
+        ``num_rows`` is required to correctly compute the output in case
+        ``len(values) == 0``
+
+        Implementing this function is an optimization. Certain window
+        functions are not affected by the window frame definition or
+        the query doesn't have a frame, and ``evaluate`` skips the
+        (costly) window frame boundary calculation and the overhead of
+        calling ``evaluate`` for each output row.
+
+        For example, the `LAG` built in window function does not use
+        the values of its window frame (it can be computed in one shot
+        on the entire partition with ``Self::evaluate_all`` regardless of the
+        window defined in the ``OVER`` clause)
+
+        .. code-block:: text
+
+            lag(x, 1) OVER (ORDER BY z ROWS BETWEEN 2 PRECEDING AND 3 FOLLOWING)
+
+        However, ``avg()`` computes the average in the window and thus
+        does use its window frame.
+
+        .. code-block:: text
+
+            avg(x) OVER (PARTITION BY y ORDER BY z ROWS BETWEEN 2 PRECEDING AND 3 FOLLOWING)
+        """  # noqa: W505, E501
+
+    def evaluate(
+        self, values: list[pa.Array], eval_range: tuple[int, int]
+    ) -> pa.Scalar:
+        """Evaluate window function on a range of rows in an input partition.
+
+        This is the simplest and most general function to implement
+        but also the least performant as it creates output one row at
+        a time. It is typically much faster to implement stateful
+        evaluation using one of the other specialized methods on this
+        trait.
+
+        Returns a [`ScalarValue`] that is the value of the window
+        function within `range` for the entire partition. Argument
+        `values` contains the evaluation result of function arguments
+        and evaluation results of ORDER BY expressions. If function has a
+        single argument, `values[1..]` will contain ORDER BY expression results.
+        """
+
+    def evaluate_all_with_rank(
+        self, num_rows: int, ranks_in_partition: list[tuple[int, int]]
+    ) -> pa.Array:
+        """Called for window functions that only need the rank of a row.
+
+        Evaluate the partition evaluator against the partition using
+        the row ranks. For example, ``rank(col("a"))`` produces
+
+        .. code-block:: text
+
+            a | rank
+            - + ----
+            A | 1
+            A | 1
+            C | 3
+            D | 4
+            D | 4
+
+        For this case, `num_rows` would be `5` and the
+        `ranks_in_partition` would be called with
+
+        .. code-block:: text
+
+            [
+                (0,1),
+                (2,2),
+                (3,4),
+            ]
+
+        The user must implement this method if ``include_rank`` returns True.
+        """
+
+    def supports_bounded_execution(self) -> bool:
+        """Can the window function be incrementally computed using bounded memory?"""
+        return False
+
+    def uses_window_frame(self) -> bool:
+        """Does the window function use the values from the window frame?"""
+        return False
+
+    def include_rank(self) -> bool:
+        """Can this function be evaluated with (only) rank?"""
+        return False
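Per the table above, a bounded evaluator that does not use the window frame implements ``evaluate``; a minimal running-count sketch under that assumption (the class, names, and data are invented)::

    import pyarrow as pa
    from datafusion import SessionContext, col, udwf
    from datafusion.user_defined import WindowEvaluator

    class RunningCount(WindowEvaluator):
        def supports_bounded_execution(self) -> bool:
            return True

        def get_range(self, idx: int, num_rows: int) -> tuple[int, int]:
            # look at everything from the partition start through the current row
            return (0, idx + 1)

        def evaluate(
            self, values: list[pa.Array], eval_range: tuple[int, int]
        ) -> pa.Scalar:
            # called once per output row with the range chosen by get_range
            start, stop = eval_range
            return pa.scalar(stop - start)

    running_count = udwf(RunningCount, pa.int64(), pa.int64(), "immutable")

    ctx = SessionContext()
    df = ctx.from_pydict({"a": [10, 20, 30]})
    df.select(running_count(col("a"))).show()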
+ + Returns: + A user-defined aggregate function, which can be used in either data + aggregation or window function calls. + """ # noqa: E501 W505 + + def _function( + accum: Callable[[], Accumulator], + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + state_type: list[pa.DataType], + volatility: Volatility | str, + name: Optional[str] = None, + ) -> AggregateUDF: + if not callable(accum): + msg = "`func` must be callable." + raise TypeError(msg) + if not isinstance(accum(), Accumulator): + msg = "Accumulator must implement the abstract base class Accumulator" + raise TypeError(msg) + if name is None: + name = accum().__class__.__qualname__.lower() + if isinstance(input_types, pa.DataType): + input_types = [input_types] + return AggregateUDF( + name=name, + accumulator=accum, + input_types=input_types, + return_type=return_type, + state_type=state_type, + volatility=volatility, + ) + + def _decorator( + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + state_type: list[pa.DataType], + volatility: Volatility | str, + name: Optional[str] = None, + ) -> Callable[..., Callable[..., Expr]]: + def decorator(accum: Callable[[], Accumulator]) -> Callable[..., Expr]: + udaf_caller = AggregateUDF.udaf( + accum, input_types, return_type, state_type, volatility, name + ) + + @functools.wraps(accum) + def wrapper(*args: Any, **kwargs: Any) -> Expr: + return udaf_caller(*args, **kwargs) + + return wrapper + + return decorator + + if args and callable(args[0]): + # Case 1: Used as a function, require the first parameter to be callable + return _function(*args, **kwargs) + # Case 2: Used as a decorator with parameters + return _decorator(*args, **kwargs) + + +class WindowEvaluator: + """Evaluator class for user-defined window functions (UDWF). + + It is up to the user to decide which evaluate function is appropriate. + + +------------------------+--------------------------------+------------------+---------------------------+ + | ``uses_window_frame`` | ``supports_bounded_execution`` | ``include_rank`` | function_to_implement | + +========================+================================+==================+===========================+ + | False (default) | False (default) | False (default) | ``evaluate_all`` | + +------------------------+--------------------------------+------------------+---------------------------+ + | False | True | False | ``evaluate`` | + +------------------------+--------------------------------+------------------+---------------------------+ + | False | True/False | True | ``evaluate_all_with_rank``| + +------------------------+--------------------------------+------------------+---------------------------+ + | True | True/False | True/False | ``evaluate`` | + +------------------------+--------------------------------+------------------+---------------------------+ + """ # noqa: W505, E501 + + def memoize(self) -> None: + """Perform a memoize operation to improve performance. + + When the window frame has a fixed beginning (e.g UNBOUNDED + PRECEDING), some functions such as FIRST_VALUE and + NTH_VALUE do not need the (unbounded) input once they have + seen a certain amount of input. + + `memoize` is called after each input batch is processed, and + such functions can save whatever they need + """ + + def get_range(self, idx: int, num_rows: int) -> tuple[int, int]: # noqa: ARG002 + """Return the range for the window fuction. + + If `uses_window_frame` flag is `false`. 
This method is used to + calculate required range for the window function during + stateful execution. + + Generally there is no required range, hence by default this + returns smallest range(current row). e.g seeing current row is + enough to calculate window result (such as row_number, rank, + etc) + + Args: + idx:: Current index + num_rows: Number of rows. + """ + return (idx, idx + 1) + + def is_causal(self) -> bool: + """Get whether evaluator needs future data for its result.""" + return False + + def evaluate_all(self, values: list[pa.Array], num_rows: int) -> pa.Array: + """Evaluate a window function on an entire input partition. + + This function is called once per input *partition* for window functions that + *do not use* values from the window frame, such as + :py:func:`~datafusion.functions.row_number`, + :py:func:`~datafusion.functions.rank`, + :py:func:`~datafusion.functions.dense_rank`, + :py:func:`~datafusion.functions.percent_rank`, + :py:func:`~datafusion.functions.cume_dist`, + :py:func:`~datafusion.functions.lead`, + and :py:func:`~datafusion.functions.lag`. + + It produces the result of all rows in a single pass. It + expects to receive the entire partition as the ``value`` and + must produce an output column with one output row for every + input row. + + ``num_rows`` is required to correctly compute the output in case + ``len(values) == 0`` + + Implementing this function is an optimization. Certain window + functions are not affected by the window frame definition or + the query doesn't have a frame, and ``evaluate`` skips the + (costly) window frame boundary calculation and the overhead of + calling ``evaluate`` for each output row. + + For example, the `LAG` built in window function does not use + the values of its window frame (it can be computed in one shot + on the entire partition with ``Self::evaluate_all`` regardless of the + window defined in the ``OVER`` clause) + + .. code-block:: text + + lag(x, 1) OVER (ORDER BY z ROWS BETWEEN 2 PRECEDING AND 3 FOLLOWING) + + However, ``avg()`` computes the average in the window and thus + does use its window frame. + + .. code-block:: text + + avg(x) OVER (PARTITION BY y ORDER BY z ROWS BETWEEN 2 PRECEDING AND 3 FOLLOWING) + """ # noqa: W505, E501 + + def evaluate( + self, values: list[pa.Array], eval_range: tuple[int, int] + ) -> pa.Scalar: + """Evaluate window function on a range of rows in an input partition. + + This is the simplest and most general function to implement + but also the least performant as it creates output one row at + a time. It is typically much faster to implement stateful + evaluation using one of the other specialized methods on this + trait. + + Returns a [`ScalarValue`] that is the value of the window + function within `range` for the entire partition. Argument + `values` contains the evaluation result of function arguments + and evaluation results of ORDER BY expressions. If function has a + single argument, `values[1..]` will contain ORDER BY expression results. + """ + + def evaluate_all_with_rank( + self, num_rows: int, ranks_in_partition: list[tuple[int, int]] + ) -> pa.Array: + """Called for window functions that only need the rank of a row. + + Evaluate the partition evaluator against the partition using + the row ranks. For example, ``rank(col("a"))`` produces + + .. code-block:: text + + a | rank + - + ---- + A | 1 + A | 1 + C | 3 + D | 4 + D | 4 + + For this case, `num_rows` would be `5` and the + `ranks_in_partition` would be called with + + .. 
+        """
+
+    def evaluate_all_with_rank(
+        self, num_rows: int, ranks_in_partition: list[tuple[int, int]]
+    ) -> pa.Array:
+        """Called for window functions that only need the rank of a row.
+
+        Evaluate the partition evaluator against the partition using
+        the row ranks. For example, ``rank(col("a"))`` produces
+
+        .. code-block:: text
+
+            a | rank
+            - + ----
+            A | 1
+            A | 1
+            C | 3
+            D | 4
+            D | 4
+
+        For this case, `num_rows` would be `5` and the
+        `ranks_in_partition` would be called with
+
+        .. code-block:: text
+
+            [
+                (0,2),
+                (2,3),
+                (3,5),
+            ]
+
+        The user must implement this method if ``include_rank`` returns True.
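+
+        A sketch that reproduces ``rank`` from these ranges (illustrative
+        only)::
+
+            class MyRank(WindowEvaluator):
+                def include_rank(self) -> bool:
+                    return True
+
+                def evaluate_all_with_rank(
+                    self, num_rows: int, ranks_in_partition: list[tuple[int, int]]
+                ) -> pa.Array:
+                    out: list[int] = []
+                    for start, stop in ranks_in_partition:
+                        # All rows in a group of equal values share the
+                        # 1-based position of the group's first row.
+                        out.extend([start + 1] * (stop - start))
+                    return pa.array(out)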
+        """
+
+    def supports_bounded_execution(self) -> bool:
+        """Can the window function be incrementally computed using bounded memory?"""
+        return False
+
+    def uses_window_frame(self) -> bool:
+        """Does the window function use the values from the window frame?"""
+        return False
+
+    def include_rank(self) -> bool:
+        """Can this function be evaluated with (only) rank?"""
+        return False
+
+
+class WindowUDF:
+    """Class for performing window user-defined functions (UDWF).
+
+    Window UDFs operate on a partition of rows. See
+    also :py:class:`ScalarUDF` for operating on a row by row basis.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        func: Callable[[], WindowEvaluator],
+        input_types: list[pa.DataType],
+        return_type: pa.DataType,
+        volatility: Volatility | str,
+    ) -> None:
+        """Instantiate a user-defined window function (UDWF).
+
+        See :py:func:`udwf` for a convenience function and argument
+        descriptions.
+        """
+        self._udwf = df_internal.WindowUDF(
+            name, func, input_types, return_type, str(volatility)
+        )
+
+    def __repr__(self) -> str:
+        """Print a string representation of the Window UDF."""
+        return self._udwf.__repr__()
+
+    def __call__(self, *args: Expr) -> Expr:
+        """Execute the UDWF.
+
+        This function is not typically called by an end user. These calls will
+        occur during the evaluation of the dataframe.
+        """
+        args_raw = [arg.expr for arg in args]
+        return Expr(self._udwf.__call__(*args_raw))
+
+    @overload
+    @staticmethod
+    def udwf(
+        input_types: pa.DataType | list[pa.DataType],
+        return_type: pa.DataType,
+        volatility: Volatility | str,
+        name: Optional[str] = None,
+    ) -> Callable[..., WindowUDF]: ...
+
+    @overload
+    @staticmethod
+    def udwf(
+        func: Callable[[], WindowEvaluator],
+        input_types: pa.DataType | list[pa.DataType],
+        return_type: pa.DataType,
+        volatility: Volatility | str,
+        name: Optional[str] = None,
+    ) -> WindowUDF: ...
+
+    @staticmethod
+    def udwf(*args: Any, **kwargs: Any):  # noqa: D417
+        """Create a new User-Defined Window Function (UDWF).
+
+        ``udwf`` can be used either as a function or as a decorator.
+
+        Usage:
+        - As a function: ``udwf(func, input_types, return_type, volatility, name)``.
+        - As a decorator: ``@udwf(input_types, return_type, volatility, name)``.
+          When using ``udwf`` as a decorator, do not pass ``func`` explicitly.
+
+        Function example::
+
+            import pyarrow as pa
+
+            class BiasedNumbers(WindowEvaluator):
+                def __init__(self, start: int = 0) -> None:
+                    self.start = start
+
+                def evaluate_all(
+                    self, values: list[pa.Array], num_rows: int
+                ) -> pa.Array:
+                    return pa.array([self.start + i for i in range(num_rows)])
+
+            def bias_10() -> BiasedNumbers:
+                return BiasedNumbers(10)
+
+            udwf1 = udwf(BiasedNumbers, pa.int64(), pa.int64(), "immutable")
+            udwf2 = udwf(bias_10, pa.int64(), pa.int64(), "immutable")
+            udwf3 = udwf(lambda: BiasedNumbers(20), pa.int64(), pa.int64(), "immutable")
+
+        Decorator example::
+
+            @udwf(pa.int64(), pa.int64(), "immutable")
+            def biased_numbers() -> BiasedNumbers:
+                return BiasedNumbers(10)
+
+        Args:
+            func: Only needed when calling as a function. Skip this argument when
+                using ``udwf`` as a decorator.
+            input_types: The data types of the arguments.
+            return_type: The data type of the return value.
+            volatility: See :py:class:`Volatility` for allowed values.
+            name: A descriptive name for the function.
+
+        Returns:
+            A user-defined window function that can be used in window function calls.
+        """
+        if args and callable(args[0]):
+            # Case 1: Used as a function, require the first parameter to be callable
+            return WindowUDF._create_window_udf(*args, **kwargs)
+        # Case 2: Used as a decorator with parameters
+        return WindowUDF._create_window_udf_decorator(*args, **kwargs)
+
+    @staticmethod
+    def _create_window_udf(
+        func: Callable[[], WindowEvaluator],
+        input_types: pa.DataType | list[pa.DataType],
+        return_type: pa.DataType,
+        volatility: Volatility | str,
+        name: Optional[str] = None,
+    ) -> WindowUDF:
+        """Create a WindowUDF instance from function arguments."""
+        if not callable(func):
+            msg = "`func` must be callable."
+            raise TypeError(msg)
+        if not isinstance(func(), WindowEvaluator):
+            msg = "`func` must implement the abstract base class WindowEvaluator"
+            raise TypeError(msg)
+
+        name = name or func.__qualname__.lower()
+        input_types = (
+            [input_types] if isinstance(input_types, pa.DataType) else input_types
+        )
+
+        return WindowUDF(name, func, input_types, return_type, volatility)
+
+    @staticmethod
+    def _get_default_name(func: Callable) -> str:
+        """Get the default name for a function based on its attributes."""
+        if hasattr(func, "__qualname__"):
+            return func.__qualname__.lower()
+        return func.__class__.__name__.lower()
+
+    @staticmethod
+    def _normalize_input_types(
+        input_types: pa.DataType | list[pa.DataType],
+    ) -> list[pa.DataType]:
+        """Convert a single DataType to a list if needed."""
+        if isinstance(input_types, pa.DataType):
+            return [input_types]
+        return input_types
+
+    @staticmethod
+    def _create_window_udf_decorator(
+        input_types: pa.DataType | list[pa.DataType],
+        return_type: pa.DataType,
+        volatility: Volatility | str,
+        name: Optional[str] = None,
+    ) -> Callable[[Callable[[], WindowEvaluator]], Callable[..., Expr]]:
+        """Create a decorator for a WindowUDF."""
+
+        def decorator(func: Callable[[], WindowEvaluator]) -> Callable[..., Expr]:
+            udwf_caller = WindowUDF._create_window_udf(
+                func, input_types, return_type, volatility, name
+            )
+
+            @functools.wraps(func)
+            def wrapper(*args: Any, **kwargs: Any) -> Expr:
+                return udwf_caller(*args, **kwargs)
+
+            return wrapper
+
+        return decorator
+
+
+class TableFunction:
+    """Class for performing user-defined table functions (UDTF).
+
+    Table functions generate new table providers based on the
+    input expressions.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        func: Callable[[], Any],
+    ) -> None:
+        """Instantiate a user-defined table function (UDTF).
+
+        See :py:func:`udtf` for a convenience function and argument
+        descriptions.
+        """
+        self._udtf = df_internal.TableFunction(name, func)
+
+    def __call__(self, *args: Expr) -> Any:
+        """Execute the UDTF and return a table provider."""
+        args_raw = [arg.expr for arg in args]
+        return self._udtf.__call__(*args_raw)
+
+    @overload
+    @staticmethod
+    def udtf(
+        name: str,
+    ) -> Callable[..., Any]: ...
+
+    @overload
+    @staticmethod
+    def udtf(
+        func: Callable[[], Any],
+        name: str,
+    ) -> TableFunction: ...
+
+    @staticmethod
+    def udtf(*args: Any, **kwargs: Any):
+        """Create a new User-Defined Table Function (UDTF).
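+
+        ``udtf`` can be used as a function or as a decorator. A sketch of the
+        decorator form (illustrative only; ``MyTableProvider`` stands in for
+        any object exposing ``__datafusion_table_provider__``)::
+
+            @udtf("my_table")
+            def my_table_func():
+                return MyTableProvider()
+
+            # my_table_func is now a TableFunction named "my_table" that
+            # returns the provider when evaluated.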
+        """
+        if args and callable(args[0]):
+            # Case 1: Used as a function, require the first parameter to be callable
+            return TableFunction._create_table_udf(*args, **kwargs)
+        if args and hasattr(args[0], "__datafusion_table_function__"):
+            # Case 2: We have a datafusion FFI provided function
+            return TableFunction(args[1], args[0])
+        # Case 3: Used as a decorator with parameters
+        return TableFunction._create_table_udf_decorator(*args, **kwargs)
+
+    @staticmethod
+    def _create_table_udf(
+        func: Callable[..., Any],
+        name: str,
+    ) -> TableFunction:
+        """Create a TableFunction instance from function arguments."""
+        if not callable(func):
+            msg = "`func` must be callable."
+            raise TypeError(msg)
+
+        return TableFunction(name, func)
+
+    @staticmethod
+    def _create_table_udf_decorator(
+        name: Optional[str] = None,
+    ) -> Callable[[Callable[..., Any]], TableFunction]:
+        """Create a decorator for a TableFunction."""
+
+        def decorator(func: Callable[..., Any]) -> TableFunction:
+            return TableFunction._create_table_udf(func, name)
+
+        return decorator
+
+    def __repr__(self) -> str:
+        """User printable representation."""
+        return self._udtf.__repr__()
+
+
+# Convenience exports so we can import instead of treating as
+# variables at the package root
+udf = ScalarUDF.udf
+udaf = AggregateUDF.udaf
+udwf = WindowUDF.udwf
+udtf = TableFunction.udtf
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
index daa4331df..404ce9545 100644
--- a/python/tests/test_dataframe.py
+++ b/python/tests/test_dataframe.py
@@ -14,8 +14,12 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
+import ctypes
+import datetime
 import os
 import re
+import threading
+import time
 from typing import Any
 
 import pyarrow as pa
@@ -136,6 +140,38 @@ def clean_formatter_state():
     reset_formatter()
 
 
+@pytest.fixture
+def null_df():
+    """Create a DataFrame with null values of different types."""
+    ctx = SessionContext()
+
+    # Create a RecordBatch with nulls across different types
+    batch = pa.RecordBatch.from_arrays(
+        [
+            pa.array([1, None, 3, None], type=pa.int64()),
+            pa.array([4.5, 6.7, None, None], type=pa.float64()),
+            pa.array(["a", None, "c", None], type=pa.string()),
+            pa.array([True, None, False, None], type=pa.bool_()),
+            pa.array(
+                [10957, None, 18993, None], type=pa.date32()
+            ),  # 2000-01-01, null, 2022-01-01, null
+            pa.array(
+                [946684800000, None, 1640995200000, None], type=pa.date64()
+            ),  # 2000-01-01, null, 2022-01-01, null
+        ],
+        names=[
+            "int_col",
+            "float_col",
+            "str_col",
+            "bool_col",
+            "date32_col",
+            "date64_col",
+        ],
+    )
+
+    return ctx.create_dataframe([[batch]])
+
+
 # custom style for testing with html formatter
 class CustomStyleProvider:
     def get_cell_style(self) -> str:
@@ -2168,3 +2204,354 @@ def test_html_formatter_manual_format_html(clean_formatter_state):
     assert "