Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
7249542
WIP
Fokko May 13, 2025
0260aa4
Merge branch 'main' of github.com:apache/iceberg-rust
Fokko May 13, 2025
cff3d2b
Expose Avro parsers in Python
Fokko May 14, 2025
ee6aeda
Merge branch 'main' of github.com:apache/iceberg-rust into fd-avro-py…
Fokko May 14, 2025
fb44a0a
Cleanup
Fokko May 14, 2025
9bc9baf
Thanks Scott!
Fokko May 14, 2025
24b02e3
Merge branch 'main' of github.com:apache/iceberg-rust into fd-avro-py…
Fokko May 15, 2025
d02aff8
Merge branch 'main' into fd-avro-pyiceberg
Fokko May 29, 2025
7c63887
Less is more
Fokko Jun 1, 2025
8ca7e90
Cleanup
Fokko Jun 1, 2025
2a14693
Merge branch 'main' of github.com:apache/iceberg-rust into fd-avro-py…
Fokko Jun 3, 2025
6b62d04
Merge branch 'main' of github.com:apache/iceberg-rust into fd-avro-py…
Fokko Jun 12, 2025
63918be
WIP
Fokko Jun 13, 2025
5e6bb10
fix: literal conversion to py
roeap Jun 26, 2025
2e62d7d
Merge pull request #1 from roeap/fix/pyo3-converty
Fokko Jun 26, 2025
846561a
Merge branch 'main' of github.com:apache/iceberg-rust into fd-avro-py…
Fokko Jun 26, 2025
820b895
Merge branch 'main' of github.com:apache/iceberg-rust into fd-avro-py…
Fokko Jul 16, 2025
9fc7ae5
Merge remote-tracking branch 'origin' into fd-avro-pyiceberg
kevinjqliu Jul 20, 2025
36aed09
fix clippy
kevinjqliu Jul 20, 2025
b0805a9
add unit test for `read_manifest_entries`
kevinjqliu Jul 21, 2025
bd904c5
Merge branch 'main' of github.com:apache/iceberg-rust into fd-avro-py…
Fokko Jul 23, 2025
7a33eaf
WIP
Fokko Jul 23, 2025
c3ccbc9
WIP
Fokko Jul 23, 2025
324e61f
Merge branch 'main' of github.com:apache/iceberg-rust into fd-avro-py…
Fokko Aug 7, 2025
4b0157e
MOAR
Fokko Aug 7, 2025
4490632
WIP
Fokko Aug 7, 2025
aed1078
Merge branch 'fd-avro-pyiceberg' of github.com:Fokko/iceberg-rust int…
Fokko Aug 7, 2025
9b3094b
Merge branch 'main' of github.com:apache/iceberg-rust into fd-avro-py…
Fokko Sep 1, 2025
a025533
Merge branch 'main' of github.com:apache/iceberg-rust into fd-avro-py…
Fokko Sep 1, 2025
9dec734
[WIP] Allow V2 reader to read v1 manifests
emkornfield Aug 28, 2025
6ca3f1e
defaults belong in serde
emkornfield Aug 29, 2025
73bae07
remove whitespace
emkornfield Aug 29, 2025
df15c22
Merge branch 'main' of github.com:apache/iceberg-rust into fd-avro-py…
Fokko Sep 1, 2025
3c661f4
Cleanup
Fokko Sep 1, 2025
248cc4b
Cleanup
Fokko Sep 1, 2025
589f480
Move PyDataFile to a separate file
Fokko Sep 1, 2025
66019f3
Bump Avro because of better error message
Fokko Sep 1, 2025
19fe632
Merge branch 'main' of github.com:apache/iceberg-rust into fd-avro-py…
Fokko Sep 3, 2025
53b833f
WIP
Fokko Sep 3, 2025
d52148d
Make the linter happy
Fokko Sep 3, 2025
350cbc1
Merge branch 'main' of github.com:apache/iceberg-rust into fd-avro-py…
Fokko Sep 8, 2025
5549a1f
Merge branch 'main' into fd-avro-pyiceberg
Fokko Sep 15, 2025
d5f671c
Fix faulty testconf
Fokko Sep 16, 2025
1153a2a
Merge branch 'fd-avro-pyiceberg' of github.com:Fokko/iceberg-rust int…
Fokko Sep 16, 2025
2536d0c
Make ruff happy
Fokko Sep 16, 2025
628c782
Yikes!
Fokko Sep 16, 2025
7f6e579
Make tests happy
Fokko Sep 16, 2025
514e8f1
pass path with file:// scheme
kevinjqliu Sep 17, 2025
c2be1fc
Merge branch 'main' into fd-avro-pyiceberg
Fokko Sep 17, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bindings/python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ include = [
ignore = ["F403", "F405"]

[tool.hatch.envs.dev]
dependencies = ["maturin>=1.0,<2.0", "pytest>=8.3.2", "datafusion==45.*", "pyiceberg[sql-sqlite,pyarrow]>=0.9.1"]
dependencies = ["maturin>=1.0,<2.0", "pytest>=8.3.2", "datafusion==45.*", "pyiceberg[sql-sqlite,pyarrow]>=0.10.0", "fastavro>=1.11.1"]

[tool.hatch.envs.dev.scripts]
build = "maturin build --out dist --sdist"
Expand Down
165 changes: 165 additions & 0 deletions bindings/python/src/data_file.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use std::collections::HashMap;

use iceberg::spec::{DataFile, DataFileFormat, PrimitiveLiteral};
use pyo3::IntoPyObjectExt;
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use pyo3::types::PyBytes;

/// Python-visible wrapper around an Iceberg [`PrimitiveLiteral`].
///
/// The wrapped literal is converted to a native Python object on demand
/// through the `value` method.
#[pyclass]
pub struct PyPrimitiveLiteral {
    /// The wrapped Rust literal.
    inner: PrimitiveLiteral,
}

#[pymethods]
impl PyPrimitiveLiteral {
    /// Converts the wrapped literal into the corresponding Python object.
    ///
    /// # Errors
    ///
    /// Returns a `ValueError` for the `AboveMax` and `BelowMin` variants,
    /// which have no Python representation here.
    pub fn value(&self, py: Python<'_>) -> PyResult<Py<PyAny>> {
        match &self.inner {
            // Variants without a Python counterpart are rejected up front.
            PrimitiveLiteral::AboveMax => Err(PyValueError::new_err("AboveMax is not supported")),
            PrimitiveLiteral::BelowMin => Err(PyValueError::new_err("BelowMin is not supported")),
            PrimitiveLiteral::Boolean(flag) => (*flag).into_py_any(py),
            PrimitiveLiteral::Int(number) => (*number).into_py_any(py),
            PrimitiveLiteral::Long(number) => (*number).into_py_any(py),
            // `.0` extracts the raw float from the `OrderedFloat` wrapper.
            PrimitiveLiteral::Float(number) => number.0.into_py_any(py),
            PrimitiveLiteral::Double(number) => number.0.into_py_any(py),
            PrimitiveLiteral::String(text) => text.as_str().into_py_any(py),
            PrimitiveLiteral::Binary(bytes) => PyBytes::new(py, bytes.as_slice()).into_py_any(py),
            // Python integers are arbitrary precision, so 128-bit values fit.
            PrimitiveLiteral::Int128(number) => (*number).into_py_any(py),
            PrimitiveLiteral::UInt128(number) => (*number).into_py_any(py),
        }
    }
}

/// Python-visible wrapper around an Iceberg [`DataFile`].
///
/// The wrapped value is private; it is read through the getters defined in
/// the `#[pymethods]` impl of this type.
#[pyclass]
pub struct PyDataFile {
// The wrapped Rust data file that every getter reads from.
inner: DataFile,
}

#[pymethods]
impl PyDataFile {
#[getter]
fn content(&self) -> i32 {
self.inner.content_type() as i32
}

#[getter]
fn file_path(&self) -> &str {
self.inner.file_path()
}

#[getter]
fn file_format(&self) -> &str {
match self.inner.file_format() {
DataFileFormat::Avro => "avro",
DataFileFormat::Orc => "orc",
DataFileFormat::Parquet => "parquet",
DataFileFormat::Puffin => "puffin",
}
}

#[getter]
fn partition(&self) -> Vec<Option<PyPrimitiveLiteral>> {
self.inner
.partition()
.iter()
.map(|lit| {
lit.and_then(|l| {
Some(PyPrimitiveLiteral {
inner: l.as_primitive_literal()?,
})
})
})
.collect()
}

#[getter]
fn record_count(&self) -> u64 {
self.inner.record_count()
}

#[getter]
fn file_size_in_bytes(&self) -> u64 {
self.inner.file_size_in_bytes()
}

#[getter]
fn column_sizes(&self) -> &HashMap<i32, u64> {
self.inner.column_sizes()
}

#[getter]
fn value_counts(&self) -> &HashMap<i32, u64> {
self.inner.value_counts()
}

#[getter]
fn null_value_counts(&self) -> &HashMap<i32, u64> {
self.inner.null_value_counts()
}

#[getter]
fn nan_value_counts(&self) -> &HashMap<i32, u64> {
self.inner.nan_value_counts()
}

#[getter]
fn upper_bounds(&self) -> HashMap<i32, Vec<u8>> {
self.inner
.upper_bounds()
.iter()
.map(|(k, v)| (*k, v.to_bytes().unwrap().to_vec()))
.collect()
}

#[getter]
fn lower_bounds(&self) -> HashMap<i32, Vec<u8>> {
self.inner
.lower_bounds()
.iter()
.map(|(k, v)| (*k, v.to_bytes().unwrap().to_vec()))
.collect()
}

#[getter]
fn key_metadata(&self) -> Option<&[u8]> {
self.inner.key_metadata()
}

#[getter]
fn split_offsets(&self) -> &[i64] {
self.inner.split_offsets()
}

#[getter]
fn equality_ids(&self) -> &[i32] {
self.inner.equality_ids()
}

#[getter]
fn sort_order_id(&self) -> Option<i32> {
self.inner.sort_order_id()
}
}

impl PyDataFile {
pub fn new(inner: DataFile) -> Self {
Self { inner }
}
}
3 changes: 3 additions & 0 deletions bindings/python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,17 @@

use pyo3::prelude::*;

mod data_file;
mod datafusion_table_provider;
mod error;
mod manifest;
mod runtime;
mod transform;

/// Initializer for the `pyiceberg_core_rust` Python extension module.
///
/// Calls `register_module` of the `datafusion_table_provider`, `transform`
/// and `manifest` modules, in that order, passing each the interpreter token
/// and the module object. The first failure is propagated with `?`;
/// otherwise `Ok(())` is returned.
#[pymodule]
fn pyiceberg_core_rust(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
datafusion_table_provider::register_module(py, m)?;
transform::register_module(py, m)?;
manifest::register_module(py, m)?;
Ok(())
}
Loading
Loading