4747import boto3
4848import pytest
4949from moto import mock_aws
50+ from pydantic_core import to_json
5051
5152from pyiceberg .catalog import Catalog , load_catalog
5253from pyiceberg .catalog .noop import NoopCatalog
6768)
6869from pyiceberg .io .fsspec import FsspecFileIO
6970from pyiceberg .manifest import DataFile , FileFormat
71+ from pyiceberg .partitioning import PartitionField , PartitionSpec
7072from pyiceberg .schema import Accessor , Schema
7173from pyiceberg .serializers import ToOutputFile
7274from pyiceberg .table import FileScanTask , Table
7375from pyiceberg .table .metadata import TableMetadataV1 , TableMetadataV2
76+ from pyiceberg .transforms import DayTransform , IdentityTransform
7477from pyiceberg .types import (
7578 BinaryType ,
7679 BooleanType ,
@@ -1255,8 +1258,8 @@ def metadata_location_gz(tmp_path_factory: pytest.TempPathFactory) -> str:
12551258 {"key" : 15 , "value" : 0 },
12561259 ],
12571260 "lower_bounds" : [
1258- {"key" : 2 , "value" : b"2020-04-01 00:00 " },
1259- {"key" : 3 , "value" : b"2020-04-01 00:12 " },
1261+ {"key" : 2 , "value" : b"\x01 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " },
1262+ {"key" : 3 , "value" : b"\x01 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " },
12601263 {"key" : 7 , "value" : b"\x03 \x00 \x00 \x00 " },
12611264 {"key" : 8 , "value" : b"\x01 \x00 \x00 \x00 " },
12621265 {"key" : 10 , "value" : b"\xf6 (\\ \x8f \xc2 \x05 S\xc0 " },
@@ -1270,8 +1273,8 @@ def metadata_location_gz(tmp_path_factory: pytest.TempPathFactory) -> str:
12701273 {"key" : 19 , "value" : b"\x00 \x00 \x00 \x00 \x00 \x00 \x04 \xc0 " },
12711274 ],
12721275 "upper_bounds" : [
1273- {"key" : 2 , "value" : b"2020-04-30 23:5: " },
1274- {"key" : 3 , "value" : b"2020-05-01 00:41 " },
1276+ {"key" : 2 , "value" : b"\x06 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " },
1277+ {"key" : 3 , "value" : b"\x06 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " },
12751278 {"key" : 7 , "value" : b"\t \x01 \x00 \x00 " },
12761279 {"key" : 8 , "value" : b"\t \x01 \x00 \x00 " },
12771280 {"key" : 10 , "value" : b"\xcd \xcc \xcc \xcc \xcc ,_@" },
@@ -1376,8 +1379,8 @@ def metadata_location_gz(tmp_path_factory: pytest.TempPathFactory) -> str:
13761379 ],
13771380 "lower_bounds" : [
13781381 {"key" : 1 , "value" : b"\x01 \x00 \x00 \x00 " },
1379- {"key" : 2 , "value" : b"2020-04-01 00:00 " },
1380- {"key" : 3 , "value" : b"2020-04-01 00:03 " },
1382+ {"key" : 2 , "value" : b"\x01 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " },
1383+ {"key" : 3 , "value" : b"\x01 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " },
13811384 {"key" : 4 , "value" : b"\x00 \x00 \x00 \x00 " },
13821385 {"key" : 5 , "value" : b"\x01 \x00 \x00 \x00 " },
13831386 {"key" : 6 , "value" : b"N" },
@@ -1396,8 +1399,8 @@ def metadata_location_gz(tmp_path_factory: pytest.TempPathFactory) -> str:
13961399 ],
13971400 "upper_bounds" : [
13981401 {"key" : 1 , "value" : b"\x01 \x00 \x00 \x00 " },
1399- {"key" : 2 , "value" : b"2020-04-30 23:5: " },
1400- {"key" : 3 , "value" : b"2020-05-01 00:1: " },
1402+ {"key" : 2 , "value" : b"\x06 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " },
1403+ {"key" : 3 , "value" : b"\x06 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " },
14011404 {"key" : 4 , "value" : b"\x06 \x00 \x00 \x00 " },
14021405 {"key" : 5 , "value" : b"c\x00 \x00 \x00 " },
14031406 {"key" : 6 , "value" : b"Y" },
@@ -1858,15 +1861,40 @@ def simple_map() -> MapType:
18581861
18591862
18601863@pytest .fixture (scope = "session" )
1861- def generated_manifest_entry_file (avro_schema_manifest_entry : Dict [str , Any ]) -> Generator [str , None , None ]:
1864+ def test_schema () -> Schema :
1865+ return Schema (
1866+ NestedField (1 , "VendorID" , IntegerType (), False ), NestedField (2 , "tpep_pickup_datetime" , TimestampType (), False )
1867+ )
1868+
1869+
1870+ @pytest .fixture (scope = "session" )
1871+ def test_partition_spec () -> PartitionSpec :
1872+ return PartitionSpec (
1873+ PartitionField (1 , 1000 , IdentityTransform (), "VendorID" ),
1874+ PartitionField (2 , 1001 , DayTransform (), "tpep_pickup_day" ),
1875+ )
1876+
1877+
1878+ @pytest .fixture (scope = "session" )
1879+ def generated_manifest_entry_file (
1880+ avro_schema_manifest_entry : Dict [str , Any ], test_schema : Schema , test_partition_spec : PartitionSpec
1881+ ) -> Generator [str , None , None ]:
18621882 from fastavro import parse_schema , writer
18631883
18641884 parsed_schema = parse_schema (avro_schema_manifest_entry )
18651885
18661886 with TemporaryDirectory () as tmpdir :
18671887 tmp_avro_file = tmpdir + "/manifest.avro"
18681888 with open (tmp_avro_file , "wb" ) as out :
1869- writer (out , parsed_schema , manifest_entry_records )
1889+ writer (
1890+ out ,
1891+ parsed_schema ,
1892+ manifest_entry_records ,
1893+ metadata = {
1894+ "schema" : test_schema .model_dump_json (),
1895+ "partition-spec" : to_json (test_partition_spec .fields ).decode ("utf-8" ),
1896+ },
1897+ )
18701898 yield tmp_avro_file
18711899
18721900
0 commit comments