Skip to content

Commit 0a199a3

Browse files
committed
clean up
1 parent 0e57e95 commit 0a199a3

File tree

6 files changed

+13
-20
lines changed

6 files changed

+13
-20
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ test-integration:
4040
docker-compose -f dev/docker-compose-integration.yml kill
4141
docker-compose -f dev/docker-compose-integration.yml rm -f
4242
docker-compose -f dev/docker-compose-integration.yml up -d
43-
sleep 5
43+
sleep 10
4444
docker-compose -f dev/docker-compose-integration.yml exec -T spark-iceberg ipython ./provision.py
4545
poetry run pytest tests/ -v -m integration ${PYTEST_ARGS}
4646

pyiceberg/avro/file.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,6 @@ def _write_header(self) -> None:
276276
def write_block(self, objects: List[D]) -> None:
277277
in_memory = io.BytesIO()
278278
block_content_encoder = BinaryEncoder(output_stream=in_memory)
279-
280279
for obj in objects:
281280
self.writer.write(block_content_encoder, obj)
282281
block_content = in_memory.getvalue()

pyiceberg/io/pyarrow.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1719,10 +1719,6 @@ def fill_parquet_file_metadata(
17191719
data_file.split_offsets = split_offsets
17201720

17211721

1722-
# write_file(table: Table, tasks: Iterator[WriteTask]) -> Iterator[DataFile]:
1723-
# write_file(io = table.io, table_metadata = table.metadata, tasks: Iterator[WriteTask]) -> Iterator[DataFile]:
1724-
1725-
17261722
def write_file(io: FileIO, table_metadata: TableMetadata, tasks: Iterator[WriteTask]) -> Iterator[DataFile]:
17271723
for task in tasks:
17281724
parquet_writer_kwargs = _get_parquet_writer_kwargs(table_metadata.properties)

pyiceberg/table/__init__.py

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,6 @@ def property_as_int(properties: Dict[str, str], property_name: str, default: Opt
232232
return default
233233

234234

235-
# to do
236235
class PartitionProjector:
237236
def __init__(
238237
self,
@@ -2528,8 +2527,8 @@ def _write_filtered_manifest(self, file_num: int = 1) -> List[ManifestFile]:
25282527
) as writer:
25292528
for manifest_entry in filtered_entries:
25302529
writer.add_entry(manifest_entry)
2531-
m = writer.to_manifest_file()
2532-
return [m]
2530+
2531+
return [writer.to_manifest_file()]
25332532

25342533
def _write_added_manifest(self) -> List[ManifestFile]:
25352534
if self._added_data_files:
@@ -2560,11 +2559,11 @@ def _write_added_manifest(self) -> List[ManifestFile]:
25602559
def _manifests(self) -> List[ManifestFile]:
25612560
executor = ExecutorFactory.get_or_create()
25622561

2563-
added_manifests = executor.submit(self._write_added_manifest).result()
2564-
filtered_manifests = executor.submit(self._write_filtered_manifest).result()
2565-
existing_manifests = executor.submit(self._existing_manifests).result()
2562+
added_manifests = executor.submit(self._write_added_manifest)
2563+
filtered_manifests = executor.submit(self._write_filtered_manifest)
2564+
existing_manifests = executor.submit(self._existing_manifests)
25662565

2567-
return added_manifests + filtered_manifests + existing_manifests
2566+
return added_manifests.result() + filtered_manifests.result() + existing_manifests.result()
25682567

25692568
def _summary(self) -> Summary:
25702569
ssc = SnapshotSummaryCollector()
@@ -2573,7 +2572,7 @@ def _summary(self) -> Summary:
25732572
ssc.add_file(data_file=data_file)
25742573

25752574
# Only delete files caused by partial overwrite.
2576-
# Full table overwrite uses previous snapshot to update in a more efficient way.
2575+
# Full table overwrite uses previous snapshot summary to update in a more efficient way.
25772576
if self._deleted_data_files is not None and isinstance(self._deleted_data_files, ExplicitlyDeletedDataFiles):
25782577
for data_file in self._deleted_data_files.deleted_files:
25792578
ssc.remove_file(data_file=data_file)
@@ -2584,10 +2583,9 @@ def _summary(self) -> Summary:
25842583
else None
25852584
)
25862585

2587-
summary_collector_to_fill = ssc.build()
2588-
summary_to_fill = Summary(operation=self._operation, **summary_collector_to_fill)
2586+
summary_to_update = Summary(operation=self._operation, **ssc.build())
25892587
return update_snapshot_summaries(
2590-
summary=summary_to_fill,
2588+
summary=summary_to_update,
25912589
previous_summary=previous_snapshot.summary if previous_snapshot is not None else None,
25922590
truncate_full_table=isinstance(self._deleted_data_files, DeleteAllDataFiles),
25932591
)
@@ -2700,7 +2698,6 @@ def group_by_partition_scheme(
27002698
f"Not all transforms are supported, get: {[transform in supported for transform in iceberg_table_metadata.spec().fields]}."
27012699
)
27022700

2703-
# only works for identity
27042701
sort_options = _get_partition_sort_order(partition_columns, reverse=False)
27052702
sorted_arrow_table = arrow_table.sort_by(sorting=sort_options['sort_keys'], null_placement=sort_options['null_placement'])
27062703
return sorted_arrow_table

pyiceberg/table/snapshots.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,4 +364,5 @@ def _update_totals(total_property: str, added_property: str, removed_property: s
364364
added_property=ADDED_EQUALITY_DELETES,
365365
removed_property=REMOVED_EQUALITY_DELETES,
366366
)
367+
367368
return summary

tests/integration/test_partitioned_writes.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ def table_v1_with_null_partitioned(session_catalog: Catalog, arrow_table_with_nu
170170
"timestamptz",
171171
"timestamp",
172172
"binary",
173-
] # [, "date", "timestamptz", "timestamp", "binary", "fixed"]
173+
]
174174
for partition_col in partition_cols:
175175
identifier = f"default.arrow_table_v1_with_null_partitioned_on_col_{partition_col}"
176176

@@ -555,7 +555,7 @@ def test_data_files_with_table_partitioned_with_null(
555555
# fourth operation of append manifest list abandons M3 since it has no existing or added entries and keeps M4 and added M5 with 3 added files
556556
# fifth operation of static overwrite's manifest list is linked to one filtered manifest M7 which filters and merges M5 and M6, where each has 1 entry deleted (int=1 matching the filter) and 2 entries marked as existing; this operation
557557
# also links to M6 which adds 3 entries.
558-
# so we have flattened list of [[M1], [M1, M2], [M3, M4], [M4, M5], [M6, M7, M8]]
558+
# so we have flattened list of [[M1], [M1, M2], [M3, M4], [M4, M5], [M6, M7]]
559559
# where: add exist delete added_by
560560
# M1 3 0 0 S1
561561
# M2 3 0 0 S2

0 commit comments

Comments
 (0)