Skip to content

Commit ace87ed

Browse files
authored
Inject columns of proper length for missing fields (#1767)
Fixes #1766.
1 parent f459662 commit ace87ed

File tree

2 files changed

+10
-10
lines changed

2 files changed

+10
-10
lines changed

pyiceberg/io/pyarrow.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1438,7 +1438,8 @@ def _task_to_record_batches(
14381438
for name, value in projected_missing_fields.items():
14391439
index = result_batch.schema.get_field_index(name)
14401440
if index != -1:
1441-
result_batch = result_batch.set_column(index, name, [value])
1441+
arr = pa.repeat(value, result_batch.num_rows)
1442+
result_batch = result_batch.set_column(index, name, arr)
14421443

14431444
yield result_batch
14441445

tests/io/test_pyarrow.py

Lines changed: 8 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1153,7 +1153,7 @@ def test_identity_transform_column_projection(tmp_path: str, catalog: InMemoryCa
11531153
properties={TableProperties.DEFAULT_NAME_MAPPING: create_mapping_from_schema(schema).model_dump_json()},
11541154
)
11551155

1156-
file_data = pa.array(["foo"], type=pa.string())
1156+
file_data = pa.array(["foo", "bar", "baz"], type=pa.string())
11571157
file_loc = f"{tmp_path}/test.parquet"
11581158
pq.write_table(pa.table([file_data], names=["other_field"]), file_loc)
11591159

@@ -1181,14 +1181,13 @@ def test_identity_transform_column_projection(tmp_path: str, catalog: InMemoryCa
11811181
with transaction.update_snapshot().overwrite() as update:
11821182
update.append_data_file(unpartitioned_file)
11831183

1184-
assert (
1185-
str(table.scan().to_arrow())
1186-
== """pyarrow.Table
1187-
other_field: large_string
1188-
partition_id: int64
1189-
----
1190-
other_field: [["foo"]]
1191-
partition_id: [[1]]"""
1184+
schema = pa.schema([("other_field", pa.large_string()), ("partition_id", pa.int64())])
1185+
assert table.scan().to_arrow() == pa.table(
1186+
{
1187+
"other_field": ["foo", "bar", "baz"],
1188+
"partition_id": [1, 1, 1],
1189+
},
1190+
schema=schema,
11921191
)
11931192

11941193

0 commit comments

Comments
 (0)