Skip to content

Commit 6c1e7cf

Browse files
authored
field-id in NameMapping should be optional (#1426)
1 parent 392c9ce commit 6c1e7cf

File tree

2 files changed

+45
-12
lines changed

2 files changed

+45
-12
lines changed

pyiceberg/table/name_mapping.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737

3838

3939
class MappedField(IcebergBaseModel):
40-
field_id: int = Field(alias="field-id")
40+
field_id: Optional[int] = Field(alias="field-id", default=None)
4141
names: List[str] = conlist(str)
4242
fields: List[MappedField] = Field(default_factory=list)
4343

@@ -49,12 +49,12 @@ def convert_null_to_empty_List(cls, v: Any) -> Any:
4949
@model_serializer
5050
def ser_model(self) -> Dict[str, Any]:
5151
"""Set custom serializer to leave out the field when it is empty."""
52-
fields = {"fields": self.fields} if len(self.fields) > 0 else {}
53-
return {
54-
"field-id": self.field_id,
55-
"names": self.names,
56-
**fields,
57-
}
52+
serialized: Dict[str, Any] = {"names": self.names}
53+
if self.field_id is not None:
54+
serialized["field-id"] = self.field_id
55+
if len(self.fields) > 0:
56+
serialized["fields"] = self.fields
57+
return serialized
5858

5959
def __len__(self) -> int:
6060
"""Return the number of fields."""
@@ -65,7 +65,8 @@ def __str__(self) -> str:
6565
# Otherwise the UTs fail because the order of the set can change
6666
fields_str = ", ".join([str(e) for e in self.fields]) or ""
6767
fields_str = " " + fields_str if fields_str else ""
68-
return "([" + ", ".join(self.names) + "] -> " + (str(self.field_id) or "?") + fields_str + ")"
68+
field_id = "?" if self.field_id is None else (str(self.field_id) or "?")
69+
return "([" + ", ".join(self.names) + "] -> " + field_id + fields_str + ")"
6970

7071

7172
class NameMapping(IcebergRootModel[List[MappedField]]):
@@ -232,7 +233,9 @@ def mapping(self, nm: NameMapping, field_results: List[MappedField]) -> List[Map
232233

233234
def fields(self, struct: List[MappedField], field_results: List[MappedField]) -> List[MappedField]:
234235
reassignments: Dict[str, int] = {
235-
update.name: update.field_id for f in field_results if (update := self._updates.get(f.field_id))
236+
update.name: update.field_id
237+
for f in field_results
238+
if f.field_id is not None and (update := self._updates.get(f.field_id))
236239
}
237240
return [
238241
updated_field
@@ -241,6 +244,8 @@ def fields(self, struct: List[MappedField], field_results: List[MappedField]) ->
241244
]
242245

243246
def field(self, field: MappedField, field_result: List[MappedField]) -> MappedField:
247+
if field.field_id is None:
248+
return field
244249
field_names = field.names
245250
if (update := self._updates.get(field.field_id)) is not None and update.name not in field_names:
246251
field_names.append(update.name)
@@ -333,8 +338,8 @@ def struct(self, struct: StructType, struct_partner: Optional[MappedField], fiel
333338
return StructType(*field_results)
334339

335340
def field(self, field: NestedField, field_partner: Optional[MappedField], field_result: IcebergType) -> IcebergType:
336-
if field_partner is None:
337-
raise ValueError(f"Field missing from NameMapping: {'.'.join(self.current_path)}")
341+
if field_partner is None or field_partner.field_id is None:
342+
raise ValueError(f"Field or field ID missing from NameMapping: {'.'.join(self.current_path)}")
338343

339344
return NestedField(
340345
field_id=field_partner.field_id,

tests/table/test_name_mapping.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,21 @@ def test_json_mapped_field_no_names_deserialization() -> None:
109109
assert MappedField(field_id=1, names=[]) == MappedField.model_validate_json(mapped_field_with_null_fields)
110110

111111

112+
def test_json_mapped_field_no_field_id_deserialization() -> None:
113+
mapped_field = """{
114+
"names": []
115+
}
116+
"""
117+
assert MappedField(field_id=None, names=[]) == MappedField.model_validate_json(mapped_field)
118+
119+
mapped_field_with_null_fields = """{
120+
"names": [],
121+
"fields": null
122+
}
123+
"""
124+
assert MappedField(names=[]) == MappedField.model_validate_json(mapped_field_with_null_fields)
125+
126+
112127
def test_json_name_mapping_deserialization() -> None:
113128
name_mapping = """
114129
[
@@ -164,10 +179,23 @@ def test_json_name_mapping_deserialization() -> None:
164179
])
165180

166181

182+
def test_json_mapped_field_no_field_id_serialization() -> None:
183+
table_name_mapping_nested_no_field_id = NameMapping([
184+
MappedField(field_id=1, names=["foo"]),
185+
MappedField(field_id=None, names=["bar"]),
186+
MappedField(field_id=2, names=["qux"], fields=[MappedField(field_id=None, names=["element"])]),
187+
])
188+
189+
assert (
190+
table_name_mapping_nested_no_field_id.model_dump_json()
191+
== """[{"names":["foo"],"field-id":1},{"names":["bar"]},{"names":["qux"],"field-id":2,"fields":[{"names":["element"]}]}]"""
192+
)
193+
194+
167195
def test_json_serialization(table_name_mapping_nested: NameMapping) -> None:
168196
assert (
169197
table_name_mapping_nested.model_dump_json()
170-
== """[{"field-id":1,"names":["foo"]},{"field-id":2,"names":["bar"]},{"field-id":3,"names":["baz"]},{"field-id":4,"names":["qux"],"fields":[{"field-id":5,"names":["element"]}]},{"field-id":6,"names":["quux"],"fields":[{"field-id":7,"names":["key"]},{"field-id":8,"names":["value"],"fields":[{"field-id":9,"names":["key"]},{"field-id":10,"names":["value"]}]}]},{"field-id":11,"names":["location"],"fields":[{"field-id":12,"names":["element"],"fields":[{"field-id":13,"names":["latitude"]},{"field-id":14,"names":["longitude"]}]}]},{"field-id":15,"names":["person"],"fields":[{"field-id":16,"names":["name"]},{"field-id":17,"names":["age"]}]}]"""
198+
== """[{"names":["foo"],"field-id":1},{"names":["bar"],"field-id":2},{"names":["baz"],"field-id":3},{"names":["qux"],"field-id":4,"fields":[{"names":["element"],"field-id":5}]},{"names":["quux"],"field-id":6,"fields":[{"names":["key"],"field-id":7},{"names":["value"],"field-id":8,"fields":[{"names":["key"],"field-id":9},{"names":["value"],"field-id":10}]}]},{"names":["location"],"field-id":11,"fields":[{"names":["element"],"field-id":12,"fields":[{"names":["latitude"],"field-id":13},{"names":["longitude"],"field-id":14}]}]},{"names":["person"],"field-id":15,"fields":[{"names":["name"],"field-id":16},{"names":["age"],"field-id":17}]}]"""
171199
)
172200

173201

0 commit comments

Comments
 (0)