-
Notifications
You must be signed in to change notification settings - Fork 392
fix: field id in name mapping should be optional #1426
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -37,7 +37,7 @@ | |
|
|
||
|
|
||
| class MappedField(IcebergBaseModel): | ||
| field_id: int = Field(alias="field-id") | ||
| field_id: Optional[int] = Field(alias="field-id", default=None) | ||
| names: List[str] = conlist(str) | ||
| fields: List[MappedField] = Field(default_factory=list) | ||
|
|
||
|
|
@@ -49,12 +49,12 @@ def convert_null_to_empty_List(cls, v: Any) -> Any: | |
| @model_serializer | ||
| def ser_model(self) -> Dict[str, Any]: | ||
| """Set custom serializer to leave out the field when it is empty.""" | ||
| fields = {"fields": self.fields} if len(self.fields) > 0 else {} | ||
| return { | ||
| "field-id": self.field_id, | ||
| "names": self.names, | ||
| **fields, | ||
| } | ||
| serialized: Dict[str, Any] = {"names": self.names} | ||
| if self.field_id is not None: | ||
| serialized["field-id"] = self.field_id | ||
| if len(self.fields) > 0: | ||
| serialized["fields"] = self.fields | ||
| return serialized | ||
|
|
||
| def __len__(self) -> int: | ||
| """Return the number of fields.""" | ||
|
|
@@ -65,7 +65,8 @@ def __str__(self) -> str: | |
| # Otherwise the UTs fail because the order of the set can change | ||
| fields_str = ", ".join([str(e) for e in self.fields]) or "" | ||
| fields_str = " " + fields_str if fields_str else "" | ||
| return "([" + ", ".join(self.names) + "] -> " + (str(self.field_id) or "?") + fields_str + ")" | ||
| field_id = "?" if self.field_id is None else (str(self.field_id) or "?") | ||
| return "([" + ", ".join(self.names) + "] -> " + field_id + fields_str + ")" | ||
|
|
||
|
|
||
| class NameMapping(IcebergRootModel[List[MappedField]]): | ||
|
|
@@ -232,7 +233,9 @@ def mapping(self, nm: NameMapping, field_results: List[MappedField]) -> List[Map | |
|
|
||
| def fields(self, struct: List[MappedField], field_results: List[MappedField]) -> List[MappedField]: | ||
| reassignments: Dict[str, int] = { | ||
| update.name: update.field_id for f in field_results if (update := self._updates.get(f.field_id)) | ||
| update.name: update.field_id | ||
| for f in field_results | ||
| if f.field_id is not None and (update := self._updates.get(f.field_id)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what is the rationale behind this change? Should we look at all the other places where
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see, is this change also due to the type checker?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, also changing the API of
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| } | ||
| return [ | ||
| updated_field | ||
|
|
@@ -241,6 +244,8 @@ def fields(self, struct: List[MappedField], field_results: List[MappedField]) -> | |
| ] | ||
|
|
||
| def field(self, field: MappedField, field_result: List[MappedField]) -> MappedField: | ||
| if field.field_id is None: | ||
| return field | ||
| field_names = field.names | ||
| if (update := self._updates.get(field.field_id)) is not None and update.name not in field_names: | ||
| field_names.append(update.name) | ||
|
|
@@ -333,8 +338,8 @@ def struct(self, struct: StructType, struct_partner: Optional[MappedField], fiel | |
| return StructType(*field_results) | ||
|
|
||
| def field(self, field: NestedField, field_partner: Optional[MappedField], field_result: IcebergType) -> IcebergType: | ||
| if field_partner is None: | ||
| raise ValueError(f"Field missing from NameMapping: {'.'.join(self.current_path)}") | ||
| if field_partner is None or field_partner.field_id is None: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm curious about this change, why do we need to check for
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should just be a type check since
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see |
||
| raise ValueError(f"Field or field ID missing from NameMapping: {'.'.join(self.current_path)}") | ||
|
|
||
| return NestedField( | ||
| field_id=field_partner.field_id, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -109,6 +109,21 @@ def test_json_mapped_field_no_names_deserialization() -> None: | |
| assert MappedField(field_id=1, names=[]) == MappedField.model_validate_json(mapped_field_with_null_fields) | ||
|
|
||
|
|
||
| def test_json_mapped_field_no_field_id_deserialization() -> None: | ||
| mapped_field = """{ | ||
| "names": [] | ||
| } | ||
| """ | ||
| assert MappedField(field_id=None, names=[]) == MappedField.model_validate_json(mapped_field) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: also test omitting the |
||
|
|
||
| mapped_field_with_null_fields = """{ | ||
| "names": [], | ||
| "fields": null | ||
| } | ||
| """ | ||
| assert MappedField(names=[]) == MappedField.model_validate_json(mapped_field_with_null_fields) | ||
|
|
||
|
|
||
| def test_json_name_mapping_deserialization() -> None: | ||
| name_mapping = """ | ||
| [ | ||
|
|
@@ -164,10 +179,23 @@ def test_json_name_mapping_deserialization() -> None: | |
| ]) | ||
|
|
||
|
|
||
| def test_json_mapped_field_no_field_id_serialization() -> None: | ||
| table_name_mapping_nested_no_field_id = NameMapping([ | ||
| MappedField(field_id=1, names=["foo"]), | ||
| MappedField(field_id=None, names=["bar"]), | ||
| MappedField(field_id=2, names=["qux"], fields=[MappedField(field_id=None, names=["element"])]), | ||
| ]) | ||
|
|
||
| assert ( | ||
| table_name_mapping_nested_no_field_id.model_dump_json() | ||
| == """[{"names":["foo"],"field-id":1},{"names":["bar"]},{"names":["qux"],"field-id":2,"fields":[{"names":["element"]}]}]""" | ||
| ) | ||
|
|
||
|
|
||
| def test_json_serialization(table_name_mapping_nested: NameMapping) -> None: | ||
| assert ( | ||
| table_name_mapping_nested.model_dump_json() | ||
| == """[{"field-id":1,"names":["foo"]},{"field-id":2,"names":["bar"]},{"field-id":3,"names":["baz"]},{"field-id":4,"names":["qux"],"fields":[{"field-id":5,"names":["element"]}]},{"field-id":6,"names":["quux"],"fields":[{"field-id":7,"names":["key"]},{"field-id":8,"names":["value"],"fields":[{"field-id":9,"names":["key"]},{"field-id":10,"names":["value"]}]}]},{"field-id":11,"names":["location"],"fields":[{"field-id":12,"names":["element"],"fields":[{"field-id":13,"names":["latitude"]},{"field-id":14,"names":["longitude"]}]}]},{"field-id":15,"names":["person"],"fields":[{"field-id":16,"names":["name"]},{"field-id":17,"names":["age"]}]}]""" | ||
| == """[{"names":["foo"],"field-id":1},{"names":["bar"],"field-id":2},{"names":["baz"],"field-id":3},{"names":["qux"],"field-id":4,"fields":[{"names":["element"],"field-id":5}]},{"names":["quux"],"field-id":6,"fields":[{"names":["key"],"field-id":7},{"names":["value"],"field-id":8,"fields":[{"names":["key"],"field-id":9},{"names":["value"],"field-id":10}]}]},{"names":["location"],"field-id":11,"fields":[{"names":["element"],"field-id":12,"fields":[{"names":["latitude"],"field-id":13},{"names":["longitude"],"field-id":14}]}]},{"names":["person"],"field-id":15,"fields":[{"names":["name"],"field-id":16},{"names":["age"],"field-id":17}]}]""" | ||
| ) | ||
|
|
||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.