|
22 | 22 | import pytest |
23 | 23 | from sortedcontainers import SortedList |
24 | 24 |
|
| 25 | +from pyiceberg.catalog.noop import NoopCatalog |
25 | 26 | from pyiceberg.exceptions import CommitFailedException |
26 | 27 | from pyiceberg.expressions import ( |
27 | 28 | AlwaysTrue, |
28 | 29 | And, |
29 | 30 | EqualTo, |
30 | 31 | In, |
31 | 32 | ) |
32 | | -from pyiceberg.io import PY_IO_IMPL |
| 33 | +from pyiceberg.io import PY_IO_IMPL, load_file_io |
33 | 34 | from pyiceberg.manifest import ( |
34 | 35 | DataFile, |
35 | 36 | DataFileContent, |
@@ -848,3 +849,89 @@ def test_assert_default_sort_order_id(table_v2: Table) -> None: |
848 | 849 | match="Requirement failed: default sort order id has changed: expected 1, found 3", |
849 | 850 | ): |
850 | 851 | AssertDefaultSortOrderId(default_sort_order_id=1).validate(base_metadata) |
| 852 | + |
| 853 | + |
def test_correct_schema() -> None:
    """Verify which schema a table scan projects onto.

    The metadata built here declares two schemas (ids 0 and 1) with schema 1
    as the current one, and two snapshots: snapshot 123 (the current snapshot)
    references schema 0, while snapshot 234 references schema 10, which is not
    present in the metadata. The test checks that:

    * a scan without an explicit snapshot projects onto the current schema,
    * a scan pinned to snapshot 123 projects onto schema 0,
    * a scan pinned to snapshot 234 emits a ``UserWarning`` about the
      missing schema,
    * a scan pinned to an unknown snapshot id raises ``ValueError``.
    """
    # Schema 0: the older, single-column layout referenced by snapshot 123.
    initial_schema = {
        "type": "struct",
        "schema-id": 0,
        "fields": [{"id": 1, "name": "x", "required": True, "type": "long"}],
    }
    # Schema 1: the table's current schema (see "current-schema-id" below).
    current_schema = {
        "type": "struct",
        "schema-id": 1,
        "identifier-field-ids": [1, 2],
        "fields": [
            {"id": 1, "name": "x", "required": True, "type": "long"},
            {"id": 2, "name": "y", "required": True, "type": "long"},
            {"id": 3, "name": "z", "required": True, "type": "long"},
        ],
    }
    # This snapshot points at schema id 10, which the metadata does not define.
    snapshot_with_unknown_schema = {
        "snapshot-id": 234,
        "timestamp-ms": 1515100955770,
        "sequence-number": 0,
        "summary": {"operation": "append"},
        "manifest-list": "s3://a/b/1.avro",
        "schema-id": 10,
    }
    # The current snapshot; it was written against schema 0.
    snapshot_with_initial_schema = {
        "snapshot-id": 123,
        "timestamp-ms": 1515100955770,
        "sequence-number": 0,
        "summary": {"operation": "append"},
        "manifest-list": "s3://a/b/1.avro",
        "schema-id": 0,
    }
    raw_metadata = {
        "format-version": 2,
        "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
        "location": "s3://bucket/test/location",
        "last-sequence-number": 34,
        "last-updated-ms": 1602638573590,
        "last-column-id": 3,
        "current-schema-id": 1,
        "schemas": [initial_schema, current_schema],
        "default-spec-id": 0,
        "partition-specs": [
            {"spec-id": 0, "fields": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}]}
        ],
        "last-partition-id": 1000,
        "default-sort-order-id": 0,
        "sort-orders": [],
        "current-snapshot-id": 123,
        "snapshots": [snapshot_with_unknown_schema, snapshot_with_initial_schema],
    }
    table_metadata = TableMetadataV2(**raw_metadata)

    table = Table(
        identifier=("default", "t1"),
        metadata=table_metadata,
        metadata_location="s3://../..",
        io=load_file_io(),
        catalog=NoopCatalog("NoopCatalog"),
    )

    # Without an explicit snapshot the scan should use the table's current
    # schema instead of the one recorded on the current snapshot.
    default_projection = table.scan().projection()
    assert default_projection == Schema(
        NestedField(field_id=1, name="x", field_type=LongType(), required=True),
        NestedField(field_id=2, name="y", field_type=LongType(), required=True),
        NestedField(field_id=3, name="z", field_type=LongType(), required=True),
        schema_id=1,
        identifier_field_ids=[1, 2],
    )

    # When a snapshot is requested explicitly, the schema linked to that
    # snapshot is the one we expect back.
    pinned_projection = table.scan(snapshot_id=123).projection()
    assert pinned_projection == Schema(
        NestedField(field_id=1, name="x", field_type=LongType(), required=True),
        schema_id=0,
        identifier_field_ids=[],
    )

    # The snapshot references a schema id that is absent from the metadata.
    with pytest.warns(UserWarning, match="Metadata does not contain schema with id: 10"):
        table.scan(snapshot_id=234).projection()

    # Invalid snapshot
    with pytest.raises(ValueError, match="Snapshot not found: -1"):
        table.scan(snapshot_id=-1).projection()
0 commit comments