@@ -682,6 +682,24 @@ def schema_list_of_structs() -> Schema:
682682 )
683683
684684
@pytest.fixture
def schema_map_of_structs() -> Schema:
    """Build a schema with a single optional ``locations`` map column.

    The map is keyed by strings and its values are structs with two double
    members, ``lat`` and ``long``. Field IDs: 5 for the column, 51/52 for the
    map key/value, 511/512 for the struct members.
    """
    coordinates = StructType(
        NestedField(511, "lat", DoubleType()),
        NestedField(512, "long", DoubleType()),
    )
    locations_type = MapType(
        key_id=51,
        value_id=52,
        key_type=StringType(),
        value_type=coordinates,
        # NOTE(review): the map-of-structs projection test in this file passes
        # the same keyword and still expects `value: ... not null`, so this
        # flag does not appear to make map values optional - confirm whether
        # MapType honours `element_required` or a different keyword is meant.
        element_required=False,
    )
    return Schema(NestedField(5, "locations", locations_type, required=False))
701+
702+
685703@pytest .fixture
686704def schema_map () -> Schema :
687705 return Schema (
@@ -793,6 +811,25 @@ def file_list_of_structs(schema_list_of_structs: Schema, tmpdir: str) -> str:
793811 )
794812
795813
@pytest.fixture
def file_map_of_structs(schema_map_of_structs: Schema, tmpdir: str) -> str:
    """Write a three-row Parquet file holding ``locations`` map data.

    The Arrow schema is derived from ``schema_map_of_structs`` and carries the
    Iceberg schema, serialised as JSON bytes, under the ``ICEBERG_SCHEMA``
    metadata key. The middle row holds an empty map so tests also cover the
    no-entries case.

    Args:
        schema_map_of_structs: Iceberg schema fixture describing the column.
        tmpdir: Temporary directory in which ``e.parquet`` is created.

    Returns:
        Whatever ``_write_table_to_file`` returns (annotated as ``str``;
        presumably the location of the written file - confirm in the helper).
    """
    iceberg_json = bytes(schema_map_of_structs.model_dump_json(), UTF8)
    pyarrow_schema = schema_to_pyarrow(schema_map_of_structs, metadata={ICEBERG_SCHEMA: iceberg_json})
    rows = [
        {"locations": {"1": {"lat": 52.371807, "long": 4.896029}, "2": {"lat": 52.387386, "long": 4.646219}}},
        {"locations": {}},
        {"locations": {"3": {"lat": 52.078663, "long": 4.288788}, "4": {"lat": 52.387386, "long": 4.646219}}},
    ]
    return _write_table_to_file(
        # No whitespace inside the path: a stray space after the directory
        # name would point at a different (non-existent) directory.
        f"file:{tmpdir}/e.parquet",
        pyarrow_schema,
        pa.Table.from_pylist(rows, schema=pyarrow_schema),
    )
831+
832+
796833@pytest .fixture
797834def file_map (schema_map : Schema , tmpdir : str ) -> str :
798835 pyarrow_schema = schema_to_pyarrow (schema_map , metadata = {ICEBERG_SCHEMA : bytes (schema_map .model_dump_json (), UTF8 )})
@@ -914,7 +951,11 @@ def test_read_list(schema_list: Schema, file_list: str) -> None:
914951 for actual , expected in zip (result_table .columns [0 ], [list (range (1 , 10 )), list (range (2 , 20 )), list (range (3 , 30 ))]):
915952 assert actual .as_py () == expected
916953
917- assert repr (result_table .schema ) == "ids: list<item: int32>\n child 0, item: int32"
954+ assert (
955+ repr (result_table .schema )
956+ == """ids: list<element: int32>
957+ child 0, element: int32"""
958+ )
918959
919960
920961def test_read_map (schema_map : Schema , file_map : str ) -> None :
@@ -927,9 +968,9 @@ def test_read_map(schema_map: Schema, file_map: str) -> None:
927968 assert (
928969 repr (result_table .schema )
929970 == """properties: map<string, string>
930- child 0, entries: struct<key: string not null, value: string> not null
971+ child 0, entries: struct<key: string not null, value: string not null > not null
931972 child 0, key: string not null
932- child 1, value: string"""
973+ child 1, value: string not null """
933974 )
934975
935976
@@ -1063,7 +1104,11 @@ def test_projection_nested_struct_subset(file_struct: str) -> None:
10631104 assert actual .as_py () == {"lat" : expected }
10641105
10651106 assert len (result_table .columns [0 ]) == 3
1066- assert repr (result_table .schema ) == "location: struct<lat: double not null> not null\n child 0, lat: double not null"
1107+ assert (
1108+ repr (result_table .schema )
1109+ == """location: struct<lat: double not null> not null
1110+ child 0, lat: double not null"""
1111+ )
10671112
10681113
10691114def test_projection_nested_new_field (file_struct : str ) -> None :
@@ -1082,7 +1127,11 @@ def test_projection_nested_new_field(file_struct: str) -> None:
10821127 for actual , expected in zip (result_table .columns [0 ], [None , None , None ]):
10831128 assert actual .as_py () == {"null" : expected }
10841129 assert len (result_table .columns [0 ]) == 3
1085- assert repr (result_table .schema ) == "location: struct<null: double> not null\n child 0, null: double"
1130+ assert (
1131+ repr (result_table .schema )
1132+ == """location: struct<null: double> not null
1133+ child 0, null: double"""
1134+ )
10861135
10871136
10881137def test_projection_nested_struct (schema_struct : Schema , file_struct : str ) -> None :
@@ -1111,7 +1160,10 @@ def test_projection_nested_struct(schema_struct: Schema, file_struct: str) -> No
11111160 assert len (result_table .columns [0 ]) == 3
11121161 assert (
11131162 repr (result_table .schema )
1114- == "location: struct<lat: double, null: double, long: double> not null\n child 0, lat: double\n child 1, null: double\n child 2, long: double"
1163+ == """location: struct<lat: double, null: double, long: double> not null
1164+ child 0, lat: double
1165+ child 1, null: double
1166+ child 2, long: double"""
11151167 )
11161168
11171169
@@ -1136,28 +1188,75 @@ def test_projection_list_of_structs(schema_list_of_structs: Schema, file_list_of
11361188 result_table = project (schema , [file_list_of_structs ])
11371189 assert len (result_table .columns ) == 1
11381190 assert len (result_table .columns [0 ]) == 3
1191+ results = [row .as_py () for row in result_table .columns [0 ]]
1192+ assert results == [
1193+ [
1194+ {'latitude' : 52.371807 , 'longitude' : 4.896029 , 'altitude' : None },
1195+ {'latitude' : 52.387386 , 'longitude' : 4.646219 , 'altitude' : None },
1196+ ],
1197+ [],
1198+ [
1199+ {'latitude' : 52.078663 , 'longitude' : 4.288788 , 'altitude' : None },
1200+ {'latitude' : 52.387386 , 'longitude' : 4.646219 , 'altitude' : None },
1201+ ],
1202+ ]
1203+ assert (
1204+ repr (result_table .schema )
1205+ == """locations: list<element: struct<latitude: double not null, longitude: double not null, altitude: double>>
1206+ child 0, element: struct<latitude: double not null, longitude: double not null, altitude: double>
1207+ child 0, latitude: double not null
1208+ child 1, longitude: double not null
1209+ child 2, altitude: double"""
1210+ )
1211+
1212+
1213+ def test_projection_maps_of_structs (schema_map_of_structs : Schema , file_map_of_structs : str ) -> None :
1214+ schema = Schema (
1215+ NestedField (
1216+ 5 ,
1217+ "locations" ,
1218+ MapType (
1219+ key_id = 51 ,
1220+ value_id = 52 ,
1221+ key_type = StringType (),
1222+ value_type = StructType (
1223+ NestedField (511 , "latitude" , DoubleType ()),
1224+ NestedField (512 , "longitude" , DoubleType ()),
1225+ NestedField (513 , "altitude" , DoubleType (), required = False ),
1226+ ),
1227+ element_required = False ,
1228+ ),
1229+ required = False ,
1230+ ),
1231+ )
1232+
1233+ result_table = project (schema , [file_map_of_structs ])
1234+ assert len (result_table .columns ) == 1
1235+ assert len (result_table .columns [0 ]) == 3
11391236 for actual , expected in zip (
11401237 result_table .columns [0 ],
11411238 [
11421239 [
1143- {"latitude" : 52.371807 , "longitude" : 4.896029 , "altitude" : None },
1144- {"latitude" : 52.387386 , "longitude" : 4.646219 , "altitude" : None },
1240+ ( "1" , {"latitude" : 52.371807 , "longitude" : 4.896029 , "altitude" : None }) ,
1241+ ( "2" , {"latitude" : 52.387386 , "longitude" : 4.646219 , "altitude" : None }) ,
11451242 ],
11461243 [],
11471244 [
1148- {"latitude" : 52.078663 , "longitude" : 4.288788 , "altitude" : None },
1149- {"latitude" : 52.387386 , "longitude" : 4.646219 , "altitude" : None },
1245+ ( "3" , {"latitude" : 52.078663 , "longitude" : 4.288788 , "altitude" : None }) ,
1246+ ( "4" , {"latitude" : 52.387386 , "longitude" : 4.646219 , "altitude" : None }) ,
11501247 ],
11511248 ],
11521249 ):
11531250 assert actual .as_py () == expected
11541251 assert (
11551252 repr (result_table .schema )
1156- == """locations: list<item: struct<latitude: double not null, longitude: double not null, altitude: double>>
1157- child 0, item: struct<latitude: double not null, longitude: double not null, altitude: double>
1158- child 0, latitude: double not null
1159- child 1, longitude: double not null
1160- child 2, altitude: double"""
1253+ == """locations: map<string, struct<latitude: double not null, longitude: double not null, altitude: double>>
1254+ child 0, entries: struct<key: string not null, value: struct<latitude: double not null, longitude: double not null, altitude: double> not null> not null
1255+ child 0, key: string not null
1256+ child 1, value: struct<latitude: double not null, longitude: double not null, altitude: double> not null
1257+ child 0, latitude: double not null
1258+ child 1, longitude: double not null
1259+ child 2, altitude: double"""
11611260 )
11621261
11631262
0 commit comments