11""" test parquet compat """
22import datetime
33from distutils .version import LooseVersion
4+ import locale
45import os
56from warnings import catch_warnings
67
@@ -129,7 +130,6 @@ def check_round_trip(
     read_kwargs=None,
     expected=None,
     check_names=True,
-    check_like=False,
     repeat=2,
 ):
     """Verify parquet serializer and deserializer produce the same results.
@@ -149,8 +149,6 @@ def check_round_trip(
         Expected deserialization result, otherwise will be equal to `df`
     check_names: list of str, optional
         Closed set of column names to be compared
-    check_like: bool, optional
-        If True, ignore the order of index & columns.
     repeat: int, optional
         How many times to repeat the test
     """
@@ -171,9 +169,7 @@ def compare(repeat):
             with catch_warnings(record=True):
                 actual = read_parquet(path, **read_kwargs)
 
-                tm.assert_frame_equal(
-                    expected, actual, check_names=check_names, check_like=check_like
-                )
+                tm.assert_frame_equal(expected, actual, check_names=check_names)
 
     if path is None:
         with tm.ensure_clean() as path:
@@ -489,37 +485,15 @@ def test_categorical(self, pa):
             expected = df.astype(object)
             check_round_trip(df, pa, expected=expected)
 
+    # GH#33077 2020-03-27
+    @pytest.mark.xfail(
+        locale.getlocale()[0] in ["zh_CN", "it_IT"],
+        reason="dateutil cannot parse e.g. '五, 27 3月 2020 21:45:38 GMT'",
+    )
     def test_s3_roundtrip(self, df_compat, s3_resource, pa):
         # GH #19134
         check_round_trip(df_compat, pa, path="s3://pandas-test/pyarrow.parquet")
 
-    @td.skip_if_no("s3fs")
-    @pytest.mark.parametrize("partition_col", [["A"], []])
-    def test_s3_roundtrip_for_dir(self, df_compat, s3_resource, pa, partition_col):
-        from pandas.io.s3 import get_fs as get_s3_fs
-
-        # GH #26388
-        # https://github.com/apache/arrow/blob/master/python/pyarrow/tests/test_parquet.py#L2716
-        # As per pyarrow partitioned columns become 'categorical' dtypes
-        # and are added to back of dataframe on read
-
-        expected_df = df_compat.copy()
-        if partition_col:
-            expected_df[partition_col] = expected_df[partition_col].astype("category")
-        check_round_trip(
-            df_compat,
-            pa,
-            expected=expected_df,
-            path="s3://pandas-test/parquet_dir",
-            write_kwargs={
-                "partition_cols": partition_col,
-                "compression": None,
-                "filesystem": get_s3_fs(),
-            },
-            check_like=True,
-            repeat=1,
-        )
-
     def test_partition_cols_supported(self, pa, df_full):
         # GH #23283
         partition_cols = ["bool", "int"]