@@ -506,7 +506,8 @@ def test_invalid_timestamp(self, version):
506506 original = DataFrame ([(1 ,)], columns = ['variable' ])
507507 time_stamp = '01 Jan 2000, 00:00:00'
508508 with tm .ensure_clean () as path :
509- with pytest .raises (ValueError ):
509+ msg = "time_stamp should be datetime type"
510+ with pytest .raises (ValueError , match = msg ):
510511 original .to_stata (path , time_stamp = time_stamp ,
511512 version = version )
512513
@@ -547,8 +548,8 @@ def test_no_index(self):
547548 with tm .ensure_clean () as path :
548549 original .to_stata (path , write_index = False )
549550 written_and_read_again = self .read_dta (path )
550- pytest .raises (
551- KeyError , lambda : written_and_read_again ['index_not_written' ])
551+ with pytest .raises (KeyError , match = original . index . name ):
552+ written_and_read_again ['index_not_written' ]
552553
553554 def test_string_no_dates (self ):
554555 s1 = Series (['a' , 'A longer string' ])
@@ -713,7 +714,11 @@ def test_excessively_long_string(self):
713714 s ['s' + str (str_len )] = Series (['a' * str_len ,
714715 'b' * str_len , 'c' * str_len ])
715716 original = DataFrame (s )
716- with pytest .raises (ValueError ):
717+ msg = (r"Fixed width strings in Stata \.dta files are limited to 244"
718+ r" \(or fewer\)\ncharacters\. Column 's500' does not satisfy"
719+ r" this restriction\. Use the\n'version=117' parameter to write"
720+ r" the newer \(Stata 13 and later\) format\." )
721+ with pytest .raises (ValueError , match = msg ):
717722 with tm .ensure_clean () as path :
718723 original .to_stata (path )
719724
@@ -864,11 +869,14 @@ def test_drop_column(self):
864869 columns = columns )
865870 tm .assert_frame_equal (expected , reordered )
866871
867- with pytest .raises (ValueError ):
872+ msg = "columns contains duplicate entries"
873+ with pytest .raises (ValueError , match = msg ):
868874 columns = ['byte_' , 'byte_' ]
869875 read_stata (self .dta15_117 , convert_dates = True , columns = columns )
870876
871- with pytest .raises (ValueError ):
877+ msg = ("The following columns were not found in the Stata data set:"
878+ " not_found" )
879+ with pytest .raises (ValueError , match = msg ):
872880 columns = ['byte_' , 'int_' , 'long_' , 'not_found' ]
873881 read_stata (self .dta15_117 , convert_dates = True , columns = columns )
874882
@@ -924,7 +932,10 @@ def test_categorical_warnings_and_errors(self):
924932 original = pd .concat ([original [col ].astype ('category' )
925933 for col in original ], axis = 1 )
926934 with tm .ensure_clean () as path :
927- pytest .raises (ValueError , original .to_stata , path )
935+ msg = ("Stata value labels for a single variable must have"
936+ r" a combined length less than 32,000 characters\." )
937+ with pytest .raises (ValueError , match = msg ):
938+ original .to_stata (path )
928939
929940 original = pd .DataFrame .from_records (
930941 [['a' ],
@@ -1196,14 +1207,17 @@ def test_invalid_variable_labels(self, version):
11961207 'b' : 'City Exponent' ,
11971208 'c' : 'City' }
11981209 with tm .ensure_clean () as path :
1199- with pytest .raises (ValueError ):
1210+ msg = "Variable labels must be 80 characters or fewer"
1211+ with pytest .raises (ValueError , match = msg ):
12001212 original .to_stata (path ,
12011213 variable_labels = variable_labels ,
12021214 version = version )
12031215
12041216 variable_labels ['a' ] = u'invalid character Œ'
12051217 with tm .ensure_clean () as path :
1206- with pytest .raises (ValueError ):
1218+ msg = ("Variable labels must contain only characters that can be"
1219+ " encoded in Latin-1" )
1220+ with pytest .raises (ValueError , match = msg ):
12071221 original .to_stata (path ,
12081222 variable_labels = variable_labels ,
12091223 version = version )
@@ -1221,7 +1235,9 @@ def test_write_variable_label_errors(self):
12211235 'b' : 'City Exponent' ,
12221236 'c' : u'' .join (values )}
12231237
1224- with pytest .raises (ValueError ):
1238+ msg = ("Variable labels must contain only characters that can be"
1239+ " encoded in Latin-1" )
1240+ with pytest .raises (ValueError , match = msg ):
12251241 with tm .ensure_clean () as path :
12261242 original .to_stata (path , variable_labels = variable_labels_utf8 )
12271243
@@ -1231,7 +1247,8 @@ def test_write_variable_label_errors(self):
12311247 'that is too long for Stata which means '
12321248 'that it has more than 80 characters' }
12331249
1234- with pytest .raises (ValueError ):
1250+ msg = "Variable labels must be 80 characters or fewer"
1251+ with pytest .raises (ValueError , match = msg ):
12351252 with tm .ensure_clean () as path :
12361253 original .to_stata (path , variable_labels = variable_labels_long )
12371254
@@ -1265,7 +1282,8 @@ def test_default_date_conversion(self):
12651282 def test_unsupported_type (self ):
12661283 original = pd .DataFrame ({'a' : [1 + 2j , 2 + 4j ]})
12671284
1268- with pytest .raises (NotImplementedError ):
1285+ msg = "Data type complex128 not supported"
1286+ with pytest .raises (NotImplementedError , match = msg ):
12691287 with tm .ensure_clean () as path :
12701288 original .to_stata (path )
12711289
@@ -1277,7 +1295,8 @@ def test_unsupported_datetype(self):
12771295 'strs' : ['apple' , 'banana' , 'cherry' ],
12781296 'dates' : dates })
12791297
1280- with pytest .raises (NotImplementedError ):
1298+ msg = "Format %tC not implemented"
1299+ with pytest .raises (NotImplementedError , match = msg ):
12811300 with tm .ensure_clean () as path :
12821301 original .to_stata (path , convert_dates = {'dates' : 'tC' })
12831302
@@ -1291,9 +1310,10 @@ def test_unsupported_datetype(self):
12911310
12921311 def test_repeated_column_labels (self ):
12931312 # GH 13923
1294- with pytest .raises (ValueError ) as cm :
1313+ msg = (r"Value labels for column ethnicsn are not unique\. The"
1314+ r" repeated labels are:\n\n-+wolof" )
1315+ with pytest .raises (ValueError , match = msg ):
12951316 read_stata (self .dta23 , convert_categoricals = True )
1296- assert 'wolof' in cm .exception
12971317
12981318 def test_stata_111 (self ):
12991319 # 111 is an old version but still used by current versions of
@@ -1316,17 +1336,18 @@ def test_out_of_range_double(self):
13161336 'ColumnTooBig' : [0.0 ,
13171337 np .finfo (np .double ).eps ,
13181338 np .finfo (np .double ).max ]})
1319- with pytest .raises (ValueError ) as cm :
1339+ msg = (r"Column ColumnTooBig has a maximum value \(.+\)"
1340+ r" outside the range supported by Stata \(.+\)" )
1341+ with pytest .raises (ValueError , match = msg ):
13201342 with tm .ensure_clean () as path :
13211343 df .to_stata (path )
1322- assert 'ColumnTooBig' in cm .exception
13231344
13241345 df .loc [2 , 'ColumnTooBig' ] = np .inf
1325- with pytest .raises (ValueError ) as cm :
1346+ msg = ("Column ColumnTooBig has a maximum value of infinity which"
1347+ " is outside the range supported by Stata" )
1348+ with pytest .raises (ValueError , match = msg ):
13261349 with tm .ensure_clean () as path :
13271350 df .to_stata (path )
1328- assert 'ColumnTooBig' in cm .exception
1329- assert 'infinity' in cm .exception
13301351
13311352 def test_out_of_range_float (self ):
13321353 original = DataFrame ({'ColumnOk' : [0.0 ,
@@ -1348,11 +1369,11 @@ def test_out_of_range_float(self):
13481369 reread .set_index ('index' ))
13491370
13501371 original .loc [2 , 'ColumnTooBig' ] = np .inf
1351- with pytest .raises (ValueError ) as cm :
1372+ msg = ("Column ColumnTooBig has a maximum value of infinity which"
1373+ " is outside the range supported by Stata" )
1374+ with pytest .raises (ValueError , match = msg ):
13521375 with tm .ensure_clean () as path :
13531376 original .to_stata (path )
1354- assert 'ColumnTooBig' in cm .exception
1355- assert 'infinity' in cm .exception
13561377
13571378 def test_path_pathlib (self ):
13581379 df = tm .makeDataFrame ()
@@ -1466,7 +1487,8 @@ def test_invalid_date_conversion(self):
14661487 'dates' : dates })
14671488
14681489 with tm .ensure_clean () as path :
1469- with pytest .raises (ValueError ):
1490+ msg = "convert_dates key must be a column or an integer"
1491+ with pytest .raises (ValueError , match = msg ):
14701492 original .to_stata (path ,
14711493 convert_dates = {'wrong_name' : 'tc' })
14721494
@@ -1546,18 +1568,27 @@ def test_all_none_exception(self, version):
15461568 output = pd .DataFrame (output )
15471569 output .loc [:, 'none' ] = None
15481570 with tm .ensure_clean () as path :
1549- with pytest .raises (ValueError ) as excinfo :
1571+ msg = (r"Column `none` cannot be exported\.\n\n"
1572+ "Only string-like object arrays containing all strings or a"
1573+ r" mix of strings and None can be exported\. Object arrays"
1574+ r" containing only null values are prohibited\. Other"
1575+ " object typescannot be exported and must first be"
1576+ r" converted to one of the supported types\." )
1577+ with pytest .raises (ValueError , match = msg ):
15501578 output .to_stata (path , version = version )
1551- assert 'Only string-like' in excinfo .value .args [0 ]
1552- assert 'Column `none`' in excinfo .value .args [0 ]
15531579
15541580 @pytest .mark .parametrize ('version' , [114 , 117 ])
15551581 def test_invalid_file_not_written (self , version ):
15561582 content = 'Here is one __�__ Another one __·__ Another one __½__'
15571583 df = DataFrame ([content ], columns = ['invalid' ])
15581584 expected_exc = UnicodeEncodeError if PY3 else UnicodeDecodeError
15591585 with tm .ensure_clean () as path :
1560- with pytest .raises (expected_exc ):
1586+ msg1 = (r"'latin-1' codec can't encode character '\\ufffd'"
1587+ r" in position 14: ordinal not in range\(256\)" )
1588+ msg2 = ("'ascii' codec can't decode byte 0xef in position 14:"
1589+ r" ordinal not in range\(128\)" )
1590+ with pytest .raises (expected_exc , match = r'{}|{}' .format (
1591+ msg1 , msg2 )):
15611592 with tm .assert_produces_warning (ResourceWarning ):
15621593 df .to_stata (path )
15631594
0 commit comments