55import platform
66import codecs
77
8- import subprocess
9-
108import re
119import sys
1210from datetime import datetime
@@ -1494,36 +1492,71 @@ def test_memory_map(self):
14941492 out = self .read_csv (mmap_file , memory_map = True )
14951493 tm .assert_frame_equal (out , expected )
14961494
1497-
14981495 def test_parse_trim_buffers (self ):
1499- # This test is designed to cause a `segfault` with unpatched `tokenizer.c`,
1500- # Sometimes the test fails on `segfault`, other times it fails due to memory
1501- # corruption, which causes the loaded DataFrame to differ from the expected
1502- # one.
1496+ # This test is designed to cause a `segfault` with unpatched
1497+ # `tokenizer.c`, Sometimes the test fails on `segfault`, other
1498+ # times it fails due to memory corruption, which causes the
1499+ # loaded DataFrame to differ from the expected one.
15031500 n_lines , chunksizes = 173 , range (57 , 90 )
15041501
15051502 # Create the expected output
15061503 expected_ = [(chunksize_ , "9999-9" , "9999-9" )
1507- for chunksize_ in chunksizes
1508- for _ in range ((n_lines + chunksize_ - 1 ) // chunksize_ )]
1504+ for chunksize_ in chunksizes
1505+ for _ in range ((n_lines + chunksize_ - 1 ) // chunksize_ )]
15091506 expected = pd .DataFrame (expected_ , columns = None , index = None )
15101507
15111508 # Generate a large mixed-type CSV file on-the-fly (approx 272 KiB)
1512- record_ = "9999-9,99:99,,,,ZZ,ZZ,,,ZZZ-ZZZZ,.Z-ZZZZ,-9.99,,,9.99,ZZZZZ,,-99,9,ZZZ-ZZZZ,ZZ-ZZZZ,,9.99,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,999,ZZZ-ZZZZ,,ZZ-ZZZZ,,,,,ZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,,,9,9,9,9,99,99,999,999,ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,9,ZZ-ZZZZ,9.99,ZZ-ZZZZ,ZZ-ZZZZ,,,,ZZZZ,,,ZZ,ZZ,,,,,,,,,,,,,9,,,999.99,999.99,,,ZZZZZ,,,Z9,,,,,,,ZZZ,ZZZ,,,,,,,,,,,ZZZZZ,ZZZZZ,ZZZ-ZZZZZZ,ZZZ-ZZZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,,,999999,999999,ZZZ,ZZZ,,,ZZZ,ZZZ,999.99,999.99,,,,ZZZ-ZZZ,ZZZ-ZZZ,-9.99,-9.99,9,9,,99,,9.99,9.99,9,9,9.99,9.99,,,,9.99,9.99,,99,,99,9.99,9.99,,,ZZZ,ZZZ,,999.99,,999.99,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,ZZZZZ,ZZZZZ,ZZZ,ZZZ,9,9,,,,,,ZZZ-ZZZZ,ZZZ999Z,,,999.99,,999.99,ZZZ-ZZZZ,,,9.999,9.999,9.999,9.999,-9.999,-9.999,-9.999,-9.999,9.999,9.999,9.999,9.999,9.999,9.999,9.999,9.999,99999,ZZZ-ZZZZ,,9.99,ZZZ,,,,,,,,ZZZ,,,,,9,,,,9,,,,,,,,,,ZZZ-ZZZZ,ZZZ-ZZZZ,,ZZZZZ,ZZZZZ,ZZZZZ,ZZZZZ,,,9.99,,ZZ-ZZZZ,ZZ-ZZZZ,ZZ,999,,,,ZZ-ZZZZ,ZZZ,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,99.99,99.99,,,9.99,9.99,9.99,9.99,ZZZ-ZZZZ,,,ZZZ-ZZZZZ,,,,,-9.99,-9.99,-9.99,-9.99,,,,,,,,,ZZZ-ZZZZ,,9,9.99,9.99,99ZZ,,-9.99,-9.99,ZZZ-ZZZZ,,,,,,,ZZZ-ZZZZ,9.99,9.99,9999,,,,,,,,,,-9.9,Z/Z-ZZZZ,999.99,9.99,,999.99,ZZ-ZZZZ,ZZ-ZZZZ,9.99,9.99,9.99,9.99,9.99,9.99,,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ,ZZZ,ZZZ,ZZZ,9.99,,,-9.99,ZZ-ZZZZ,-999.99,,-9999,,999.99,,,,999.99,99.99,,,ZZ-ZZZZZZZZ,ZZ-ZZZZ-ZZZZZZZ,,,,ZZ-ZZ-ZZZZZZZZ,ZZZZZZZZ,ZZZ-ZZZZ,9999,999.99,ZZZ-ZZZZ,-9.99,-9.99,ZZZ-ZZZZ,99:99:99,,99,99,,9.99,,-99.99,,,,,,9.99,ZZZ-ZZZZ,-9.99,-9.99,9.99,9.99,,ZZZ,,,,,,,ZZZ,ZZZ,,,,,"
1509+ record_ = \
1510+ """9999-9,99:99,,,,ZZ,ZZ,,,ZZZ-ZZZZ,.Z-ZZZZ,-9.99,,,9.""" \
1511+ """99,ZZZZZ,,-99,9,ZZZ-ZZZZ,ZZ-ZZZZ,,9.99,ZZZ-ZZZZZ,ZZZ-""" \
1512+ """ZZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-""" \
1513+ """ZZZZ,ZZZ-ZZZZ,999,ZZZ-ZZZZ,,ZZ-ZZZZ,,,,,ZZZZ,ZZZ-""" \
1514+ """ZZZZZ,ZZZ-ZZZZ,,,9,9,9,9,99,99,999,999,ZZZZZ,ZZZ-""" \
1515+ """ZZZZZ,ZZZ-ZZZZ,9,ZZ-ZZZZ,9.99,ZZ-ZZZZ,ZZ-""" \
1516+ """ZZZZ,,,,ZZZZ,,,ZZ,ZZ,,,,,,,,,,,,,9,,,999.99,999.99,,,""" \
1517+ """ZZZZZ,,,Z9,,,,,,,ZZZ,ZZZ,,,,,,,,,,,ZZZZZ,ZZZZZ,ZZZ-""" \
1518+ """ZZZZZZ,ZZZ-ZZZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-""" \
1519+ """ZZZZ,,,999999,999999,ZZZ,ZZZ,,,ZZZ,ZZZ,999.99,999.""" \
1520+ """99,,,,ZZZ-ZZZ,ZZZ-ZZZ,-9.99,-9.99,9,9,,99,,9.99,9.""" \
1521+ """99,9,9,9.99,9.99,,,,9.99,9.99,,99,,99,9.99,9.""" \
1522+ """99,,,ZZZ,ZZZ,,999.99,,999.99,ZZZ,ZZZ-ZZZZ,ZZZ-""" \
1523+ """ZZZZ,,,ZZZZZ,ZZZZZ,ZZZ,ZZZ,9,9,,,,,,ZZZ-""" \
1524+ """ZZZZ,ZZZ999Z,,,999.99,,999.99,ZZZ-ZZZZ,,,9.999,9.""" \
1525+ """999,9.999,9.999,-9.999,-9.999,-9.999,-9.999,9.999,9.""" \
1526+ """999,9.999,9.999,9.999,9.999,9.999,9.999,99999,ZZZ-""" \
1527+ """ZZZZ,,9.99,ZZZ,,,,,,,,ZZZ,,,,,9,,,,9,,,,,,,,,,ZZZ-""" \
1528+ """ZZZZ,ZZZ-ZZZZ,,ZZZZZ,ZZZZZ,ZZZZZ,ZZZZZ,,,9.99,,ZZ-""" \
1529+ """ZZZZ,ZZ-ZZZZ,ZZ,999,,,,ZZ-ZZZZ,ZZZ,ZZZ,ZZZ-ZZZZ,ZZZ-""" \
1530+ """ZZZZ,,,99.99,99.99,,,9.99,9.99,9.99,9.99,ZZZ-""" \
1531+ """ZZZZ,,,ZZZ-ZZZZZ,,,,,-9.99,-9.99,-9.99,-9.""" \
1532+ """99,,,,,,,,,ZZZ-ZZZZ,,9,9.99,9.99,99ZZ,,-9.99,-9.""" \
1533+ """99,ZZZ-ZZZZ,,,,,,,ZZZ-ZZZZ,9.99,9.99,9999,,,,,,,,,,-9""" \
1534+ """.9,Z/Z-ZZZZ,999.99,9.99,,999.99,ZZ-ZZZZ,ZZ-ZZZZ,9.""" \
1535+ """99,9.99,9.99,9.99,9.99,9.99,,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-""" \
1536+ """ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ,ZZZ,ZZZ,ZZZ,9.99,,,-9.""" \
1537+ """99,ZZ-ZZZZ,-999.99,,-9999,,999.99,,,,999.99,99.""" \
1538+ """99,,,ZZ-ZZZZZZZZ,ZZ-ZZZZ-ZZZZZZZ,,,,ZZ-ZZ-""" \
1539+ """ZZZZZZZZ,ZZZZZZZZ,ZZZ-ZZZZ,9999,999.99,ZZZ-ZZZZ,-9.""" \
1540+ """99,-9.99,ZZZ-ZZZZ,99:99:99,,99,99,,9.99,,-99.""" \
1541+ """99,,,,,,9.99,ZZZ-ZZZZ,-9.99,-9.99,9.99,9.""" \
1542+ """99,,ZZZ,,,,,,,ZZZ,ZZZ,,,,,"""
15131543 csv_data = "\n " .join ([record_ ] * n_lines ) + "\n "
15141544
15151545 output_ = list ()
15161546 for chunksize_ in chunksizes :
15171547 try :
1518- iterator_ = self .read_csv (StringIO (csv_data ), header = None , dtype = object ,
1519- chunksize = chunksize_ , na_filter = True )
1520- except ValueError , e :
1548+ iterator_ = self .read_csv (StringIO (csv_data ), header = None ,
1549+ dtype = object , chunksize = chunksize_ ,
1550+ na_filter = True )
1551+ except ValueError :
15211552 # Ignore unsuported dtype=object by engine=python
15221553 pass
15231554
15241555 for chunk_ in iterator_ :
1525- output_ .append ((chunksize_ , chunk_ .iloc [0 , 0 ], chunk_ .iloc [- 1 , 0 ]))
1556+ output_ .append ((chunksize_ ,
1557+ chunk_ .iloc [0 , 0 ],
1558+ chunk_ .iloc [- 1 , 0 ]))
15261559
15271560 df = pd .DataFrame (output_ , columns = None , index = None )
15281561
1529- tm .assert_frame_equal (df , expected )
1562+ tm .assert_frame_equal (df , expected )
0 commit comments