@@ -1491,83 +1491,3 @@ def test_memory_map(self):
14911491
14921492 out = self .read_csv (mmap_file , memory_map = True )
14931493 tm .assert_frame_equal (out , expected )
1494-
1495- def test_parse_trim_buffers (self ):
1496- # This test is part of a bugfix for issue #13703. It attempts to
1497- # stress the system memory allocator, to cause it to move the
1498- # stream buffer and either let the OS reclaim the region, or let
1499- # other memory requests of parser otherwise modify the contents
1500- # of memory space, where it was formerly located.
1501- # This test is designed to cause a `segfault` with unpatched
1502- # `tokenizer.c`. Sometimes the test fails on `segfault`, other
1503- # times it fails due to memory corruption, which causes the
1504- # loaded DataFrame to differ from the expected one.
1505-
1506- # Generate a large mixed-type CSV file on-the-fly (one record is
1507- # approx 1.5KiB).
1508- record_ = \
1509- """9999-9,99:99,,,,ZZ,ZZ,,,ZZZ-ZZZZ,.Z-ZZZZ,-9.99,,,9.99,Z""" \
1510- """ZZZZ,,-99,9,ZZZ-ZZZZ,ZZ-ZZZZ,,9.99,ZZZ-ZZZZZ,ZZZ-ZZZZZ,""" \
1511- """ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,9""" \
1512- """99,ZZZ-ZZZZ,,ZZ-ZZZZ,,,,,ZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,,,9,9,""" \
1513- """9,9,99,99,999,999,ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,9,ZZ-ZZZZ,9.""" \
1514- """99,ZZ-ZZZZ,ZZ-ZZZZ,,,,ZZZZ,,,ZZ,ZZ,,,,,,,,,,,,,9,,,999.""" \
1515- """99,999.99,,,ZZZZZ,,,Z9,,,,,,,ZZZ,ZZZ,,,,,,,,,,,ZZZZZ,ZZ""" \
1516- """ZZZ,ZZZ-ZZZZZZ,ZZZ-ZZZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZ""" \
1517- """ZZ,,,999999,999999,ZZZ,ZZZ,,,ZZZ,ZZZ,999.99,999.99,,,,Z""" \
1518- """ZZ-ZZZ,ZZZ-ZZZ,-9.99,-9.99,9,9,,99,,9.99,9.99,9,9,9.99,""" \
1519- """9.99,,,,9.99,9.99,,99,,99,9.99,9.99,,,ZZZ,ZZZ,,999.99,,""" \
1520- """999.99,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,ZZZZZ,ZZZZZ,ZZZ,ZZZ,9,9,""" \
1521- """,,,,,ZZZ-ZZZZ,ZZZ999Z,,,999.99,,999.99,ZZZ-ZZZZ,,,9.999""" \
1522- """,9.999,9.999,9.999,-9.999,-9.999,-9.999,-9.999,9.999,9.""" \
1523- """999,9.999,9.999,9.999,9.999,9.999,9.999,99999,ZZZ-ZZZZ,""" \
1524- """,9.99,ZZZ,,,,,,,,ZZZ,,,,,9,,,,9,,,,,,,,,,ZZZ-ZZZZ,ZZZ-Z""" \
1525- """ZZZ,,ZZZZZ,ZZZZZ,ZZZZZ,ZZZZZ,,,9.99,,ZZ-ZZZZ,ZZ-ZZZZ,ZZ""" \
1526- """,999,,,,ZZ-ZZZZ,ZZZ,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,99.99,99.99""" \
1527- """,,,9.99,9.99,9.99,9.99,ZZZ-ZZZZ,,,ZZZ-ZZZZZ,,,,,-9.99,-""" \
1528- """9.99,-9.99,-9.99,,,,,,,,,ZZZ-ZZZZ,,9,9.99,9.99,99ZZ,,-9""" \
1529- """.99,-9.99,ZZZ-ZZZZ,,,,,,,ZZZ-ZZZZ,9.99,9.99,9999,,,,,,,""" \
1530- """,,,-9.9,Z/Z-ZZZZ,999.99,9.99,,999.99,ZZ-ZZZZ,ZZ-ZZZZ,9.""" \
1531- """99,9.99,9.99,9.99,9.99,9.99,,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZ""" \
1532- """ZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ,ZZZ,ZZZ,ZZZ,9.99,,,-9.99,ZZ""" \
1533- """-ZZZZ,-999.99,,-9999,,999.99,,,,999.99,99.99,,,ZZ-ZZZZZ""" \
1534- """ZZZ,ZZ-ZZZZ-ZZZZZZZ,,,,ZZ-ZZ-ZZZZZZZZ,ZZZZZZZZ,ZZZ-ZZZZ""" \
1535- """,9999,999.99,ZZZ-ZZZZ,-9.99,-9.99,ZZZ-ZZZZ,99:99:99,,99""" \
1536- """,99,,9.99,,-99.99,,,,,,9.99,ZZZ-ZZZZ,-9.99,-9.99,9.99,9""" \
1537- """.99,,ZZZ,,,,,,,ZZZ,ZZZ,,,,,"""
1538-
1539- # Set the number of lines so that a call to `parser_trim_buffers`
1540- # is triggered: a couple of full chunks and a relatively small
1541- # 'residual' chunk.
1542- chunksize , n_lines = 128 , 2 * 128 + 15
1543- csv_data = "\n " .join ([record_ ] * n_lines ) + "\n " # NOTE(review): the space after each newline looks like a diff-rendering artifact; confirm against the repository
1544-
1545- # We will use StringIO to load the CSV from this text buffer.
1546- # pd.read_csv() will iterate over the file in chunks and will
1547- # finally read a residual chunk of really small size.
1548-
1549- # Create the expected output: manually create the dataframe
1550- # by splitting by comma and repeating the `n_lines` number
1551- # of times.
1552- row = tuple (val_ if val_ else float ("nan" )
1553- for val_ in record_ .split ("," ))
1554- expected_ = [row for _ in range (n_lines )]
1555- expected = pd .DataFrame (expected_ , dtype = object ,
1556- columns = None , index = None )
1557-
1558- # Iterate over the CSV file in chunks of `chunksize` lines
1559- output_ = []
1560- try :
1561- iterator_ = self .read_csv (StringIO (csv_data ), header = None ,
1562- dtype = object , chunksize = chunksize )
1563- for chunk_ in iterator_ :
1564- output_ .append (chunk_ )
1565- except ValueError :
1566- # Ignore unsupported dtype=object by engine=python
1567- # in this case output_ list is empty
1568- pass
1569-
1570- # Check for data corruption if there is any output.
1571- if output_ :
1572- df = pd .concat (output_ , axis = 0 , ignore_index = True )
1573- tm .assert_frame_equal (df , expected )
0 commit comments