1- from io import StringIO
1+ from io import BytesIO , StringIO
22import random
33import string
44
@@ -146,10 +146,10 @@ def time_read_csv(self, bad_date_value):
146146class ReadCSVSkipRows (BaseIO ):
147147
148148 fname = "__test__.csv"
149- params = [None , 10000 ]
150- param_names = ["skiprows" ]
149+ params = ( [None , 10000 ], [ "c" , "python" ])
150+ param_names = ["skiprows" , "engine" ]
151151
152- def setup (self , skiprows ):
152+ def setup (self , skiprows , engine ):
153153 N = 20000
154154 index = tm .makeStringIndex (N )
155155 df = DataFrame (
@@ -164,8 +164,8 @@ def setup(self, skiprows):
164164 )
165165 df .to_csv (self .fname )
166166
167- def time_skipprows (self , skiprows ):
168- read_csv (self .fname , skiprows = skiprows )
167+ def time_skipprows (self , skiprows , engine ):
168+ read_csv (self .fname , skiprows = skiprows , engine = engine )
169169
170170
171171class ReadUint64Integers (StringIORewind ):
@@ -192,10 +192,10 @@ def time_read_uint64_na_values(self):
192192class ReadCSVThousands (BaseIO ):
193193
194194 fname = "__test__.csv"
195- params = (["," , "|" ], [None , "," ])
196- param_names = ["sep" , "thousands" ]
195+ params = (["," , "|" ], [None , "," ], [ "c" , "python" ] )
196+ param_names = ["sep" , "thousands" , "engine" ]
197197
198- def setup (self , sep , thousands ):
198+ def setup (self , sep , thousands , engine ):
199199 N = 10000
200200 K = 8
201201 data = np .random .randn (N , K ) * np .random .randint (100 , 10000 , (N , K ))
@@ -206,16 +206,19 @@ def setup(self, sep, thousands):
206206 df = df .applymap (lambda x : fmt .format (x ))
207207 df .to_csv (self .fname , sep = sep )
208208
209- def time_thousands (self , sep , thousands ):
210- read_csv (self .fname , sep = sep , thousands = thousands )
209+ def time_thousands (self , sep , thousands , engine ):
210+ read_csv (self .fname , sep = sep , thousands = thousands , engine = engine )
211211
212212
class ReadCSVComment(StringIORewind):
    """Benchmark ``read_csv`` on input where every data row carries a comment."""

    # One benchmark run per parser engine.
    params = ["c", "python"]
    param_names = ["engine"]

    def setup(self, engine):
        """Build the in-memory CSV text: one header line plus 100000 rows."""
        data = ["A,B,C"] + (["1,2,3 # comment"] * 100000)
        self.StringIO_input = StringIO("\n ".join(data))

    def time_comment(self, engine):
        """Time parsing with ``comment="#"`` using the parametrized engine."""
        # The engine must be forwarded explicitly; otherwise both parameter
        # values exercise the same default parser and the timings are
        # indistinguishable.
        read_csv(
            self.data(self.StringIO_input),
            engine=engine,
            comment="#",
            header=None,
            names=list("abc"),
        )
@@ -255,25 +258,47 @@ def time_read_csv_python_engine(self, sep, decimal, float_precision):
255258 )
256259
257260
class ReadCSVEngine(StringIORewind):
    """Time ``read_csv`` per engine on text and on byte in-memory buffers."""

    params = ["c", "python"]
    param_names = ["engine"]

    def setup(self, engine):
        """Create a ``StringIO`` input and a ``BytesIO`` input with the same text."""
        header = ["A,B,C,D,E"]
        rows = ["1,2,3,4,5"] * 100000
        self.StringIO_input = StringIO("\n ".join(header + rows))
        # simulate reading from file
        text = self.StringIO_input.read()
        self.BytesIO_input = BytesIO(text.encode("utf-8"))

    def time_read_stringcsv(self, engine):
        """Parse the text buffer with the parametrized engine."""
        source = self.data(self.StringIO_input)
        read_csv(source, engine=engine)

    def time_read_bytescsv(self, engine):
        """Parse the UTF-8 byte buffer with the parametrized engine."""
        source = self.data(self.BytesIO_input)
        read_csv(source, engine=engine)
277+
class ReadCSVCategorical(BaseIO):
    """Compare converting to ``Categorical`` after reading vs. via ``dtype``."""

    fname = "__test__.csv"
    params = ["c", "python"]
    param_names = ["engine"]

    def setup(self, engine):
        """Write a three-column CSV of randomly chosen labels to ``fname``."""
        n_rows = 100000
        labels = ["aaaaaaaa", "bbbbbbb", "cccccccc", "dddddddd", "eeeeeeee"]
        values = np.random.choice(labels, (n_rows, 3))
        frame = DataFrame(values, columns=list("abc"))
        frame.to_csv(self.fname, index=False)

    def time_convert_post(self, engine):
        """Read the file as-is, then apply ``Categorical`` to the result."""
        parsed = read_csv(self.fname, engine=engine)
        parsed.apply(Categorical)

    def time_convert_direct(self, engine):
        """Read the file with ``dtype="category"`` requested from the parser."""
        read_csv(self.fname, engine=engine, dtype="category")
274296
275297class ReadCSVParseDates (StringIORewind ):
276- def setup (self ):
298+ params = ["c" , "python" ]
299+ param_names = ["engine" ]
300+
301+ def setup (self , engine ):
277302 data = """{},19:00:00,18:56:00,0.8100,2.8100,7.2000,0.0000,280.0000\n
278303 {},20:00:00,19:56:00,0.0100,2.2100,7.2000,0.0000,260.0000\n
279304 {},21:00:00,20:56:00,-0.5900,2.2100,5.7000,0.0000,280.0000\n
@@ -284,18 +309,20 @@ def setup(self):
284309 data = data .format (* two_cols )
285310 self .StringIO_input = StringIO (data )
286311
287- def time_multiple_date (self ):
312+ def time_multiple_date (self , engine ):
288313 read_csv (
289314 self .data (self .StringIO_input ),
315+ engine = engine ,
290316 sep = "," ,
291317 header = None ,
292318 names = list (string .digits [:9 ]),
293319 parse_dates = [[1 , 2 ], [1 , 3 ]],
294320 )
295321
296- def time_baseline (self ):
322+ def time_baseline (self , engine ):
297323 read_csv (
298324 self .data (self .StringIO_input ),
325+ engine = engine ,
299326 sep = "," ,
300327 header = None ,
301328 parse_dates = [1 ],
@@ -304,17 +331,18 @@ def time_baseline(self):
304331
305332
306333class ReadCSVCachedParseDates (StringIORewind ):
307- params = ([True , False ],)
308- param_names = ["do_cache" ]
334+ params = ([True , False ], [ "c" , "python" ] )
335+ param_names = ["do_cache" , "engine" ]
309336
310- def setup (self , do_cache ):
337+ def setup (self , do_cache , engine ):
311338 data = ("\n " .join (f"10/{ year } " for year in range (2000 , 2100 )) + "\n " ) * 10
312339 self .StringIO_input = StringIO (data )
313340
314- def time_read_csv_cached (self , do_cache ):
341+ def time_read_csv_cached (self , do_cache , engine ):
315342 try :
316343 read_csv (
317344 self .data (self .StringIO_input ),
345+ engine = engine ,
318346 header = None ,
319347 parse_dates = [0 ],
320348 cache_dates = do_cache ,
@@ -329,37 +357,40 @@ class ReadCSVMemoryGrowth(BaseIO):
329357 chunksize = 20
330358 num_rows = 1000
331359 fname = "__test__.csv"
360+ params = ["c" , "python" ]
361+ param_names = ["engine" ]
332362
333- def setup (self ):
363+ def setup (self , engine ):
334364 with open (self .fname , "w" ) as f :
335365 for i in range (self .num_rows ):
336366 f .write (f"{ i } \n " )
337367
338- def mem_parser_chunks (self ):
368+ def mem_parser_chunks (self , engine ):
339369 # see gh-24805.
340- result = read_csv (self .fname , chunksize = self .chunksize )
370+ result = read_csv (self .fname , chunksize = self .chunksize , engine = engine )
341371
342372 for _ in result :
343373 pass
344374
345375
346376class ReadCSVParseSpecialDate (StringIORewind ):
347- params = (["mY" , "mdY" , "hm" ],)
348- param_names = ["value" ]
377+ params = (["mY" , "mdY" , "hm" ], [ "c" , "python" ] )
378+ param_names = ["value" , "engine" ]
349379 objects = {
350380 "mY" : "01-2019\n 10-2019\n 02/2000\n " ,
351381 "mdY" : "12/02/2010\n " ,
352382 "hm" : "21:34\n " ,
353383 }
354384
355- def setup (self , value ):
385+ def setup (self , value , engine ):
356386 count_elem = 10000
357387 data = self .objects [value ] * count_elem
358388 self .StringIO_input = StringIO (data )
359389
360- def time_read_special_date (self , value ):
390+ def time_read_special_date (self , value , engine ):
361391 read_csv (
362392 self .data (self .StringIO_input ),
393+ engine = engine ,
363394 sep = "," ,
364395 header = None ,
365396 names = ["Date" ],
0 commit comments