@@ -172,7 +172,7 @@ private static DataFrameColumn CreateColumn(Type kind, string[] columnNames, int
172172 return ret ;
173173 }
174174
175- private static DataFrame ReadCsvLinesIntoDataFrame ( IEnumerable < string > lines ,
175+ private static DataFrame ReadCsvLinesIntoDataFrame ( WrappedStreamReaderOrStringReader wrappedReader ,
176176 char separator = ',' , bool header = true ,
177177 string [ ] columnNames = null , Type [ ] dataTypes = null ,
178178 long numberOfRowsToRead = - 1 , int guessRows = 10 , bool addIndexColumn = false
@@ -183,140 +183,141 @@ private static DataFrame ReadCsvLinesIntoDataFrame(IEnumerable<string> lines,
183183 throw new ArgumentException ( string . Format ( Strings . ExpectedEitherGuessRowsOrDataTypes , nameof ( guessRows ) , nameof ( dataTypes ) ) ) ;
184184 }
185185
186- var linesForGuessType = new List < string [ ] > ( ) ;
187- long rowline = 0 ;
188- int numberOfColumns = dataTypes ? . Length ?? 0 ;
189-
190- if ( header == true && numberOfRowsToRead != - 1 )
186+ List < DataFrameColumn > columns ;
187+ string [ ] fields ;
188+ using ( var textReader = wrappedReader . GetTextReader ( ) )
191189 {
192- numberOfRowsToRead ++ ;
193- }
190+ TextFieldParser parser = new TextFieldParser ( textReader ) ;
191+ parser . SetDelimiters ( separator . ToString ( ) ) ;
194192
195- List < DataFrameColumn > columns ;
196- // First pass: schema and number of rows.
197- string line = null ;
193+ var linesForGuessType = new List < string [ ] > ( ) ;
194+ long rowline = 0 ;
195+ int numberOfColumns = dataTypes ? . Length ?? 0 ;
198196
199- var enumerator = lines . GetEnumerator ( ) ;
200- while ( enumerator . MoveNext ( ) )
201- {
202- line = enumerator . Current ;
203- if ( ( numberOfRowsToRead == - 1 ) || rowline < numberOfRowsToRead )
197+ if ( header == true && numberOfRowsToRead != - 1 )
198+ {
199+ numberOfRowsToRead ++ ;
200+ }
201+
202+ // First pass: schema and number of rows.
203+ while ( ( fields = parser . ReadFields ( ) ) != null )
204204 {
205- if ( linesForGuessType . Count < guessRows || ( header && rowline == 0 ) )
205+ if ( ( numberOfRowsToRead == - 1 ) || rowline < numberOfRowsToRead )
206206 {
207- var spl = line . Split ( separator ) ;
208- if ( header && rowline == 0 )
207+ if ( linesForGuessType . Count < guessRows || ( header && rowline == 0 ) )
209208 {
210- if ( columnNames == null )
209+ string [ ] spl = fields ;
210+ if ( header && rowline == 0 )
211211 {
212- columnNames = spl ;
212+ if ( columnNames == null )
213+ {
214+ columnNames = spl ;
215+ }
216+ }
217+ else
218+ {
219+ linesForGuessType . Add ( spl ) ;
220+ numberOfColumns = Math . Max ( numberOfColumns , spl . Length ) ;
213221 }
214222 }
215- else
216- {
217- linesForGuessType . Add ( spl ) ;
218- numberOfColumns = Math . Max ( numberOfColumns , spl . Length ) ;
219- }
223+ }
224+ ++ rowline ;
225+ if ( rowline == guessRows || guessRows == 0 )
226+ {
227+ break ;
220228 }
221229 }
222- ++ rowline ;
223- if ( rowline == guessRows || guessRows == 0 )
230+
231+ if ( rowline == 0 )
224232 {
225- break ;
233+ throw new FormatException ( Strings . EmptyFile ) ;
226234 }
227- }
228235
229- if ( rowline == 0 )
230- {
231- throw new FormatException ( Strings . EmptyFile ) ;
232- }
233-
234- columns = new List < DataFrameColumn > ( numberOfColumns ) ;
235- // Guesses types or looks up dataTypes and adds columns.
236- for ( int i = 0 ; i < numberOfColumns ; ++ i )
237- {
238- Type kind = dataTypes == null ? GuessKind ( i , linesForGuessType ) : dataTypes [ i ] ;
239- columns . Add ( CreateColumn ( kind , columnNames , i ) ) ;
236+ columns = new List < DataFrameColumn > ( numberOfColumns ) ;
237+ // Guesses types or looks up dataTypes and adds columns.
238+ for ( int i = 0 ; i < numberOfColumns ; ++ i )
239+ {
240+ Type kind = dataTypes == null ? GuessKind ( i , linesForGuessType ) : dataTypes [ i ] ;
241+ columns . Add ( CreateColumn ( kind , columnNames , i ) ) ;
242+ }
240243 }
241244
242245 DataFrame ret = new DataFrame ( columns ) ;
243- line = null ;
244246
245247 // Fill values.
246- enumerator . Reset ( ) ;
247- rowline = 0 ;
248- while ( enumerator . MoveNext ( ) && ( numberOfRowsToRead == - 1 || rowline < numberOfRowsToRead ) )
248+ using ( var textReader = wrappedReader . GetTextReader ( ) )
249249 {
250- line = enumerator . Current ;
251- var spl = line . Split ( separator ) ;
252- if ( header && rowline == 0 )
253- {
254- // Skips.
255- }
256- else
250+ TextFieldParser parser = new TextFieldParser ( textReader ) ;
251+ parser . SetDelimiters ( separator . ToString ( ) ) ;
252+
253+ long rowline = 0 ;
254+ while ( ( fields = parser . ReadFields ( ) ) != null && ( numberOfRowsToRead == - 1 || rowline < numberOfRowsToRead ) )
257255 {
258- ret . Append ( spl , inPlace : true ) ;
256+ string [ ] spl = fields ;
257+ if ( header && rowline == 0 )
258+ {
259+ // Skips.
260+ }
261+ else
262+ {
263+ ret . Append ( spl , inPlace : true ) ;
264+ }
265+ ++ rowline ;
259266 }
260- ++ rowline ;
261- }
262267
263- if ( addIndexColumn )
264- {
265- PrimitiveDataFrameColumn < int > indexColumn = new PrimitiveDataFrameColumn < int > ( "IndexColumn" , columns [ 0 ] . Length ) ;
266- for ( int i = 0 ; i < columns [ 0 ] . Length ; i ++ )
268+ if ( addIndexColumn )
267269 {
268- indexColumn [ i ] = i ;
270+ PrimitiveDataFrameColumn < int > indexColumn = new PrimitiveDataFrameColumn < int > ( "IndexColumn" , columns [ 0 ] . Length ) ;
271+ for ( int i = 0 ; i < columns [ 0 ] . Length ; i ++ )
272+ {
273+ indexColumn [ i ] = i ;
274+ }
275+ columns . Insert ( 0 , indexColumn ) ;
269276 }
270- columns . Insert ( 0 , indexColumn ) ;
271- }
272- return ret ;
273- }
274277
275- private class CsvLines : IEnumerable < string >
276- {
277- private CsvLineEnumerator enumerator ;
278- public CsvLines ( CsvLineEnumerator csvLineEnumerator )
279- {
280- enumerator = csvLineEnumerator ;
281278 }
282279
283- public IEnumerator < string > GetEnumerator ( ) => enumerator ;
284-
285- IEnumerator IEnumerable . GetEnumerator ( ) => enumerator ;
280+ return ret ;
286281 }
287282
/// <summary>
/// Holds the source of CSV text, either a <see cref="Stream"/> plus its <see cref="Encoding"/>
/// or an in-memory string, and hands out a fresh <see cref="TextReader"/> over it on demand.
/// Each call to <see cref="GetTextReader"/> starts reading from the beginning of the wrapped source,
/// which lets a caller make more than one pass over the same data.
/// </summary>
private class WrappedStreamReaderOrStringReader
{
    // Exactly one of _stream / _csvString is non-null; GetTextReader branches on _stream.
    private readonly Stream _stream;
    // Position of _stream at construction time; every new reader starts from here.
    private readonly long _initialPosition;
    private readonly Encoding _encoding;
    private readonly string _csvString;

    /// <summary>
    /// Wraps a stream. The stream's current position is remembered as the start of the CSV data.
    /// </summary>
    /// <param name="stream">Stream containing the CSV data. It is sought back to its initial
    /// position on every <see cref="GetTextReader"/> call, so it has to support seeking.</param>
    /// <param name="encoding">Encoding handed to the <see cref="StreamReader"/> that reads the stream.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public WrappedStreamReaderOrStringReader(Stream stream, Encoding encoding)
    {
        // Fail fast with a descriptive exception instead of a NullReferenceException on stream.Position.
        if (stream == null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        _stream = stream;
        _initialPosition = stream.Position;
        _encoding = encoding;
        _csvString = null;
    }

    /// <summary>
    /// Wraps CSV text that is already in memory.
    /// </summary>
    /// <param name="csvString">The CSV content.</param>
    /// <exception cref="ArgumentNullException"><paramref name="csvString"/> is null.</exception>
    public WrappedStreamReaderOrStringReader(string csvString)
    {
        // Without this check the failure would only surface later, inside GetTextReader.
        if (csvString == null)
        {
            throw new ArgumentNullException(nameof(csvString));
        }

        _csvString = csvString;
        _initialPosition = 0;
        _encoding = null;
        _stream = null;
    }

    /// <summary>
    /// Returns a new <see cref="TextReader"/>. If the wrapped object is a stream, the stream is
    /// reset to its initial position first.
    /// </summary>
    /// <returns>
    /// A <see cref="StreamReader"/> created with <c>leaveOpen: true</c> (disposing it does not close
    /// the wrapped stream), or a <see cref="StringReader"/> over the wrapped string.
    /// </returns>
    public TextReader GetTextReader()
    {
        if (_stream != null)
        {
            // Rewind so that each reader sees the data from the start, regardless of how far
            // a previously returned reader advanced the stream.
            _stream.Seek(_initialPosition, SeekOrigin.Begin);
            return new StreamReader(_stream, _encoding, detectEncodingFromByteOrderMarks: true, DefaultStreamReaderBufferSize, leaveOpen: true);
        }

        return new StringReader(_csvString);
    }
}
321322
322323 /// <summary>
@@ -336,8 +337,8 @@ public static DataFrame LoadCsvFromString(string csvString,
336337 string [ ] columnNames = null , Type [ ] dataTypes = null ,
337338 long numberOfRowsToRead = - 1 , int guessRows = 10 , bool addIndexColumn = false )
338339 {
339- string [ ] lines = csvString . Split ( new [ ] { Environment . NewLine } , StringSplitOptions . None ) ;
340- return ReadCsvLinesIntoDataFrame ( lines , separator , header , columnNames , dataTypes , numberOfRowsToRead , guessRows , addIndexColumn ) ;
340+ WrappedStreamReaderOrStringReader wrappedStreamReaderOrStringReader = new WrappedStreamReaderOrStringReader ( csvString ) ;
341+ return ReadCsvLinesIntoDataFrame ( wrappedStreamReaderOrStringReader , separator , header , columnNames , dataTypes , numberOfRowsToRead , guessRows , addIndexColumn ) ;
341342 }
342343
343344 /// <summary>
@@ -369,12 +370,8 @@ public static DataFrame LoadCsv(Stream csvStream,
369370 throw new ArgumentException ( string . Format ( Strings . ExpectedEitherGuessRowsOrDataTypes , nameof ( guessRows ) , nameof ( dataTypes ) ) ) ;
370371 }
371372
372- using ( var streamReader = new StreamReader ( csvStream , encoding ?? Encoding . UTF8 , detectEncodingFromByteOrderMarks : true , DefaultStreamReaderBufferSize , leaveOpen : true ) )
373- {
374- CsvLineEnumerator linesEnumerator = new CsvLineEnumerator ( streamReader ) ;
375- IEnumerable < string > lines = new CsvLines ( linesEnumerator ) ;
376- return ReadCsvLinesIntoDataFrame ( lines , separator , header , columnNames , dataTypes , numberOfRowsToRead , guessRows , addIndexColumn ) ;
377- }
373+ WrappedStreamReaderOrStringReader wrappedStreamReaderOrStringReader = new WrappedStreamReaderOrStringReader ( csvStream , encoding ?? Encoding . UTF8 ) ;
374+ return ReadCsvLinesIntoDataFrame ( wrappedStreamReaderOrStringReader , separator , header , columnNames , dataTypes , numberOfRowsToRead , guessRows , addIndexColumn ) ;
378375 }
379376
380377 /// <summary>
0 commit comments