88using System . Data . Common ;
99using System . Globalization ;
1010using System . IO ;
11+ using System . Linq ;
1112using System . Text ;
1213using System . Threading . Tasks ;
1314
@@ -349,8 +350,8 @@ private static DataFrameColumn CreateColumn(Type kind, string[] columnNames, int
349350 private static DataFrame ReadCsvLinesIntoDataFrame ( WrappedStreamReaderOrStringReader wrappedReader ,
350351 char separator = ',' , bool header = true ,
351352 string [ ] columnNames = null , Type [ ] dataTypes = null ,
352- long numberOfRowsToRead = - 1 , int guessRows = 10 , bool addIndexColumn = false
353- )
353+ long numberOfRowsToRead = - 1 , int guessRows = 10 , bool addIndexColumn = false ,
354+ bool renameDuplicatedColumns = false )
354355 {
355356 if ( dataTypes == null && guessRows <= 0 )
356357 {
@@ -376,6 +377,25 @@ private static DataFrame ReadCsvLinesIntoDataFrame(WrappedStreamReaderOrStringRe
376377 // First pass: schema and number of rows.
377378 while ( ( fields = parser . ReadFields ( ) ) != null )
378379 {
380+ if ( renameDuplicatedColumns )
381+ {
382+ var names = new Dictionary < string , int > ( ) ;
383+
384+ for ( int i = 0 ; i < fields . Length ; i ++ )
385+ {
386+ if ( names . TryGetValue ( fields [ i ] , out int index ) )
387+ {
388+ var newName = String . Format ( "{0}.{1}" , fields [ i ] , index ) ;
389+ names [ fields [ i ] ] = ++ index ;
390+ fields [ i ] = newName ;
391+ }
392+ else
393+ {
394+ names . Add ( fields [ i ] , 1 ) ;
395+ }
396+ }
397+ }
398+
379399 if ( ( numberOfRowsToRead == - 1 ) || rowline < numberOfRowsToRead )
380400 {
381401 if ( linesForGuessType . Count < guessRows || ( header && rowline == 0 ) )
@@ -525,12 +545,13 @@ public static DataFrame LoadCsvFromString(string csvString,
525545 /// <param name="guessRows">number of rows used to guess types</param>
526546 /// <param name="addIndexColumn">add one column with the row index</param>
527547 /// <param name="encoding">The character encoding. Defaults to UTF8 if not specified</param>
548+ /// <param name="renameDuplicatedColumns">If set to true, columns with repeated names are renamed by appending a numeric suffix to each repeat (for example: a, a.1, a.2).</param>
528549 /// <returns><see cref="DataFrame"/></returns>
529550 public static DataFrame LoadCsv ( Stream csvStream ,
530551 char separator = ',' , bool header = true ,
531552 string [ ] columnNames = null , Type [ ] dataTypes = null ,
532553 long numberOfRowsToRead = - 1 , int guessRows = 10 , bool addIndexColumn = false ,
533- Encoding encoding = null )
554+ Encoding encoding = null , bool renameDuplicatedColumns = false )
534555 {
535556 if ( ! csvStream . CanSeek )
536557 {
@@ -543,7 +564,7 @@ public static DataFrame LoadCsv(Stream csvStream,
543564 }
544565
545566 WrappedStreamReaderOrStringReader wrappedStreamReaderOrStringReader = new WrappedStreamReaderOrStringReader ( csvStream , encoding ?? Encoding . UTF8 ) ;
546- return ReadCsvLinesIntoDataFrame ( wrappedStreamReaderOrStringReader , separator , header , columnNames , dataTypes , numberOfRowsToRead , guessRows , addIndexColumn ) ;
567+ return ReadCsvLinesIntoDataFrame ( wrappedStreamReaderOrStringReader , separator , header , columnNames , dataTypes , numberOfRowsToRead , guessRows , addIndexColumn , renameDuplicatedColumns ) ;
547568 }
548569
549570 /// <summary>
0 commit comments