88using System . Data . Common ;
99using System . Globalization ;
1010using System . IO ;
11+ using System . Linq ;
1112using System . Text ;
1213using System . Threading . Tasks ;
1314
@@ -349,8 +350,8 @@ private static DataFrameColumn CreateColumn(Type kind, string[] columnNames, int
349350 private static DataFrame ReadCsvLinesIntoDataFrame ( WrappedStreamReaderOrStringReader wrappedReader ,
350351 char separator = ',' , bool header = true ,
351352 string [ ] columnNames = null , Type [ ] dataTypes = null ,
352- long numberOfRowsToRead = - 1 , int guessRows = 10 , bool addIndexColumn = false
353- )
353+ long numberOfRowsToRead = - 1 , int guessRows = 10 , bool addIndexColumn = false ,
354+ bool renameDuplicatedColumns = false )
354355 {
355356 if ( dataTypes == null && guessRows <= 0 )
356357 {
@@ -376,6 +377,25 @@ private static DataFrame ReadCsvLinesIntoDataFrame(WrappedStreamReaderOrStringRe
376377 // First pass: schema and number of rows.
377378 while ( ( fields = parser . ReadFields ( ) ) != null )
378379 {
// Rename repeated column names, but only on the header row. Every other row
// returned by the parser is data, and repeated values in a data row
// (e.g. "1,1") must be left untouched.
if (renameDuplicatedColumns && header && rowline == 0)
{
    // Maps each name seen so far to the suffix the next duplicate will get.
    var nextSuffixByName = new Dictionary<string, int>();

    for (int i = 0; i < fields.Length; i++)
    {
        string originalName = fields[i];

        if (nextSuffixByName.TryGetValue(originalName, out int suffix))
        {
            // Second and later occurrences become "name.1", "name.2", ...
            // Invariant culture keeps the generated names machine-independent.
            fields[i] = string.Format(CultureInfo.InvariantCulture, "{0}.{1}", originalName, suffix);
            nextSuffixByName[originalName] = suffix + 1;
        }
        else
        {
            nextSuffixByName.Add(originalName, 1);
        }
    }

    // NOTE: a generated name is not checked against names that already appear
    // literally in the header (e.g. "a,a.1,a" still yields two "a.1" columns).
}
398+
379399 if ( ( numberOfRowsToRead == - 1 ) || rowline < numberOfRowsToRead )
380400 {
381401 if ( linesForGuessType . Count < guessRows || ( header && rowline == 0 ) )
@@ -524,12 +544,13 @@ public static DataFrame LoadCsvFromString(string csvString,
524544 /// <param name="guessRows">number of rows used to guess types</param>
525545 /// <param name="addIndexColumn">add one column with the row index</param>
526546 /// <param name="encoding">The character encoding. Defaults to UTF8 if not specified</param>
547+ /// <param name="renameDuplicatedColumns">If set to true, columns with repeated names are auto-renamed.</param>
527548 /// <returns><see cref="DataFrame"/></returns>
528549 public static DataFrame LoadCsv ( Stream csvStream ,
529550 char separator = ',' , bool header = true ,
530551 string [ ] columnNames = null , Type [ ] dataTypes = null ,
531552 long numberOfRowsToRead = - 1 , int guessRows = 10 , bool addIndexColumn = false ,
532- Encoding encoding = null )
553+ Encoding encoding = null , bool renameDuplicatedColumns = false )
533554 {
534555 if ( ! csvStream . CanSeek )
535556 {
@@ -542,7 +563,7 @@ public static DataFrame LoadCsv(Stream csvStream,
542563 }
543564
544565 WrappedStreamReaderOrStringReader wrappedStreamReaderOrStringReader = new WrappedStreamReaderOrStringReader ( csvStream , encoding ?? Encoding . UTF8 ) ;
545- return ReadCsvLinesIntoDataFrame ( wrappedStreamReaderOrStringReader , separator , header , columnNames , dataTypes , numberOfRowsToRead , guessRows , addIndexColumn ) ;
566+ return ReadCsvLinesIntoDataFrame ( wrappedStreamReaderOrStringReader , separator , header , columnNames , dataTypes , numberOfRowsToRead , guessRows , addIndexColumn , renameDuplicatedColumns ) ;
546567 }
547568
548569 /// <summary>
0 commit comments