Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 45 additions & 7 deletions src/Microsoft.Data.Analysis/DataFrame.IO.cs
Original file line number Diff line number Diff line change
Expand Up @@ -450,28 +450,25 @@ public static void WriteCsv(DataFrame dataFrame, Stream csvStream,
{
if (dataFrame != null)
{
var columnNames = dataFrame.Columns.GetColumnNames();

if (header)
{
var headerColumns = string.Join(separator.ToString(), columnNames);
csvFile.WriteLine(headerColumns);
WriteHeader(csvFile, dataFrame.Columns.GetColumnNames(), separator);
}

var record = new StringBuilder();

foreach (var row in dataFrame.Rows)
{
bool firstRow = true;
bool firstCell = true;
foreach (var cell in row)
{
if (!firstRow)
if (!firstCell)
{
record.Append(separator);
}
else
{
firstRow = false;
firstCell = false;
}

Type t = cell?.GetType();
Expand Down Expand Up @@ -500,6 +497,18 @@ public static void WriteCsv(DataFrame dataFrame, Stream csvStream,
continue;
}

if (t == typeof(string))
{
bool needsQuotes = ((string)cell).IndexOf(separator) != -1 || ((string)cell).IndexOf('\n') != -1;
if (needsQuotes)
{
record.Append('\"');
record.Append(cell);
record.Append('\"');
continue;
}
}

record.Append(cell);
}

Expand All @@ -510,5 +519,34 @@ public static void WriteCsv(DataFrame dataFrame, Stream csvStream,
}
}
}
/// <summary>
/// Writes the CSV header line to <paramref name="csvFile"/>: the column names joined by
/// <paramref name="separator"/>, followed by a line terminator.
/// </summary>
/// <param name="csvFile">Writer that receives the header line.</param>
/// <param name="columnNames">Column names, written in list order.</param>
/// <param name="separator">Character placed between adjacent column names.</param>
/// <remarks>
/// A name is enclosed in double quotes when it contains the separator, a double quote, a carriage
/// return or a line feed. Double quotes embedded in a quoted name are doubled (<c>"</c> becomes
/// <c>""</c>), as described by RFC 4180, so the quoted field stays well-formed.
/// A <c>null</c> name is written as an empty field.
/// </remarks>
private static void WriteHeader(StreamWriter csvFile, IReadOnlyList<string> columnNames, char separator)
{
    // Any of these characters inside a name would otherwise be misread as field or record structure.
    char[] charsRequiringQuotes = { separator, '"', '\r', '\n' };

    for (int i = 0; i < columnNames.Count; i++)
    {
        if (i > 0)
        {
            csvFile.Write(separator);
        }

        string name = columnNames[i] ?? string.Empty;

        if (name.IndexOfAny(charsRequiringQuotes) != -1)
        {
            csvFile.Write('"');
            // Double embedded quotes so they are not taken for the closing quote.
            csvFile.Write(name.Replace("\"", "\"\""));
            csvFile.Write('"');
        }
        else
        {
            csvFile.Write(name);
        }
    }

    csvFile.WriteLine();
}
}
}
185 changes: 185 additions & 0 deletions test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
Expand Down Expand Up @@ -1019,5 +1020,189 @@ public void TestMixedDataTypesInCsv()
Assert.Equal("", emptyColumn[i]);
}
}

/// <summary>
/// Carries the expected shape and contents of a loaded <see cref="DataFrame"/> and asserts
/// that a given frame matches them.
/// </summary>
public readonly struct LoadCsvVerifyingHelper
{
    private readonly int _columnCount;
    private readonly long _rowCount;
    private readonly string[] _columnNames;
    private readonly Type[] _columnTypes;
    private readonly object[][] _cells;

    /// <summary>
    /// Stores the expectations that <see cref="VerifyLoadCsv"/> checks.
    /// </summary>
    /// <param name="columnCount">Expected number of columns.</param>
    /// <param name="rowCount">Expected number of rows.</param>
    /// <param name="columnNames">Expected column names, indexed by column.</param>
    /// <param name="columnTypes">Expected column data types, indexed by column.</param>
    /// <param name="cells">Expected cell values, one inner array per row.</param>
    public LoadCsvVerifyingHelper(int columnCount, long rowCount, string[] columnNames, Type[] columnTypes, object[][] cells)
    {
        (_columnCount, _rowCount) = (columnCount, rowCount);
        (_columnNames, _columnTypes, _cells) = (columnNames, columnTypes, cells);
    }

    /// <summary>
    /// Asserts that <paramref name="df"/> has the expected row and column counts, per-column
    /// data types and names, and per-row cell values.
    /// </summary>
    /// <param name="df">The frame to check.</param>
    public void VerifyLoadCsv(DataFrame df)
    {
        // Shape first: a count mismatch fails before any per-column or per-row check runs.
        Assert.Equal(_rowCount, df.Rows.Count);
        Assert.Equal(_columnCount, df.Columns.Count);

        for (int columnIndex = 0; columnIndex < _columnCount; columnIndex++)
        {
            Type expectedType = _columnTypes[columnIndex];
            var column = df.Columns[columnIndex];

            Assert.True(expectedType == column.DataType);
            Assert.Equal(_columnNames[columnIndex], column.Name);
        }

        // VerifyColumnTypes is declared outside this struct; it is called here between the
        // per-column checks and the per-row checks.
        VerifyColumnTypes(df);

        for (int rowIndex = 0; rowIndex < _rowCount; rowIndex++)
        {
            object[] expectedRow = _cells[rowIndex];
            Assert.Equal(expectedRow, df.Rows[rowIndex]);
        }
    }
}

// Theory data for the text-qualifier (double-quoted field) CSV tests.
// Each yielded object[] holds, in order: the CSV text, the separator character,
// the column data types, and a LoadCsvVerifyingHelper describing the expected result
// (3 columns, 3 rows, the column names, the column types and the cell values).
// In every case the last data row ends with the separator ("Maria,31," or "Maria:31:"),
// and the expected Description cell for that row is the empty string.
// The CSV text is written as verbatim string literals, so the line breaks and the
// doubled quotes ("") inside them are part of the data and must not be reformatted.
public static IEnumerable<object[]> CsvWithTextQualifiers_TestData()
{
yield return new object[] // Comma Separators in Data
{
@"Name,Age,Description
Paul,34,""Paul lives in Vermont, VA.""
Victor,29,""Victor: Funny guy""
Maria,31,",
',',
new Type[] { typeof(string), typeof(int), typeof(string) },
new LoadCsvVerifyingHelper(
3,
3,
new string[] { "Name", "Age", "Description" },
new Type[] { typeof(string), typeof(int), typeof(string) },
new object[][]
{
new object[] { "Paul", 34, "Paul lives in Vermont, VA." },
new object[] { "Victor", 29, "Victor: Funny guy" },
new object[] { "Maria", 31, "" }
}
)
};
// Same rows as the first case, but ':' is the separator, so the quoted
// "Victor: Funny guy" field is the one that contains the separator.
yield return new object[] // Colon Separators in Data
{
@"Name:Age:Description
Paul:34:""Paul lives in Vermont, VA.""
Victor:29:""Victor: Funny guy""
Maria:31:",
':',
new Type[] { typeof(string), typeof(int), typeof(string) },
new LoadCsvVerifyingHelper(
3,
3,
new string[] { "Name", "Age", "Description" },
new Type[] { typeof(string), typeof(int), typeof(string) },
new object[][]
{
new object[] { "Paul", 34, "Paul lives in Vermont, VA." },
new object[] { "Victor", 29, "Victor: Funny guy" },
new object[] { "Maria", 31, "" }
}
)
};
// The first header field is quoted and contains the separator; the expected column name is "Na,me".
yield return new object[] // Comma Separators in Header
{
@"""Na,me"",Age,Description
Paul,34,""Paul lives in Vermont, VA.""
Victor,29,""Victor: Funny guy""
Maria,31,",
',',
new Type[] { typeof(string), typeof(int), typeof(string) },
new LoadCsvVerifyingHelper(
3,
3,
new string[] { "Na,me", "Age", "Description" },
new Type[] { typeof(string), typeof(int), typeof(string) },
new object[][]
{
new object[] { "Paul", 34, "Paul lives in Vermont, VA." },
new object[] { "Victor", 29, "Victor: Funny guy" },
new object[] { "Maria", 31, "" }
}
)
};
// A quoted data field spans two physical lines; the expected cell keeps the line break.
yield return new object[] // Newlines In Data
{
@"Name,Age,Description
Paul,34,""Paul lives in Vermont
VA.""
Victor,29,""Victor: Funny guy""
Maria,31,",
',',
new Type[] { typeof(string), typeof(int), typeof(string) },
new LoadCsvVerifyingHelper(
3,
3,
new string[] { "Name", "Age", "Description" },
new Type[] { typeof(string), typeof(int), typeof(string) },
new object[][]
{
new object[] { "Paul", 34, @"Paul lives in Vermont
VA." },
new object[] { "Victor", 29, "Victor: Funny guy" },
new object[] { "Maria", 31, "" }
}
)
};
// A quoted header field spans two physical lines; the expected column name keeps the line break.
yield return new object[] // Newlines In Header
{
@"""Na
me"":Age:Description
Paul:34:""Paul lives in Vermont, VA.""
Victor:29:""Victor: Funny guy""
Maria:31:",
':',
new Type[] { typeof(string), typeof(int), typeof(string) },
new LoadCsvVerifyingHelper(
3,
3,
new string[] { @"Na
me", "Age", "Description" },
new Type[] { typeof(string), typeof(int), typeof(string) },
new object[][]
{
new object[] { "Paul", 34, "Paul lives in Vermont, VA." },
new object[] { "Victor", 29, "Victor: Funny guy" },
new object[] { "Maria", 31, "" }
}
)
};
}

// Loads each text-qualifier case through the stream-based LoadCsv overload and checks the
// resulting frame against the expectations carried by the helper.
[Theory]
[MemberData(nameof(CsvWithTextQualifiers_TestData))]
public void TestLoadCsvWithTextQualifiersFromStream(string data, char separator, Type[] dataTypes, LoadCsvVerifyingHelper helper)
{
    // Dispose the input stream once the test finishes instead of leaving it to the GC.
    using var csvStream = GetStream(data);

    DataFrame df = DataFrame.LoadCsv(csvStream, dataTypes: dataTypes, separator: separator);

    helper.VerifyLoadCsv(df);
}

// Loads each text-qualifier case through LoadCsvFromString and checks the resulting frame
// against the expectations carried by the helper.
[Theory]
[MemberData(nameof(CsvWithTextQualifiers_TestData))]
public void TestLoadCsvWithTextQualifiersFromString(string data, char separator, Type[] dataTypes, LoadCsvVerifyingHelper helper)
    => helper.VerifyLoadCsv(DataFrame.LoadCsvFromString(data, dataTypes: dataTypes, separator: separator));

// Round-trip test: load each text-qualifier case, write it back out with WriteCsv, reload the
// written bytes, and check the reloaded frame against the same expectations.
[Theory]
[MemberData(nameof(CsvWithTextQualifiers_TestData))]
public void TestWriteCsvWithTextQualifiers(string data, char separator, Type[] dataTypes, LoadCsvVerifyingHelper helper)
{
    // Dispose the input stream too, matching how the output stream below is handled.
    using var inputStream = GetStream(data);
    DataFrame df = DataFrame.LoadCsv(inputStream, dataTypes: dataTypes, separator: separator);

    using MemoryStream csvStream = new MemoryStream();
    DataFrame.WriteCsv(df, csvStream, separator: separator);

    // We are verifying that WriteCsv works by reading the result back to a DataFrame and verifying correctness,
    // ensuring no information loss
    // Rewind first: the stream position is at the end of what WriteCsv just wrote.
    csvStream.Seek(0, SeekOrigin.Begin);
    DataFrame df2 = DataFrame.LoadCsv(csvStream, dataTypes: dataTypes, separator: separator);
    helper.VerifyLoadCsv(df2);
}
}
}