From f74d1ab174f9a1c4421543d25a6709f45d1433b1 Mon Sep 17 00:00:00 2001 From: Drew Kersnar <18474647+dakersnar@users.noreply.github.com> Date: Tue, 23 Aug 2022 16:16:30 -0500 Subject: [PATCH 01/20] Add DataFrame.IO tests with separators in data --- .../DataFrame.IOTests.cs | 136 ++++++++++++++++++ 1 file changed, 136 insertions(+) diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs index cfe996b589..014474c2ad 100644 --- a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs @@ -465,6 +465,142 @@ void Verify(DataFrame df, bool verifyDataTypes) Verify(df, false); } + [Fact] + public void TestReadCsvWithCommaSeparatorsInData() + { + string data = @"Name,Age,Description +Paul,34,""Paul lives in Vermont, VA."" +Victor,29,""Victor: Funny guy"" +Maria,31,"; + + void Verify(DataFrame df) + { + Assert.Equal(3, df.Rows.Count); + Assert.Equal(3, df.Columns.Count); + + Assert.True(typeof(string) == df.Columns[0].DataType); + Assert.True(typeof(int) == df.Columns[1].DataType); + Assert.True(typeof(string) == df.Columns[2].DataType); + + + Assert.Equal("Name", df.Columns[0].Name); + Assert.Equal("Age", df.Columns[1].Name); + Assert.Equal("Description", df.Columns[2].Name); + VerifyColumnTypes(df); + + var paulRow = df.Rows[0]; + Assert.Equal("Paul", paulRow[0]); + Assert.Equal(34, paulRow[1]); + Assert.Equal("Paul lives in Vermont, VA.", paulRow[2]); + + var victorRow = df.Rows[1]; + Assert.Equal("Victor", victorRow[0]); + Assert.Equal(29, victorRow[1]); + Assert.Equal("Victor: Funny guy", victorRow[2]); + + var mariaRow = df.Rows[2]; + Assert.Equal("Maria", mariaRow[0]); + Assert.Equal(31, mariaRow[1]); + Assert.Equal("", mariaRow[2]); + } + + DataFrame df = DataFrame.LoadCsv(GetStream(data), dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); + Verify(df); + df = DataFrame.LoadCsvFromString(data, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); + Verify(df); + } + + [Fact] + public void TestReadCsvWithColonSeparatorsInData() + { + string data = @"Name:Age:Description +Paul:34:""Paul lives in Vermont, VA."" +Victor:29:""Victor: Funny guy"" +Maria:31:"; + void Verify(DataFrame df) + { + Assert.Equal(3, df.Rows.Count); + Assert.Equal(3, df.Columns.Count); + + Assert.True(typeof(string) == df.Columns[0].DataType); + Assert.True(typeof(int) == df.Columns[1].DataType); + Assert.True(typeof(string) == df.Columns[2].DataType); + + + Assert.Equal("Name", df.Columns[0].Name); + Assert.Equal("Age", df.Columns[1].Name); + Assert.Equal("Description", df.Columns[2].Name); + VerifyColumnTypes(df); + + var paulRow = df.Rows[0]; + Assert.Equal("Paul", paulRow[0]); + Assert.Equal(34, paulRow[1]); + Assert.Equal("Paul lives in Vermont, VA.", paulRow[2]); + + var victorRow = df.Rows[1]; + Assert.Equal("Victor", victorRow[0]); + Assert.Equal(29, victorRow[1]); + Assert.Equal("Victor: Funny guy", victorRow[2]); + + var mariaRow = df.Rows[2]; + Assert.Equal("Maria", mariaRow[0]); + Assert.Equal(31, mariaRow[1]); + Assert.Equal("", mariaRow[2]); + } + + DataFrame df = DataFrame.LoadCsv(GetStream(data), dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: ':'); + Verify(df); + df = DataFrame.LoadCsvFromString(data, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: ':'); + Verify(df); + } + + [Fact] + public void TestReadCsvWithNewlinesInData() + { + string data = @"Name,Age,Description +Paul,34,""Paul lives in Vermont +VA."" +Victor,29,""Victor: Funny guy"" +Maria,31,"; + + void Verify(DataFrame df) + { + Assert.Equal(3, df.Rows.Count); + Assert.Equal(3, df.Columns.Count); + + Assert.True(typeof(string) == df.Columns[0].DataType); + Assert.True(typeof(int) == df.Columns[1].DataType); + Assert.True(typeof(string) == df.Columns[2].DataType); + + + Assert.Equal("Name", df.Columns[0].Name); + Assert.Equal("Age", df.Columns[1].Name); + Assert.Equal("Description", df.Columns[2].Name); + VerifyColumnTypes(df); + + var paulRow = df.Rows[0]; + Assert.Equal("Paul", paulRow[0]); + Assert.Equal(34, paulRow[1]); + Assert.Equal(@"Paul lives in Vermont +VA.", paulRow[2]); + + var victorRow = df.Rows[1]; + Assert.Equal("Victor", victorRow[0]); + Assert.Equal(29, victorRow[1]); + Assert.Equal("Victor: Funny guy", victorRow[2]); + + var mariaRow = df.Rows[2]; + Assert.Equal("Maria", mariaRow[0]); + Assert.Equal(31, mariaRow[1]); + Assert.Equal("", mariaRow[2]); + } + + DataFrame df = DataFrame.LoadCsv(GetStream(data), dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); + Verify(df); + df = DataFrame.LoadCsvFromString(data, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); + Verify(df); + } + [Fact] public void TestReadCsvWithPipeSeparator() { From d72df26d32a8d5884604ab35520d6c5ffe09731f Mon Sep 17 00:00:00 2001 From: Drew Kersnar <18474647+dakersnar@users.noreply.github.com> Date: Tue, 23 Aug 2022 17:08:13 -0500 Subject: [PATCH 02/20] Add test where comma is in header --- .../DataFrame.IOTests.cs | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs index 014474c2ad..9c78c947e0 100644 --- a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs @@ -554,6 +554,51 @@ void Verify(DataFrame df) Verify(df); } + [Fact] + public void TestReadCsvWithCommaSeparatorsInHeaderData() + { + string data = @"""Na,me"",Age,Description +Paul,34,""Paul lives in Vermont, VA."" +Victor,29,""Victor: Funny guy"" +Maria,31,"; + + void Verify(DataFrame df) + { + Assert.Equal(3, df.Rows.Count); + Assert.Equal(3, df.Columns.Count); + + Assert.True(typeof(string) == df.Columns[0].DataType); + Assert.True(typeof(int) == df.Columns[1].DataType); + Assert.True(typeof(string) == df.Columns[2].DataType); + + + Assert.Equal("Na,me", df.Columns[0].Name); + Assert.Equal("Age", df.Columns[1].Name); + Assert.Equal("Description", df.Columns[2].Name); + VerifyColumnTypes(df); + + var paulRow = df.Rows[0]; + Assert.Equal("Paul", paulRow[0]); + Assert.Equal(34, paulRow[1]); + Assert.Equal("Paul lives in Vermont, VA.", paulRow[2]); + + var victorRow = df.Rows[1]; + Assert.Equal("Victor", victorRow[0]); + Assert.Equal(29, victorRow[1]); + Assert.Equal("Victor: Funny guy", victorRow[2]); + + var mariaRow = df.Rows[2]; + Assert.Equal("Maria", mariaRow[0]); + Assert.Equal(31, mariaRow[1]); + Assert.Equal("", mariaRow[2]); + } + + DataFrame df = DataFrame.LoadCsv(GetStream(data), dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); + Verify(df); + df = DataFrame.LoadCsvFromString(data, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); + Verify(df); + } + [Fact] public void TestReadCsvWithNewlinesInData() { From e9e4254bd22484a2b25240c7c41e926bf34892fc Mon Sep 17 00:00:00 2001 From: Drew Kersnar <18474647+dakersnar@users.noreply.github.com> Date: Wed, 24 Aug 2022 18:03:55 -0500 Subject: [PATCH 03/20] Add two versions of test cases, likely going to use the helper version --- .../DataFrame.IOTests.cs | 456 +++++++++++------- 1 file changed, 275 insertions(+), 181 deletions(-) diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs index 9c78c947e0..18937548fb 100644 --- a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs @@ -3,6 +3,8 @@ // See the LICENSE file in the project root for more information. using System; +using System.CodeDom; +using System.Collections.Generic; using System.Globalization; using System.IO; using System.Linq; @@ -465,187 +467,6 @@ void Verify(DataFrame df, bool verifyDataTypes) Verify(df, false); } - [Fact] - public void TestReadCsvWithCommaSeparatorsInData() - { - string data = @"Name,Age,Description -Paul,34,""Paul lives in Vermont, VA."" -Victor,29,""Victor: Funny guy"" -Maria,31,"; - - void Verify(DataFrame df) - { - Assert.Equal(3, df.Rows.Count); - Assert.Equal(3, df.Columns.Count); - - Assert.True(typeof(string) == df.Columns[0].DataType); - Assert.True(typeof(int) == df.Columns[1].DataType); - Assert.True(typeof(string) == df.Columns[2].DataType); - - - Assert.Equal("Name", df.Columns[0].Name); - Assert.Equal("Age", df.Columns[1].Name); - Assert.Equal("Description", df.Columns[2].Name); - VerifyColumnTypes(df); - - var paulRow = df.Rows[0]; - Assert.Equal("Paul", paulRow[0]); - Assert.Equal(34, paulRow[1]); - Assert.Equal("Paul lives in Vermont, VA.", paulRow[2]); - - var victorRow = df.Rows[1]; - Assert.Equal("Victor", victorRow[0]); - Assert.Equal(29, victorRow[1]); - Assert.Equal("Victor: Funny guy", victorRow[2]); - - var mariaRow = df.Rows[2]; - Assert.Equal("Maria", mariaRow[0]); - Assert.Equal(31, mariaRow[1]); - Assert.Equal("", mariaRow[2]); - } - - DataFrame df = DataFrame.LoadCsv(GetStream(data), dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); - Verify(df); - df = DataFrame.LoadCsvFromString(data, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); - Verify(df); - } - - [Fact] - public void TestReadCsvWithColonSeparatorsInData() - { - string data = @"Name:Age:Description -Paul:34:""Paul lives in Vermont, VA."" -Victor:29:""Victor: Funny guy"" -Maria:31:"; - void Verify(DataFrame df) - { - Assert.Equal(3, df.Rows.Count); - Assert.Equal(3, df.Columns.Count); - - Assert.True(typeof(string) == df.Columns[0].DataType); - Assert.True(typeof(int) == df.Columns[1].DataType); - Assert.True(typeof(string) == df.Columns[2].DataType); - - - Assert.Equal("Name", df.Columns[0].Name); - Assert.Equal("Age", df.Columns[1].Name); - Assert.Equal("Description", df.Columns[2].Name); - VerifyColumnTypes(df); - - var paulRow = df.Rows[0]; - Assert.Equal("Paul", paulRow[0]); - Assert.Equal(34, paulRow[1]); - Assert.Equal("Paul lives in Vermont, VA.", paulRow[2]); - - var victorRow = df.Rows[1]; - Assert.Equal("Victor", victorRow[0]); - Assert.Equal(29, victorRow[1]); - Assert.Equal("Victor: Funny guy", victorRow[2]); - - var mariaRow = df.Rows[2]; - Assert.Equal("Maria", mariaRow[0]); - Assert.Equal(31, mariaRow[1]); - Assert.Equal("", mariaRow[2]); - } - - DataFrame df = DataFrame.LoadCsv(GetStream(data), dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: ':'); - Verify(df); - df = DataFrame.LoadCsvFromString(data, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: ':'); - Verify(df); - } - - [Fact] - public void TestReadCsvWithCommaSeparatorsInHeaderData() - { - string data = @"""Na,me"",Age,Description -Paul,34,""Paul lives in Vermont, VA."" -Victor,29,""Victor: Funny guy"" -Maria,31,"; - - void Verify(DataFrame df) - { - Assert.Equal(3, df.Rows.Count); - Assert.Equal(3, df.Columns.Count); - - Assert.True(typeof(string) == df.Columns[0].DataType); - Assert.True(typeof(int) == df.Columns[1].DataType); - Assert.True(typeof(string) == df.Columns[2].DataType); - - - Assert.Equal("Na,me", df.Columns[0].Name); - Assert.Equal("Age", df.Columns[1].Name); - Assert.Equal("Description", df.Columns[2].Name); - VerifyColumnTypes(df); - - var paulRow = df.Rows[0]; - Assert.Equal("Paul", paulRow[0]); - Assert.Equal(34, paulRow[1]); - Assert.Equal("Paul lives in Vermont, VA.", paulRow[2]); - - var victorRow = df.Rows[1]; - Assert.Equal("Victor", victorRow[0]); - Assert.Equal(29, victorRow[1]); - Assert.Equal("Victor: Funny guy", victorRow[2]); - - var mariaRow = df.Rows[2]; - Assert.Equal("Maria", mariaRow[0]); - Assert.Equal(31, mariaRow[1]); - Assert.Equal("", mariaRow[2]); - } - - DataFrame df = DataFrame.LoadCsv(GetStream(data), dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); - Verify(df); - df = DataFrame.LoadCsvFromString(data, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); - Verify(df); - } - - [Fact] - public void TestReadCsvWithNewlinesInData() - { - string data = @"Name,Age,Description -Paul,34,""Paul lives in Vermont -VA."" -Victor,29,""Victor: Funny guy"" -Maria,31,"; - - void Verify(DataFrame df) - { - Assert.Equal(3, df.Rows.Count); - Assert.Equal(3, df.Columns.Count); - - Assert.True(typeof(string) == df.Columns[0].DataType); - Assert.True(typeof(int) == df.Columns[1].DataType); - Assert.True(typeof(string) == df.Columns[2].DataType); - - - Assert.Equal("Name", df.Columns[0].Name); - Assert.Equal("Age", df.Columns[1].Name); - Assert.Equal("Description", df.Columns[2].Name); - VerifyColumnTypes(df); - - var paulRow = df.Rows[0]; - Assert.Equal("Paul", paulRow[0]); - Assert.Equal(34, paulRow[1]); - Assert.Equal(@"Paul lives in Vermont -VA.", paulRow[2]); - - var victorRow = df.Rows[1]; - Assert.Equal("Victor", victorRow[0]); - Assert.Equal(29, victorRow[1]); - Assert.Equal("Victor: Funny guy", victorRow[2]); - - var mariaRow = df.Rows[2]; - Assert.Equal("Maria", mariaRow[0]); - Assert.Equal(31, mariaRow[1]); - Assert.Equal("", mariaRow[2]); - } - - DataFrame df = DataFrame.LoadCsv(GetStream(data), dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); - Verify(df); - df = DataFrame.LoadCsvFromString(data, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); - Verify(df); - } - [Fact] public void TestReadCsvWithPipeSeparator() { @@ -1200,5 +1021,278 @@ public void TestMixedDataTypesInCsv() Assert.Equal("", emptyColumn[i]); } } + + public struct LoadCsvVerifyingHelper + { + int _columnCount; + long _rowCount; + string[] _columnNames; + Type[] _columnTypes; + object[][] _cells; + + public LoadCsvVerifyingHelper(int columnCount, long rowCount, string[] columnNames, Type[] columnTypes, object[][] cells) + { + _columnCount = columnCount; + _rowCount = rowCount; + _columnNames = columnNames; + _columnTypes = columnTypes; + _cells = cells; + + } + + public void VerifyLoadCsv(DataFrame df) + { + Assert.Equal(_rowCount, df.Rows.Count); + Assert.Equal(_columnCount, df.Columns.Count); + + for (int j = 0; j < _columnCount; j++) + { + Assert.True(_columnTypes[j] == df.Columns[j].DataType); + Assert.Equal(_columnNames[j], df.Columns[j].Name); + + } + + VerifyColumnTypes(df); + + for (int i = 0; i < _rowCount; i++) + { + for (int j = 0; j < _columnCount; j++) + { + Assert.Equal(_cells[i][j], df.Rows[i][j]); + } + } + } + + } + + public static IEnumerable LoadCsv_TestData() + { + yield return new object[] + { + @"Name,Age,Description +Paul,34,""Paul lives in Vermont, VA."" +Victor,29,""Victor: Funny guy"" +Maria,31,", + ',', + new LoadCsvVerifyingHelper( + 3, + 3, + new string[] { "Name", "Age", "Description" }, + new Type[] { typeof(string), typeof(int), typeof(string) }, + new object[][] + { + new object[] { "Paul", 34, "Paul lives in Vermont, VA." }, + new object[] { "Victor", 29, "Victor: Funny guy" }, + new object[] { "Maria", 31, "" } + } + ) + }; + yield return new object[] + { + @"Name:Age:Description +Paul:34:""Paul lives in Vermont, VA."" +Victor:29:""Victor: Funny guy"" +Maria:31:", + ':', + new LoadCsvVerifyingHelper( + 3, + 3, + new string[] { "Name", "Age", "Description" }, + new Type[] { typeof(string), typeof(int), typeof(string) }, + new object[][] + { + new object[] { "Paul", 34, "Paul lives in Vermont, VA." }, + new object[] { "Victor", 29, "Victor: Funny guy" }, + new object[] { "Maria", 31, "" } + } + ) + }; + } + + [Theory] + [MemberData(nameof(LoadCsv_TestData))] + public void TestReadWriteCsvWithCommaSeparatorsInData(string data, char separator, LoadCsvVerifyingHelper helper) + { + // Read data to a DataFrame in two ways and verify correctness + DataFrame df = DataFrame.LoadCsv(GetStream(data), dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: separator); + helper.VerifyLoadCsv(df); + df = DataFrame.LoadCsvFromString(data, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: separator); + helper.VerifyLoadCsv(df); + + // Write DataFrame to a MemoryStream + using MemoryStream csvStream = new MemoryStream(); + DataFrame.WriteCsv(df, csvStream, separator: separator); + + // Read MemoryStream back to DataFrame and verify correctness + csvStream.Seek(0, SeekOrigin.Begin); + DataFrame df2 = DataFrame.LoadCsv(csvStream, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: separator); + helper.VerifyLoadCsv(df2); + } + + [Fact] + public void TestReadWriteCsvWithColonSeparatorsInData() + { + string data = @"Name:Age:Description +Paul:34:""Paul lives in Vermont, VA."" +Victor:29:""Victor: Funny guy"" +Maria:31:"; + void Verify(DataFrame df) + { + Assert.Equal(3, df.Rows.Count); + Assert.Equal(3, df.Columns.Count); + + Assert.True(typeof(string) == df.Columns[0].DataType); + Assert.True(typeof(int) == df.Columns[1].DataType); + Assert.True(typeof(string) == df.Columns[2].DataType); + + + Assert.Equal("Name", df.Columns[0].Name); + Assert.Equal("Age", df.Columns[1].Name); + Assert.Equal("Description", df.Columns[2].Name); + VerifyColumnTypes(df); + + var paulRow = df.Rows[0]; + Assert.Equal("Paul", paulRow[0]); + Assert.Equal(34, paulRow[1]); + Assert.Equal("Paul lives in Vermont, VA.", paulRow[2]); + + var victorRow = df.Rows[1]; + Assert.Equal("Victor", victorRow[0]); + Assert.Equal(29, victorRow[1]); + Assert.Equal("Victor: Funny guy", victorRow[2]); + + var mariaRow = df.Rows[2]; + Assert.Equal("Maria", mariaRow[0]); + Assert.Equal(31, mariaRow[1]); + Assert.Equal("", mariaRow[2]); + } + + // Read data to a DataFrame in two ways and verify correctness + DataFrame df = DataFrame.LoadCsv(GetStream(data), dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: ':'); + Verify(df); + df = DataFrame.LoadCsvFromString(data, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: ':'); + Verify(df); + + // Write DataFrame to a MemoryStream + using MemoryStream csvStream = new MemoryStream(); + DataFrame.WriteCsv(df, csvStream, separator: ':'); + + // Read MemoryStream back to DataFrame and verify correctness + csvStream.Seek(0, SeekOrigin.Begin); + DataFrame df2 = DataFrame.LoadCsv(csvStream, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: ':'); + Verify(df2); + } + + [Fact] + public void TestReadWriteCsvWithCommaSeparatorsInHeaderData() + { + string data = @"""Na,me"",Age,Description +Paul,34,""Paul lives in Vermont, VA."" +Victor,29,""Victor: Funny guy"" +Maria,31,"; + + void Verify(DataFrame df) + { + Assert.Equal(3, df.Rows.Count); + Assert.Equal(3, df.Columns.Count); + + Assert.True(typeof(string) == df.Columns[0].DataType); + Assert.True(typeof(int) == df.Columns[1].DataType); + Assert.True(typeof(string) == df.Columns[2].DataType); + + + Assert.Equal("Na,me", df.Columns[0].Name); + Assert.Equal("Age", df.Columns[1].Name); + Assert.Equal("Description", df.Columns[2].Name); + VerifyColumnTypes(df); + + var paulRow = df.Rows[0]; + Assert.Equal("Paul", paulRow[0]); + Assert.Equal(34, paulRow[1]); + Assert.Equal("Paul lives in Vermont, VA.", paulRow[2]); + + var victorRow = df.Rows[1]; + Assert.Equal("Victor", victorRow[0]); + Assert.Equal(29, victorRow[1]); + Assert.Equal("Victor: Funny guy", victorRow[2]); + + var mariaRow = df.Rows[2]; + Assert.Equal("Maria", mariaRow[0]); + Assert.Equal(31, mariaRow[1]); + Assert.Equal("", mariaRow[2]); + } + + // Read data to a DataFrame in two ways and verify correctness + DataFrame df = DataFrame.LoadCsv(GetStream(data), dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); + Verify(df); + df = DataFrame.LoadCsvFromString(data, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); + Verify(df); + + // Write DataFrame to a MemoryStream + using MemoryStream csvStream = new MemoryStream(); + DataFrame.WriteCsv(df, csvStream); + + // Read MemoryStream back to DataFrame and verify correctness + csvStream.Seek(0, SeekOrigin.Begin); + DataFrame df2 = DataFrame.LoadCsv(csvStream, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); + Verify(df2); + } + + [Fact] + public void TestReadWriteCsvWithNewlinesInData() + { + string data = @"Name,Age,Description +Paul,34,""Paul lives in Vermont +VA."" +Victor,29,""Victor: Funny guy"" +Maria,31,"; + + void Verify(DataFrame df) + { + Assert.Equal(3, df.Rows.Count); + Assert.Equal(3, df.Columns.Count); + + Assert.True(typeof(string) == df.Columns[0].DataType); + Assert.True(typeof(int) == df.Columns[1].DataType); + Assert.True(typeof(string) == df.Columns[2].DataType); + + + Assert.Equal("Name", df.Columns[0].Name); + Assert.Equal("Age", df.Columns[1].Name); + Assert.Equal("Description", df.Columns[2].Name); + VerifyColumnTypes(df); + + var paulRow = df.Rows[0]; + Assert.Equal("Paul", paulRow[0]); + Assert.Equal(34, paulRow[1]); + Assert.Equal(@"Paul lives in Vermont +VA.", paulRow[2]); + + var victorRow = df.Rows[1]; + Assert.Equal("Victor", victorRow[0]); + Assert.Equal(29, victorRow[1]); + Assert.Equal("Victor: Funny guy", victorRow[2]); + + var mariaRow = df.Rows[2]; + Assert.Equal("Maria", mariaRow[0]); + Assert.Equal(31, mariaRow[1]); + Assert.Equal("", mariaRow[2]); + } + + // Read data to a DataFrame in two ways and verify correctness + DataFrame df = DataFrame.LoadCsv(GetStream(data), dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); + Verify(df); + df = DataFrame.LoadCsvFromString(data, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); + Verify(df); + + // Write DataFrame to a MemoryStream + using MemoryStream csvStream = new MemoryStream(); + DataFrame.WriteCsv(df, csvStream); + + // Read MemoryStream back to DataFrame and verify correctness + csvStream.Seek(0, SeekOrigin.Begin); + DataFrame df2 = DataFrame.LoadCsv(csvStream, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); + Verify(df2); + } } } From e0ff9a9c6ce3bb7202abe9664a239699f821a524 Mon Sep 17 00:00:00 2001 From: Drew Kersnar <18474647+dakersnar@users.noreply.github.com> Date: Wed, 24 Aug 2022 18:28:39 -0500 Subject: [PATCH 04/20] Fix separators in data --- src/Microsoft.Data.Analysis/DataFrame.IO.cs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Microsoft.Data.Analysis/DataFrame.IO.cs b/src/Microsoft.Data.Analysis/DataFrame.IO.cs index 30f395352c..7842ff0cca 100644 --- a/src/Microsoft.Data.Analysis/DataFrame.IO.cs +++ b/src/Microsoft.Data.Analysis/DataFrame.IO.cs @@ -500,6 +500,14 @@ public static void WriteCsv(DataFrame dataFrame, Stream csvStream, continue; } + if (t == typeof(string) && ((string)cell).Contains(separator.ToString())) // TODO why doesn't Contains(char) work? + { + record.Append("\""); + record.Append(cell); + record.Append("\""); + continue; + } + record.Append(cell); } From 6a568da2e58c15e2e3124deaf77c66fb3b657966 Mon Sep 17 00:00:00 2001 From: Drew Kersnar <18474647+dakersnar@users.noreply.github.com> Date: Wed, 24 Aug 2022 18:38:55 -0500 Subject: [PATCH 05/20] Fix separators in header --- src/Microsoft.Data.Analysis/DataFrame.IO.cs | 25 ++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/Microsoft.Data.Analysis/DataFrame.IO.cs b/src/Microsoft.Data.Analysis/DataFrame.IO.cs index 7842ff0cca..a543aaebf1 100644 --- a/src/Microsoft.Data.Analysis/DataFrame.IO.cs +++ b/src/Microsoft.Data.Analysis/DataFrame.IO.cs @@ -454,7 +454,30 @@ public static void WriteCsv(DataFrame dataFrame, Stream csvStream, if (header) { - var headerColumns = string.Join(separator.ToString(), columnNames); + bool firstColumn = true; + var headerColumns = new StringBuilder(); + foreach (string name in columnNames) + { + if (!firstColumn) + { + headerColumns.Append(separator); + } + else + { + firstColumn = false; + } + + if (name.Contains(separator.ToString())) // TODO why doesn't Contains(char) work? + { + headerColumns.Append("\""); + headerColumns.Append(name); + headerColumns.Append("\""); + } + else + { + headerColumns.Append(name); + } + } csvFile.WriteLine(headerColumns); } From 9c02ab16ab1343127a13645fa11a3dce20a4fb6b Mon Sep 17 00:00:00 2001 From: Drew Kersnar <18474647+dakersnar@users.noreply.github.com> Date: Thu, 25 Aug 2022 19:25:30 -0500 Subject: [PATCH 06/20] Clean up tests --- .../DataFrame.IOTests.cs | 257 ++++++------------ 1 file changed, 83 insertions(+), 174 deletions(-) diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs index 18937548fb..c463c331b9 100644 --- a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs @@ -1062,12 +1062,11 @@ public void VerifyLoadCsv(DataFrame df) } } } - } - public static IEnumerable LoadCsv_TestData() + public static IEnumerable CsvWithTextQualifiers_TestData() { - yield return new object[] + yield return new object[] // Comma Separators in Data { @"Name,Age,Description Paul,34,""Paul lives in Vermont, VA."" @@ -1087,7 +1086,7 @@ public static IEnumerable LoadCsv_TestData() } ) }; - yield return new object[] + yield return new object[] // Colon Separators in Data { @"Name:Age:Description Paul:34:""Paul lives in Vermont, VA."" @@ -1107,192 +1106,102 @@ public static IEnumerable LoadCsv_TestData() } ) }; + yield return new object[] // Comma Separators in Header + { + @"""Na,me"",Age,Description +Paul,34,""Paul lives in Vermont, VA."" +Victor,29,""Victor: Funny guy"" +Maria,31,", + ',', + new LoadCsvVerifyingHelper( + 3, + 3, + new string[] { "Na,me", "Age", "Description" }, + new Type[] { typeof(string), typeof(int), typeof(string) }, + new object[][] + { + new object[] { "Paul", 34, "Paul lives in Vermont, VA." }, + new object[] { "Victor", 29, "Victor: Funny guy" }, + new object[] { "Maria", 31, "" } + } + ) + }; + yield return new object[] // Newlines In Data + { + @"Name,Age,Description +Paul,34,""Paul lives in Vermont +VA."" +Victor,29,""Victor: Funny guy"" +Maria,31,", + ',', + new LoadCsvVerifyingHelper( + 3, + 3, + new string[] { "Name", "Age", "Description" }, + new Type[] { typeof(string), typeof(int), typeof(string) }, + new object[][] + { + new object[] { "Paul", 34, @"Paul lives in Vermont +VA." }, + new object[] { "Victor", 29, "Victor: Funny guy" }, + new object[] { "Maria", 31, "" } + } + ) + }; + yield return new object[] // Newlines In Header + { + @"""Na +me"":Age:Description +Paul:34:""Paul lives in Vermont, VA."" +Victor:29:""Victor: Funny guy"" +Maria:31:", + ':', + new LoadCsvVerifyingHelper( + 3, + 3, + new string[] { @"Na +me", "Age", "Description" }, + new Type[] { typeof(string), typeof(int), typeof(string) }, + new object[][] + { + new object[] { "Paul", 34, "Paul lives in Vermont, VA." }, + new object[] { "Victor", 29, "Victor: Funny guy" }, + new object[] { "Maria", 31, "" } + } + ) + }; } [Theory] - [MemberData(nameof(LoadCsv_TestData))] - public void TestReadWriteCsvWithCommaSeparatorsInData(string data, char separator, LoadCsvVerifyingHelper helper) + [MemberData(nameof(CsvWithTextQualifiers_TestData))] + public void TestLoadCsvWithTextQualifiersFromStream(string data, char separator, LoadCsvVerifyingHelper helper) { - // Read data to a DataFrame in two ways and verify correctness DataFrame df = DataFrame.LoadCsv(GetStream(data), dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: separator); helper.VerifyLoadCsv(df); - df = DataFrame.LoadCsvFromString(data, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: separator); - helper.VerifyLoadCsv(df); - - // Write DataFrame to a MemoryStream - using MemoryStream csvStream = new MemoryStream(); - DataFrame.WriteCsv(df, csvStream, separator: separator); - - // Read MemoryStream back to DataFrame and verify correctness - csvStream.Seek(0, SeekOrigin.Begin); - DataFrame df2 = DataFrame.LoadCsv(csvStream, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: separator); - helper.VerifyLoadCsv(df2); - } - - [Fact] - public void TestReadWriteCsvWithColonSeparatorsInData() - { - string data = @"Name:Age:Description -Paul:34:""Paul lives in Vermont, VA."" -Victor:29:""Victor: Funny guy"" -Maria:31:"; - void Verify(DataFrame df) - { - Assert.Equal(3, df.Rows.Count); - Assert.Equal(3, df.Columns.Count); - - Assert.True(typeof(string) == df.Columns[0].DataType); - Assert.True(typeof(int) == df.Columns[1].DataType); - Assert.True(typeof(string) == df.Columns[2].DataType); - - - Assert.Equal("Name", df.Columns[0].Name); - Assert.Equal("Age", df.Columns[1].Name); - Assert.Equal("Description", df.Columns[2].Name); - VerifyColumnTypes(df); - - var paulRow = df.Rows[0]; - Assert.Equal("Paul", paulRow[0]); - Assert.Equal(34, paulRow[1]); - Assert.Equal("Paul lives in Vermont, VA.", paulRow[2]); - - var victorRow = df.Rows[1]; - Assert.Equal("Victor", victorRow[0]); - Assert.Equal(29, victorRow[1]); - Assert.Equal("Victor: Funny guy", victorRow[2]); - - var mariaRow = df.Rows[2]; - Assert.Equal("Maria", mariaRow[0]); - Assert.Equal(31, mariaRow[1]); - Assert.Equal("", mariaRow[2]); - } - - // Read data to a DataFrame in two ways and verify correctness - DataFrame df = DataFrame.LoadCsv(GetStream(data), dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: ':'); - Verify(df); - df = DataFrame.LoadCsvFromString(data, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: ':'); - Verify(df); - - // Write DataFrame to a MemoryStream - using MemoryStream csvStream = new MemoryStream(); - DataFrame.WriteCsv(df, csvStream, separator: ':'); - - // Read MemoryStream back to DataFrame and verify correctness - csvStream.Seek(0, SeekOrigin.Begin); - DataFrame df2 = DataFrame.LoadCsv(csvStream, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: ':'); - Verify(df2); } - [Fact] - public void TestReadWriteCsvWithCommaSeparatorsInHeaderData() + [Theory] + [MemberData(nameof(CsvWithTextQualifiers_TestData))] + public void TestLoadCsvWithTextQualifiersFromString(string data, char separator, LoadCsvVerifyingHelper helper) { - string data = @"""Na,me"",Age,Description -Paul,34,""Paul lives in Vermont, VA."" -Victor,29,""Victor: Funny guy"" -Maria,31,"; - - void Verify(DataFrame df) - { - Assert.Equal(3, df.Rows.Count); - Assert.Equal(3, df.Columns.Count); - - Assert.True(typeof(string) == df.Columns[0].DataType); - Assert.True(typeof(int) == df.Columns[1].DataType); - Assert.True(typeof(string) == df.Columns[2].DataType); - - - Assert.Equal("Na,me", df.Columns[0].Name); - Assert.Equal("Age", df.Columns[1].Name); - Assert.Equal("Description", df.Columns[2].Name); - VerifyColumnTypes(df); - - var paulRow = df.Rows[0]; - Assert.Equal("Paul", paulRow[0]); - Assert.Equal(34, paulRow[1]); - Assert.Equal("Paul lives in Vermont, VA.", paulRow[2]); - - var victorRow = df.Rows[1]; - Assert.Equal("Victor", victorRow[0]); - Assert.Equal(29, victorRow[1]); - Assert.Equal("Victor: Funny guy", victorRow[2]); - - var mariaRow = df.Rows[2]; - Assert.Equal("Maria", mariaRow[0]); - Assert.Equal(31, mariaRow[1]); - Assert.Equal("", mariaRow[2]); - } - - // Read data to a DataFrame in two ways and verify correctness - DataFrame df = DataFrame.LoadCsv(GetStream(data), dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); - Verify(df); - df = DataFrame.LoadCsvFromString(data, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); - Verify(df); - - // Write DataFrame to a MemoryStream - using MemoryStream csvStream = new MemoryStream(); - DataFrame.WriteCsv(df, csvStream); - - // Read MemoryStream back to DataFrame and verify correctness - csvStream.Seek(0, SeekOrigin.Begin); - DataFrame df2 = DataFrame.LoadCsv(csvStream, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); - Verify(df2); + DataFrame df = DataFrame.LoadCsvFromString(data, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: separator); + helper.VerifyLoadCsv(df); } - [Fact] - public void TestReadWriteCsvWithNewlinesInData() + [Theory] + [MemberData(nameof(CsvWithTextQualifiers_TestData))] + public void TestWriteCsvWithTextQualifiers(string data, char separator, LoadCsvVerifyingHelper helper) { - string data = @"Name,Age,Description -Paul,34,""Paul lives in Vermont -VA."" -Victor,29,""Victor: Funny guy"" -Maria,31,"; - - void Verify(DataFrame df) - { - Assert.Equal(3, df.Rows.Count); - Assert.Equal(3, df.Columns.Count); - - Assert.True(typeof(string) == df.Columns[0].DataType); - Assert.True(typeof(int) == df.Columns[1].DataType); - Assert.True(typeof(string) == df.Columns[2].DataType); - - - Assert.Equal("Name", df.Columns[0].Name); - Assert.Equal("Age", df.Columns[1].Name); - Assert.Equal("Description", df.Columns[2].Name); - VerifyColumnTypes(df); - - var paulRow = df.Rows[0]; - Assert.Equal("Paul", paulRow[0]); - Assert.Equal(34, paulRow[1]); - Assert.Equal(@"Paul lives in Vermont -VA.", paulRow[2]); - - var victorRow = df.Rows[1]; - Assert.Equal("Victor", victorRow[0]); - Assert.Equal(29, victorRow[1]); - Assert.Equal("Victor: Funny guy", victorRow[2]); - - var mariaRow = df.Rows[2]; - Assert.Equal("Maria", mariaRow[0]); - Assert.Equal(31, mariaRow[1]); - Assert.Equal("", mariaRow[2]); - } - - // Read data to a DataFrame in two ways and verify correctness - DataFrame df = DataFrame.LoadCsv(GetStream(data), dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); - Verify(df); - df = DataFrame.LoadCsvFromString(data, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); - Verify(df); + DataFrame df = DataFrame.LoadCsv(GetStream(data), dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: separator); - // Write DataFrame to a MemoryStream using MemoryStream csvStream = new MemoryStream(); - DataFrame.WriteCsv(df, csvStream); + DataFrame.WriteCsv(df, csvStream, separator: separator); - // Read MemoryStream back to DataFrame and verify correctness + // We are verifying that WriteCsv works by reading the result back to a DataFrame and verifying correctness, + // ensuring no information loss csvStream.Seek(0, SeekOrigin.Begin); - DataFrame df2 = DataFrame.LoadCsv(csvStream, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }); - Verify(df2); + DataFrame df2 = DataFrame.LoadCsv(csvStream, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: separator); + helper.VerifyLoadCsv(df2); } } } From b9bec7aac0d26b561a80b70bdce0e53029f32017 Mon Sep 17 00:00:00 2001 From: Drew Kersnar <18474647+dakersnar@users.noreply.github.com> Date: Thu, 25 Aug 2022 19:27:31 -0500 Subject: [PATCH 07/20] Fix issue with not wrapping output with newlines in quotations --- src/Microsoft.Data.Analysis/DataFrame.IO.cs | 25 +++++++++++++-------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/Microsoft.Data.Analysis/DataFrame.IO.cs b/src/Microsoft.Data.Analysis/DataFrame.IO.cs index a543aaebf1..2a28324845 100644 --- a/src/Microsoft.Data.Analysis/DataFrame.IO.cs +++ b/src/Microsoft.Data.Analysis/DataFrame.IO.cs @@ -467,7 +467,9 @@ public static void WriteCsv(DataFrame dataFrame, Stream csvStream, firstColumn = false; } - if (name.Contains(separator.ToString())) // TODO why doesn't Contains(char) work? + // TODO why doesn't Contains(char) work? + bool needsQuotes = ((string)name).Contains(separator.ToString()) || ((string)name).Contains("\n"); + if (needsQuotes) { headerColumns.Append("\""); headerColumns.Append(name); @@ -485,16 +487,16 @@ public static void WriteCsv(DataFrame dataFrame, Stream csvStream, foreach (var row in dataFrame.Rows) { - bool firstRow = true; + bool firstCell = true; foreach (var cell in row) { - if (!firstRow) + if (!firstCell) { record.Append(separator); } else { - firstRow = false; + firstCell = false; } Type t = cell?.GetType(); @@ -523,12 +525,17 @@ public static void WriteCsv(DataFrame dataFrame, Stream csvStream, continue; } - if (t == typeof(string) && ((string)cell).Contains(separator.ToString())) // TODO why doesn't Contains(char) work? + if (t == typeof(string)) { - record.Append("\""); - record.Append(cell); - record.Append("\""); - continue; + // TODO why doesn't Contains(char) work? + bool needsQuotes = ((string)cell).Contains(separator.ToString()) || ((string)cell).Contains("\n"); + if (needsQuotes) + { + record.Append("\""); + record.Append(cell); + record.Append("\""); + continue; + } } record.Append(cell); From 6f8029785a274c8d26efd5ea6af8cfcb3c24b65e Mon Sep 17 00:00:00 2001 From: Drew Kersnar <18474647+dakersnar@users.noreply.github.com> Date: Thu, 25 Aug 2022 19:41:09 -0500 Subject: [PATCH 08/20] Accidental commit --- test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs index c463c331b9..ed7aed1312 100644 --- a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs @@ -3,7 +3,6 @@ // See the LICENSE file in the project root for more information. using System; -using System.CodeDom; using System.Collections.Generic; using System.Globalization; using System.IO; From d5aaff99099ed4dcc6d8eae68f66b00153d8d72c Mon Sep 17 00:00:00 2001 From: Drew Kersnar <18474647+dakersnar@users.noreply.github.com> Date: Fri, 9 Sep 2022 13:01:00 -0500 Subject: [PATCH 09/20] Clean up --- src/Microsoft.Data.Analysis/DataFrame.IO.cs | 2 -- test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs | 5 +---- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/Microsoft.Data.Analysis/DataFrame.IO.cs b/src/Microsoft.Data.Analysis/DataFrame.IO.cs index 2a28324845..55c24cfb14 100644 --- a/src/Microsoft.Data.Analysis/DataFrame.IO.cs +++ b/src/Microsoft.Data.Analysis/DataFrame.IO.cs @@ -467,7 +467,6 @@ public static void WriteCsv(DataFrame dataFrame, Stream csvStream, firstColumn = false; } - // TODO why doesn't Contains(char) work? bool needsQuotes = ((string)name).Contains(separator.ToString()) || ((string)name).Contains("\n"); if (needsQuotes) { @@ -527,7 +526,6 @@ public static void WriteCsv(DataFrame dataFrame, Stream csvStream, if (t == typeof(string)) { - // TODO why doesn't Contains(char) work? bool needsQuotes = ((string)cell).Contains(separator.ToString()) || ((string)cell).Contains("\n"); if (needsQuotes) { diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs index ed7aed1312..20199e839c 100644 --- a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs @@ -1055,10 +1055,7 @@ public void VerifyLoadCsv(DataFrame df) for (int i = 0; i < _rowCount; i++) { - for (int j = 0; j < _columnCount; j++) - { - Assert.Equal(_cells[i][j], df.Rows[i][j]); - } + Assert.Equal(_cells[i], df.Rows[i]); } } } From be4fcb06a0eaf3e91e96a25fec1da661dba61bc7 Mon Sep 17 00:00:00 2001 From: Drew Kersnar <18474647+dakersnar@users.noreply.github.com> Date: Fri, 9 Sep 2022 17:23:43 -0500 Subject: [PATCH 10/20] Clean up mini test framework a bit --- .../DataFrame.IOTests.cs | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs index 20199e839c..a34009ec65 100644 --- a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs @@ -1069,6 +1069,7 @@ public static IEnumerable CsvWithTextQualifiers_TestData() Victor,29,""Victor: Funny guy"" Maria,31,", ',', + new Type[] { typeof(string), typeof(int), typeof(string) }, new LoadCsvVerifyingHelper( 3, 3, @@ -1089,6 +1090,7 @@ public static IEnumerable CsvWithTextQualifiers_TestData() Victor:29:""Victor: Funny guy"" Maria:31:", ':', + new Type[] { typeof(string), typeof(int), typeof(string) }, new LoadCsvVerifyingHelper( 3, 3, @@ -1104,11 +1106,12 @@ public static IEnumerable CsvWithTextQualifiers_TestData() }; yield return new object[] // Comma Separators in Header { - @"""Na,me"",Age,Description + @"""Na,me"",Age,Description Paul,34,""Paul lives in Vermont, VA."" Victor,29,""Victor: Funny guy"" Maria,31,", - ',', + ',', + new Type[] { typeof(string), typeof(int), typeof(string) }, new LoadCsvVerifyingHelper( 3, 3, @@ -1130,6 +1133,7 @@ public static IEnumerable CsvWithTextQualifiers_TestData() Victor,29,""Victor: Funny guy"" Maria,31,", ',', + new Type[] { typeof(string), typeof(int), typeof(string) }, new LoadCsvVerifyingHelper( 3, 3, @@ -1152,6 +1156,7 @@ public static IEnumerable CsvWithTextQualifiers_TestData() Victor:29:""Victor: Funny guy"" Maria:31:", ':', + new Type[] { typeof(string), typeof(int), typeof(string) }, new LoadCsvVerifyingHelper( 3, 3, @@ -1170,25 +1175,25 @@ public static IEnumerable CsvWithTextQualifiers_TestData() [Theory] [MemberData(nameof(CsvWithTextQualifiers_TestData))] - public void TestLoadCsvWithTextQualifiersFromStream(string data, char separator, LoadCsvVerifyingHelper helper) + public void TestLoadCsvWithTextQualifiersFromStream(string data, char separator, Type[] dataTypes, LoadCsvVerifyingHelper helper) { - DataFrame df = DataFrame.LoadCsv(GetStream(data), dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: separator); + DataFrame df = DataFrame.LoadCsv(GetStream(data), dataTypes: dataTypes, separator: separator); helper.VerifyLoadCsv(df); } [Theory] [MemberData(nameof(CsvWithTextQualifiers_TestData))] - public void TestLoadCsvWithTextQualifiersFromString(string data, char separator, LoadCsvVerifyingHelper helper) + public void TestLoadCsvWithTextQualifiersFromString(string data, char separator, Type[] dataTypes, LoadCsvVerifyingHelper helper) { - DataFrame df = DataFrame.LoadCsvFromString(data, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: separator); + DataFrame df = DataFrame.LoadCsvFromString(data, dataTypes: dataTypes, separator: separator); helper.VerifyLoadCsv(df); } [Theory] [MemberData(nameof(CsvWithTextQualifiers_TestData))] - public void TestWriteCsvWithTextQualifiers(string data, char separator, LoadCsvVerifyingHelper helper) + public void TestWriteCsvWithTextQualifiers(string data, char separator, Type[] dataTypes, LoadCsvVerifyingHelper helper) { - DataFrame df = DataFrame.LoadCsv(GetStream(data), dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: separator); + DataFrame df = DataFrame.LoadCsv(GetStream(data), dataTypes: dataTypes, separator: separator); using MemoryStream csvStream = new MemoryStream(); DataFrame.WriteCsv(df, csvStream, separator: separator); @@ -1196,7 +1201,7 @@ public void TestWriteCsvWithTextQualifiers(string data, char separator, LoadCsvV // We are verifying that WriteCsv works by reading the result back to a DataFrame and verifying correctness, // ensuring no information loss csvStream.Seek(0, SeekOrigin.Begin); - DataFrame df2 = DataFrame.LoadCsv(csvStream, dataTypes: new Type[] { typeof(string), typeof(int), typeof(string) }, separator: separator); + DataFrame df2 = DataFrame.LoadCsv(csvStream, dataTypes: dataTypes, separator: separator); helper.VerifyLoadCsv(df2); } } From de5abe8ced1d88f795160e8cec4c9a32b473e60c Mon Sep 17 00:00:00 2001 From: Drew Kersnar <18474647+dakersnar@users.noreply.github.com> Date: Mon, 12 Sep 2022 14:00:21 -0500 Subject: [PATCH 11/20] Fix WriteCsv bug when quotations are present in data. --- src/Microsoft.Data.Analysis/DataFrame.IO.cs | 12 ++++-- .../DataFrame.IOTests.cs | 42 +++++++++++++++++++ 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/src/Microsoft.Data.Analysis/DataFrame.IO.cs b/src/Microsoft.Data.Analysis/DataFrame.IO.cs index 55c24cfb14..093522f5fa 100644 --- a/src/Microsoft.Data.Analysis/DataFrame.IO.cs +++ b/src/Microsoft.Data.Analysis/DataFrame.IO.cs @@ -8,6 +8,7 @@ using System.Globalization; using System.IO; using System.Text; +using System.Xml.Linq; namespace Microsoft.Data.Analysis { @@ -467,11 +468,12 @@ public static void WriteCsv(DataFrame dataFrame, Stream csvStream, firstColumn = false; } - bool needsQuotes = ((string)name).Contains(separator.ToString()) || ((string)name).Contains("\n"); + bool needsQuotes = name.Contains(separator.ToString()) || name.Contains("\n") || name.Contains("\""); if (needsQuotes) { + headerColumns.Append("\""); - headerColumns.Append(name); + headerColumns.Append(name.Replace("\"", "\"\"")); // Quotations in CSV data must be escaped with another quotation headerColumns.Append("\""); } else @@ -526,11 +528,13 @@ public static void WriteCsv(DataFrame dataFrame, Stream csvStream, if (t == typeof(string)) { - bool needsQuotes = ((string)cell).Contains(separator.ToString()) || ((string)cell).Contains("\n"); + string stringCell = (string)cell; + bool needsQuotes = stringCell.Contains(separator.ToString()) || stringCell.Contains("\n") || stringCell.Contains("\""); if (needsQuotes) { + record.Append("\""); - record.Append(cell); + record.Append(stringCell.Replace("\"", "\"\"")); // Quotations in CSV data must be escaped with another quotation record.Append("\""); continue; } diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs index a34009ec65..06e9c315db 100644 --- a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs @@ -1171,6 +1171,48 @@ public static IEnumerable CsvWithTextQualifiers_TestData() } ) }; + yield return new object[] // Quotations in Data + { + @"Name,Age,Description +Paul,34,""Paul lives in """"Vermont VA""""."" +Victor,29,""Victor: Funny guy"" +Maria,31,", + ',', + new Type[] { typeof(string), typeof(int), typeof(string) }, + new LoadCsvVerifyingHelper( + 3, + 3, + new string[] { "Name", "Age", "Description" }, + new Type[] { typeof(string), typeof(int), typeof(string) }, + new object[][] + { + new object[] { "Paul", 34, @"Paul lives in ""Vermont VA""." }, + new object[] { "Victor", 29, "Victor: Funny guy" }, + new object[] { "Maria", 31, "" } + } + ) + }; + yield return new object[] // Quotations in Header + { + @"Name,Age,""De""""script""""ion"" +Paul,34,""Paul lives in Vermont, VA."" +Victor,29,""Victor: Funny guy"" +Maria,31,", + ',', + new Type[] { typeof(string), typeof(int), typeof(string) }, + new LoadCsvVerifyingHelper( + 3, + 3, + new string[] { "Name", "Age", @"De""script""ion" }, + new Type[] { typeof(string), typeof(int), typeof(string) }, + new object[][] + { + new object[] { "Paul", 34, @"Paul lives in Vermont, VA." }, + new object[] { "Victor", 29, "Victor: Funny guy" }, + new object[] { "Maria", 31, "" } + } + ) + }; } [Theory] From 493a750cf58c4cab0704c172b5384cf5cd799a1b Mon Sep 17 00:00:00 2001 From: Drew Kersnar <18474647+dakersnar@users.noreply.github.com> Date: Mon, 12 Sep 2022 14:02:16 -0500 Subject: [PATCH 12/20] Accidental include --- src/Microsoft.Data.Analysis/DataFrame.IO.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Microsoft.Data.Analysis/DataFrame.IO.cs b/src/Microsoft.Data.Analysis/DataFrame.IO.cs index 093522f5fa..9ac4e5c7a3 100644 --- a/src/Microsoft.Data.Analysis/DataFrame.IO.cs +++ b/src/Microsoft.Data.Analysis/DataFrame.IO.cs @@ -8,7 +8,6 @@ using System.Globalization; using System.IO; using System.Text; -using System.Xml.Linq; namespace Microsoft.Data.Analysis { From 3e363b1b42e89d5a203350f6a92d1b38b056fd3a Mon Sep 17 00:00:00 2001 From: Drew Kersnar <18474647+dakersnar@users.noreply.github.com> Date: Tue, 27 Sep 2022 17:49:27 -0500 Subject: [PATCH 13/20] Manually merge with main --- src/Microsoft.Data.Analysis/DataFrame.IO.cs | 64 ++++++++++--------- .../DataFrame.IOTests.cs | 12 ++-- 2 files changed, 40 insertions(+), 36 deletions(-) diff --git a/src/Microsoft.Data.Analysis/DataFrame.IO.cs b/src/Microsoft.Data.Analysis/DataFrame.IO.cs index 9ac4e5c7a3..ec19b0573c 100644 --- a/src/Microsoft.Data.Analysis/DataFrame.IO.cs +++ b/src/Microsoft.Data.Analysis/DataFrame.IO.cs @@ -8,6 +8,7 @@ using System.Globalization; using System.IO; using System.Text; +using System.Xml.Linq; namespace Microsoft.Data.Analysis { @@ -454,33 +455,7 @@ public static void WriteCsv(DataFrame dataFrame, Stream csvStream, if (header) { - bool firstColumn = true; - var headerColumns = new StringBuilder(); - foreach (string name in columnNames) - { - if (!firstColumn) - { - headerColumns.Append(separator); - } - else - { - firstColumn = false; - } - - bool needsQuotes = name.Contains(separator.ToString()) || name.Contains("\n") || name.Contains("\""); - if (needsQuotes) - { - - headerColumns.Append("\""); - headerColumns.Append(name.Replace("\"", "\"\"")); // Quotations in CSV data must be escaped with another quotation - headerColumns.Append("\""); - } - else - { - headerColumns.Append(name); - } - } - csvFile.WriteLine(headerColumns); + WriteHeader(csvFile, dataFrame.Columns.GetColumnNames(), separator); } var record = new StringBuilder(); @@ -528,13 +503,13 @@ public static void WriteCsv(DataFrame dataFrame, Stream csvStream, if (t == typeof(string)) { string stringCell = (string)cell; - bool needsQuotes = stringCell.Contains(separator.ToString()) || stringCell.Contains("\n") || stringCell.Contains("\""); + bool needsQuotes = stringCell.IndexOf(separator) != -1 || stringCell.IndexOf('\n') != -1 || stringCell.IndexOf('\"') != -1; if (needsQuotes) { - record.Append("\""); + record.Append('\"'); record.Append(stringCell.Replace("\"", "\"\"")); // Quotations in CSV data must be escaped with another quotation - record.Append("\""); + record.Append('\"'); continue; } } @@ -549,5 +524,34 @@ public static void WriteCsv(DataFrame dataFrame, Stream csvStream, } } } + private static void WriteHeader(StreamWriter csvFile, IReadOnlyList columnNames, char separator) + { + bool firstColumn = true; + foreach (string name in columnNames) + { + if (!firstColumn) + { + csvFile.Write(separator); + } + else + { + firstColumn = false; + } + + bool needsQuotes = name.IndexOf(separator) != -1 || name.IndexOf('\n') != -1 || name.IndexOf('\"') != -1; + if (needsQuotes) + { + + csvFile.Write('\"'); + csvFile.Write(name.Replace("\"", "\"\"")); // Quotations in CSV data must be escaped with another quotation + csvFile.Write('\"'); + } + else + { + csvFile.Write(name); + } + } + csvFile.WriteLine(); + } } } diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs index 06e9c315db..7be525b7fa 100644 --- a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs @@ -1021,13 +1021,13 @@ public void TestMixedDataTypesInCsv() } } - public struct LoadCsvVerifyingHelper + public readonly struct LoadCsvVerifyingHelper { - int _columnCount; - long _rowCount; - string[] _columnNames; - Type[] _columnTypes; - object[][] _cells; + private readonly int _columnCount; + private readonly long _rowCount; + private readonly string[] _columnNames; + private readonly Type[] _columnTypes; + private readonly object[][] _cells; public LoadCsvVerifyingHelper(int columnCount, long rowCount, string[] columnNames, Type[] columnTypes, object[][] cells) { From 06e018ebb73160802b700813f213168271151fa6 Mon Sep 17 00:00:00 2001 From: Drew Kersnar <18474647+dakersnar@users.noreply.github.com> Date: Tue, 27 Sep 2022 17:53:19 -0500 Subject: [PATCH 14/20] Delete extra line --- src/Microsoft.Data.Analysis/DataFrame.IO.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Microsoft.Data.Analysis/DataFrame.IO.cs b/src/Microsoft.Data.Analysis/DataFrame.IO.cs index ec19b0573c..9fae05aa88 100644 --- a/src/Microsoft.Data.Analysis/DataFrame.IO.cs +++ b/src/Microsoft.Data.Analysis/DataFrame.IO.cs @@ -451,8 +451,6 @@ public static void WriteCsv(DataFrame dataFrame, Stream csvStream, { if (dataFrame != null) { - var columnNames = dataFrame.Columns.GetColumnNames(); - if (header) { WriteHeader(csvFile, dataFrame.Columns.GetColumnNames(), separator); From 54815a6feec92c2d6ece9729c12b8de057b4607f Mon Sep 17 00:00:00 2001 From: Drew Kersnar <18474647+dakersnar@users.noreply.github.com> Date: Tue, 27 Sep 2022 17:57:23 -0500 Subject: [PATCH 15/20] Accidental includes --- src/Microsoft.Data.Analysis/DataFrame.IO.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Microsoft.Data.Analysis/DataFrame.IO.cs b/src/Microsoft.Data.Analysis/DataFrame.IO.cs index 9fae05aa88..0df86a0572 100644 --- a/src/Microsoft.Data.Analysis/DataFrame.IO.cs +++ b/src/Microsoft.Data.Analysis/DataFrame.IO.cs @@ -3,12 +3,10 @@ // See the LICENSE file in the project root for more information. using System; -using System.Collections; using System.Collections.Generic; using System.Globalization; using System.IO; using System.Text; -using System.Xml.Linq; namespace Microsoft.Data.Analysis { From a2de77d492d3ae462560a54ea793d23821640053 Mon Sep 17 00:00:00 2001 From: Drew Kersnar <18474647+dakersnar@users.noreply.github.com> Date: Thu, 29 Sep 2022 12:25:41 -0500 Subject: [PATCH 16/20] Use new raw string literal syntax --- .../DataFrame.IOTests.cs | 102 +++++++++++------- 1 file changed, 65 insertions(+), 37 deletions(-) diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs index 7be525b7fa..1b2e34a0ff 100644 --- a/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs @@ -1064,10 +1064,12 @@ public static IEnumerable CsvWithTextQualifiers_TestData() { yield return new object[] // Comma Separators in Data { - @"Name,Age,Description -Paul,34,""Paul lives in Vermont, VA."" -Victor,29,""Victor: Funny guy"" -Maria,31,", + """ + Name,Age,Description + Paul,34,"Paul lives in Vermont, VA." + Victor,29,"Victor: Funny guy" + Maria,31, + """, ',', new Type[] { typeof(string), typeof(int), typeof(string) }, new LoadCsvVerifyingHelper( @@ -1085,10 +1087,12 @@ public static IEnumerable CsvWithTextQualifiers_TestData() }; yield return new object[] // Colon Separators in Data { - @"Name:Age:Description -Paul:34:""Paul lives in Vermont, VA."" -Victor:29:""Victor: Funny guy"" -Maria:31:", + """ + Name:Age:Description + Paul:34:"Paul lives in Vermont, VA." + Victor:29:"Victor: Funny guy" + Maria:31: + """, ':', new Type[] { typeof(string), typeof(int), typeof(string) }, new LoadCsvVerifyingHelper( @@ -1106,10 +1110,12 @@ public static IEnumerable CsvWithTextQualifiers_TestData() }; yield return new object[] // Comma Separators in Header { - @"""Na,me"",Age,Description -Paul,34,""Paul lives in Vermont, VA."" -Victor,29,""Victor: Funny guy"" -Maria,31,", + """ + "Na,me",Age,Description + Paul,34,"Paul lives in Vermont, VA." + Victor,29,"Victor: Funny guy" + Maria,31, + """, ',', new Type[] { typeof(string), typeof(int), typeof(string) }, new LoadCsvVerifyingHelper( @@ -1127,11 +1133,13 @@ public static IEnumerable CsvWithTextQualifiers_TestData() }; yield return new object[] // Newlines In Data { - @"Name,Age,Description -Paul,34,""Paul lives in Vermont -VA."" -Victor,29,""Victor: Funny guy"" -Maria,31,", + """ + Name,Age,Description + Paul,34,"Paul lives in Vermont + VA." + Victor,29,"Victor: Funny guy" + Maria,31, + """, ',', new Type[] { typeof(string), typeof(int), typeof(string) }, new LoadCsvVerifyingHelper( @@ -1141,8 +1149,15 @@ public static IEnumerable CsvWithTextQualifiers_TestData() new Type[] { typeof(string), typeof(int), typeof(string) }, new object[][] { - new object[] { "Paul", 34, @"Paul lives in Vermont -VA." }, + new object[] + { + "Paul", + 34, + """ + Paul lives in Vermont + VA. + """ + }, new object[] { "Victor", 29, "Victor: Funny guy" }, new object[] { "Maria", 31, "" } } @@ -1150,18 +1165,27 @@ public static IEnumerable CsvWithTextQualifiers_TestData() }; yield return new object[] // Newlines In Header { - @"""Na -me"":Age:Description -Paul:34:""Paul lives in Vermont, VA."" -Victor:29:""Victor: Funny guy"" -Maria:31:", + """ + "Na + me":Age:Description + Paul:34:"Paul lives in Vermont, VA." + Victor:29:"Victor: Funny guy" + Maria:31: + """, ':', new Type[] { typeof(string), typeof(int), typeof(string) }, new LoadCsvVerifyingHelper( 3, 3, - new string[] { @"Na -me", "Age", "Description" }, + new string[] + { + """ + Na + me + """, + "Age", + "Description" + }, new Type[] { typeof(string), typeof(int), typeof(string) }, new object[][] { @@ -1173,10 +1197,12 @@ public static IEnumerable CsvWithTextQualifiers_TestData() }; yield return new object[] // Quotations in Data { - @"Name,Age,Description -Paul,34,""Paul lives in """"Vermont VA""""."" -Victor,29,""Victor: Funny guy"" -Maria,31,", + """ + Name,Age,Description + Paul,34,"Paul lives in ""Vermont VA""." + Victor,29,"Victor: Funny guy" + Maria,31, + """, ',', new Type[] { typeof(string), typeof(int), typeof(string) }, new LoadCsvVerifyingHelper( @@ -1186,7 +1212,7 @@ public static IEnumerable CsvWithTextQualifiers_TestData() new Type[] { typeof(string), typeof(int), typeof(string) }, new object[][] { - new object[] { "Paul", 34, @"Paul lives in ""Vermont VA""." }, + new object[] { "Paul", 34, """Paul lives in "Vermont VA".""" }, new object[] { "Victor", 29, "Victor: Funny guy" }, new object[] { "Maria", 31, "" } } @@ -1194,20 +1220,22 @@ public static IEnumerable CsvWithTextQualifiers_TestData() }; yield return new object[] // Quotations in Header { - @"Name,Age,""De""""script""""ion"" -Paul,34,""Paul lives in Vermont, VA."" -Victor,29,""Victor: Funny guy"" -Maria,31,", + """ + Name,Age,"De""script""ion" + Paul,34,"Paul lives in Vermont, VA." + Victor,29,"Victor: Funny guy" + Maria,31, + """, ',', new Type[] { typeof(string), typeof(int), typeof(string) }, new LoadCsvVerifyingHelper( 3, 3, - new string[] { "Name", "Age", @"De""script""ion" }, + new string[] { "Name", "Age", """De"script"ion""" }, new Type[] { typeof(string), typeof(int), typeof(string) }, new object[][] { - new object[] { "Paul", 34, @"Paul lives in Vermont, VA." }, + new object[] { "Paul", 34, "Paul lives in Vermont, VA." }, new object[] { "Victor", 29, "Victor: Funny guy" }, new object[] { "Maria", 31, "" } } From 546c5ff21891dcfd4fe607ff836b7e89baf89c15 Mon Sep 17 00:00:00 2001 From: Drew Kersnar <18474647+dakersnar@users.noreply.github.com> Date: Thu, 29 Sep 2022 14:35:50 -0500 Subject: [PATCH 17/20] Add NeedsQuotes helper --- src/Microsoft.Data.Analysis/DataFrame.IO.cs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/Microsoft.Data.Analysis/DataFrame.IO.cs b/src/Microsoft.Data.Analysis/DataFrame.IO.cs index 0df86a0572..6fa40eef7a 100644 --- a/src/Microsoft.Data.Analysis/DataFrame.IO.cs +++ b/src/Microsoft.Data.Analysis/DataFrame.IO.cs @@ -499,10 +499,8 @@ public static void WriteCsv(DataFrame dataFrame, Stream csvStream, if (t == typeof(string)) { string stringCell = (string)cell; - bool needsQuotes = stringCell.IndexOf(separator) != -1 || stringCell.IndexOf('\n') != -1 || stringCell.IndexOf('\"') != -1; - if (needsQuotes) + if (NeedsQuotes(stringCell, separator)) { - record.Append('\"'); record.Append(stringCell.Replace("\"", "\"\"")); // Quotations in CSV data must be escaped with another quotation record.Append('\"'); @@ -520,6 +518,7 @@ public static void WriteCsv(DataFrame dataFrame, Stream csvStream, } } } + private static void WriteHeader(StreamWriter csvFile, IReadOnlyList columnNames, char separator) { bool firstColumn = true; @@ -534,10 +533,8 @@ private static void WriteHeader(StreamWriter csvFile, IReadOnlyList colu firstColumn = false; } - bool needsQuotes = name.IndexOf(separator) != -1 || name.IndexOf('\n') != -1 || name.IndexOf('\"') != -1; - if (needsQuotes) + if (NeedsQuotes(name, separator)) { - csvFile.Write('\"'); csvFile.Write(name.Replace("\"", "\"\"")); // Quotations in CSV data must be escaped with another quotation csvFile.Write('\"'); @@ -549,5 +546,10 @@ private static void WriteHeader(StreamWriter csvFile, IReadOnlyList colu } csvFile.WriteLine(); } + + private static bool NeedsQuotes(string csvCell, char separator) + { + return csvCell.IndexOf(separator) != -1 || csvCell.IndexOf('\n') != -1 || csvCell.IndexOf('\"') != -1; + } } } From cc016d39393c13b72f5d2fdc61c48c51533762e4 Mon Sep 17 00:00:00 2001 From: Drew Kersnar <18474647+dakersnar@users.noreply.github.com> Date: Thu, 29 Sep 2022 14:36:55 -0500 Subject: [PATCH 18/20] IndexOfAny refactor --- src/Microsoft.Data.Analysis/DataFrame.IO.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Microsoft.Data.Analysis/DataFrame.IO.cs b/src/Microsoft.Data.Analysis/DataFrame.IO.cs index 6fa40eef7a..dfaf819eb3 100644 --- a/src/Microsoft.Data.Analysis/DataFrame.IO.cs +++ b/src/Microsoft.Data.Analysis/DataFrame.IO.cs @@ -7,6 +7,7 @@ using System.Globalization; using System.IO; using System.Text; +using static System.Net.Mime.MediaTypeNames; namespace Microsoft.Data.Analysis { @@ -549,7 +550,7 @@ private static void WriteHeader(StreamWriter csvFile, IReadOnlyList colu private static bool NeedsQuotes(string csvCell, char separator) { - return csvCell.IndexOf(separator) != -1 || csvCell.IndexOf('\n') != -1 || csvCell.IndexOf('\"') != -1; + return csvCell.AsSpan().IndexOfAny(separator, '\n', '\"') != -1; } } } From c9ddd21e0f7b73055f8ad31b1ed8cdc21bfcca8e Mon Sep 17 00:00:00 2001 From: Drew Kersnar <18474647+dakersnar@users.noreply.github.com> Date: Fri, 30 Sep 2022 10:45:53 -0500 Subject: [PATCH 19/20] Remove accidental include --- src/Microsoft.Data.Analysis/DataFrame.IO.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Microsoft.Data.Analysis/DataFrame.IO.cs b/src/Microsoft.Data.Analysis/DataFrame.IO.cs index dfaf819eb3..74e0f5f2b4 100644 --- a/src/Microsoft.Data.Analysis/DataFrame.IO.cs +++ b/src/Microsoft.Data.Analysis/DataFrame.IO.cs @@ -7,7 +7,6 @@ using System.Globalization; using System.IO; using System.Text; -using static System.Net.Mime.MediaTypeNames; namespace Microsoft.Data.Analysis { From d7dac25c2810a1c34c304a089feffac7978b6240 Mon Sep 17 00:00:00 2001 From: Drew Kersnar <18474647+dakersnar@users.noreply.github.com> Date: Tue, 4 Oct 2022 10:58:55 -0500 Subject: [PATCH 20/20] Add preview language tag to csproj --- .../Microsoft.Data.Analysis.Tests.csproj | 1 + 1 file changed, 1 insertion(+) diff --git a/test/Microsoft.Data.Analysis.Tests/Microsoft.Data.Analysis.Tests.csproj b/test/Microsoft.Data.Analysis.Tests/Microsoft.Data.Analysis.Tests.csproj index 8032812924..07c3ea2c33 100644 --- a/test/Microsoft.Data.Analysis.Tests/Microsoft.Data.Analysis.Tests.csproj +++ b/test/Microsoft.Data.Analysis.Tests/Microsoft.Data.Analysis.Tests.csproj @@ -1,6 +1,7 @@  $(NoWarn);MSML_ParameterLocalVarName;MSML_PrivateFieldName;MSML_ExtendBaseTestClass;MSML_GeneralName + preview