Skip to content

Commit d692751

Browse files
Append dataframe rows based on column names (#6808)
* Append dataframe rows based on column names
* Update DataFrame.cs
---------
Co-authored-by: Michael Sharp <[email protected]>
1 parent d9dbf99 commit d692751

File tree

3 files changed

+48
-5
lines changed

3 files changed

+48
-5
lines changed

src/Microsoft.Data.Analysis/DataFrame.cs

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Licensed to the .NET Foundation under one or more agreements.
1+
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

@@ -483,6 +483,7 @@ private void ResizeByOneAndAppend(DataFrameColumn column, object value)
483483
/// </summary>
484484
/// <remarks>If an input column's value doesn't match a DataFrameColumn's data type, a conversion will be attempted</remarks>
485485
/// <remarks>If a <seealso cref="DataFrameRow"/> in <paramref name="rows"/> is null, a null value is appended to each column</remarks>
486+
/// <remarks> Values are appended based on the column names</remarks>
486487
/// <param name="rows">The rows to be appended to this DataFrame </param>
487488
/// <param name="inPlace">If set, appends <paramref name="rows"/> in place. Otherwise, a new DataFrame is returned with the <paramref name="rows"/> appended</param>
488489
/// <param name="cultureInfo">culture info for formatting values</param>
@@ -491,7 +492,7 @@ public DataFrame Append(IEnumerable<DataFrameRow> rows, bool inPlace = false, Cu
491492
DataFrame ret = inPlace ? this : Clone();
492493
foreach (DataFrameRow row in rows)
493494
{
494-
ret.Append(row, inPlace: true, cultureInfo: cultureInfo);
495+
ret.Append(row.GetValues(), inPlace: true, cultureInfo: cultureInfo);
495496
}
496497
return ret;
497498
}
@@ -503,7 +504,7 @@ public DataFrame Append(IEnumerable<DataFrameRow> rows, bool inPlace = false, Cu
503504
/// <remarks>If <paramref name="row"/> is null, a null value is appended to each column</remarks>
504505
/// <param name="row"></param>
505506
/// <param name="inPlace">If set, appends a <paramref name="row"/> in place. Otherwise, a new DataFrame is returned with an appended <paramref name="row"/> </param>
506-
/// <param name="cultureInfo">culture info for formatting values</param>
507+
/// <param name="cultureInfo">Culture info for formatting values</param>
507508
public DataFrame Append(IEnumerable<object> row = null, bool inPlace = false, CultureInfo cultureInfo = null)
508509
{
509510
if (cultureInfo == null)
@@ -586,8 +587,14 @@ public DataFrame Append(IEnumerable<object> row = null, bool inPlace = false, Cu
586587
/// <remarks>If a column's value doesn't match its column's data type, a conversion will be attempted</remarks>
587588
/// <param name="row">An enumeration of column name and value to be appended</param>
588589
/// <param name="inPlace">If set, appends <paramref name="row"/> in place. Otherwise, a new DataFrame is returned with an appended <paramref name="row"/> </param>
589-
public DataFrame Append(IEnumerable<KeyValuePair<string, object>> row, bool inPlace = false)
590+
/// <param name="cultureInfo">Culture info for formatting values</param>
591+
public DataFrame Append(IEnumerable<KeyValuePair<string, object>> row, bool inPlace = false, CultureInfo cultureInfo = null)
590592
{
593+
if (cultureInfo == null)
594+
{
595+
cultureInfo = CultureInfo.CurrentCulture;
596+
}
597+
591598
DataFrame ret = inPlace ? this : Clone();
592599
if (row == null)
593600
{
@@ -608,7 +615,7 @@ public DataFrame Append(IEnumerable<KeyValuePair<string, object>> row, bool inPl
608615
object value = columnAndValue.Value;
609616
if (value != null)
610617
{
611-
value = Convert.ChangeType(value, column.DataType);
618+
value = Convert.ChangeType(value, column.DataType, cultureInfo);
612619
if (value is null)
613620
{
614621
throw new ArgumentException(string.Format(Strings.MismatchedValueType, column.DataType), column.Name);

src/Microsoft.Data.Analysis/DataFrameRow.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
using System.Collections;
77
using System.Collections.Generic;
88
using System.Diagnostics;
9+
using System.Linq;
910
using System.Text;
1011

1112
namespace Microsoft.Data.Analysis
@@ -17,6 +18,7 @@ public class DataFrameRow : IEnumerable<object>
1718
{
1819
private readonly DataFrame _dataFrame;
1920
private readonly long _rowIndex;
21+
2022
internal DataFrameRow(DataFrame df, long rowIndex)
2123
{
2224
Debug.Assert(rowIndex < df.Columns.RowCount);
@@ -35,6 +37,11 @@ public IEnumerator<object> GetEnumerator()
3537
}
3638
}
3739

40+
public IEnumerable<KeyValuePair<string, object>> GetValues()
41+
{
42+
return _dataFrame.Columns.Select(col => new KeyValuePair<string, object>(col.Name, col[_rowIndex]));
43+
}
44+
3845
/// <summary>
3946
/// An indexer to return the value at <paramref name="index"/>.
4047
/// </summary>

test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3206,6 +3206,35 @@ void Verify(DataFrame ret, DataFrame check1, DataFrame check2)
32063206
Verify(df, dfClone, df2);
32073207
}
32083208

3209+
[Fact]
3210+
public void TestAppendRowsIfColumnAreOutOfOrder()
3211+
{
3212+
var dataFrame = new DataFrame(
3213+
new StringDataFrameColumn("ColumnA", new string[] { "a", "b", "c" }),
3214+
new Int32DataFrameColumn("ColumnB", new int[] { 1, 2, 3 }),
3215+
new Int32DataFrameColumn("ColumnC", new int[] { 10, 20, 30 }));
3216+
3217+
// ColumnC and ColumnB are swapped
3218+
var dataFrame2 = new DataFrame(
3219+
new StringDataFrameColumn("ColumnA", new string[] { "d", "e", "f" }),
3220+
new Int32DataFrameColumn("ColumnC", new int[] { 40, 50, 60 }),
3221+
new Int32DataFrameColumn("ColumnB", new int[] { 4, 5, 6 }));
3222+
3223+
var resultDataFrame = dataFrame.Append(dataFrame2.Rows);
3224+
3225+
Assert.Equal(3, resultDataFrame.Columns.Count);
3226+
Assert.Equal(6, resultDataFrame.Rows.Count);
3227+
3228+
Assert.Equal("c", resultDataFrame["ColumnA"][2]);
3229+
Assert.Equal("d", resultDataFrame["ColumnA"][3]);
3230+
3231+
Assert.Equal(3, resultDataFrame["ColumnB"][2]);
3232+
Assert.Equal(4, resultDataFrame["ColumnB"][3]);
3233+
3234+
Assert.Equal(30, resultDataFrame["ColumnC"][2]);
3235+
Assert.Equal(40, resultDataFrame["ColumnC"][3]);
3236+
}
3237+
32093238
[Fact]
32103239
public void TestAppendRow()
32113240
{

0 commit comments

Comments
 (0)