-
Notifications
You must be signed in to change notification settings - Fork 341
Data frame binary operations #2656
Changes from all commits
3624484
196ad6f
004fb10
76d28b9
2963c89
3d358b6
1c16625
00ee04d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| // Licensed to the .NET Foundation under one or more agreements. | ||
| // The .NET Foundation licenses this file to you under the MIT license. | ||
| // See the LICENSE file in the project root for more information. | ||
|
|
||
| using System; | ||
| using System.Collections.Generic; | ||
| using System.Text; | ||
|
|
||
| namespace Microsoft.Data | ||
| { | ||
| /// <summary> | ||
| /// The base column type. All APIs should have at least a stub here first. | ||
| /// </summary> | ||
| public abstract class BaseDataFrameColumn | ||
| { | ||
| /// <summary> | ||
| /// Initializes the state shared by every column: its <see cref="Name"/> and <see cref="Length"/>. | ||
| /// </summary> | ||
| /// <param name="name">Name assigned to the column.</param> | ||
| /// <param name="length">Initial value of <see cref="Length"/>; defaults to 0.</param> | ||
| /// <exception cref="ArgumentOutOfRangeException">Raised by the <see cref="Length"/> setter when <paramref name="length"/> is negative.</exception> | ||
| /// <remarks> | ||
| /// Declared protected because an abstract type can only be constructed through a derived type, | ||
| /// so a public constructor would advertise something callers cannot use (CA1012). | ||
| /// </remarks> | ||
| protected BaseDataFrameColumn(string name, long length = 0) | ||
| { | ||
| Length = length; | ||
| Name = name; | ||
| } | ||
|
|
||
| private long _length; | ||
|
|
||
| /// <summary> | ||
| /// Length of the column. Readable by anyone; only derived types may change it. | ||
| /// </summary> | ||
| /// <exception cref="ArgumentOutOfRangeException">The assigned value is negative.</exception> | ||
| public long Length | ||
| { | ||
| get => _length; | ||
| protected set | ||
| { | ||
| if (value < 0) | ||
| { | ||
| // Report the offending value and parameter so the failure is diagnosable from the exception alone. | ||
| throw new ArgumentOutOfRangeException(nameof(value), value, "Length cannot be negative."); | ||
| } | ||
| _length = value; | ||
| } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Null count for this column. Publicly readable; only derived types may assign it. | ||
| /// NOTE(review): presumably the number of null entries held by the column - confirm against the derived column types. | ||
| /// </summary> | ||
| public long NullCount { get; protected set; } | ||
|
|
||
| /// <summary> | ||
| /// Name of the column. Initially assigned by the constructor and may be reassigned afterwards. | ||
| /// </summary> | ||
| public string Name { get; set; } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Public fields are a bad practice; this must be replaced by a property. |
||
|
|
||
| /// <summary> | ||
| /// Stub indexer for a single row. The base implementation always throws; derived columns are expected to override it. | ||
| /// </summary> | ||
| /// <param name="rowIndex">Index of the row to read or write.</param> | ||
| /// <exception cref="NotImplementedException">Always, unless overridden.</exception> | ||
| public virtual object this[long rowIndex] | ||
| { | ||
| get => throw new NotImplementedException(); | ||
| set => throw new NotImplementedException(); | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is temporary code, but long term it should be on several lines (using => syntax if it's oneliners) |
||
|
|
||
| /// <summary> | ||
| /// Stub read-only indexer taking a start index and a length. The base implementation always throws; derived columns are expected to override it. | ||
| /// </summary> | ||
| /// <param name="startIndex">First index of the requested range.</param> | ||
| /// <param name="length">Number of entries requested.</param> | ||
| /// <exception cref="NotImplementedException">Always, unless overridden.</exception> | ||
| public virtual object this[long startIndex, int length] | ||
| { | ||
| get => throw new NotImplementedException(); | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is how current API of
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. They are overridden in DataFrameColumn at the moment?. Do you mean they should occur only in DataFrameColumn and not in the base class? I'm not completely convinced about that yet. At the moment, the DataFrame class has a DataFrameTable which has an IList called columns i.e. the DataFrameTable does not know the real type of the columns it holds => therefore, the approach at the moment is to add APIs on BaseDataFrameColumn and override them in the derived columns. It is possible that this will change down the line as I add more features, but for the moment I think this looks reasonable. Thoughts?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay, let's change it a bit later. |
||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,96 @@ | ||
| // Licensed to the .NET Foundation under one or more agreements. | ||
| // The .NET Foundation licenses this file to you under the MIT license. | ||
| // See the LICENSE file in the project root for more information. | ||
|
|
||
| using System; | ||
| using System.Collections.Generic; | ||
|
|
||
| namespace Microsoft.Data | ||
| { | ||
| /// <summary> | ||
| /// A DataFrame to support indexing, binary operations, sorting, selection and other APIs. This will eventually also expose an IDataView for ML.NET | ||
| /// </summary> | ||
| public partial class DataFrame | ||
| { | ||
| // Backing table that every public member of this class forwards to. | ||
| private readonly DataFrameTable _table; | ||
|
|
||
| /// <summary> | ||
| /// Creates a DataFrame backed by a newly constructed <see cref="DataFrameTable"/>. | ||
| /// </summary> | ||
| public DataFrame() => _table = new DataFrameTable(); | ||
|
|
||
| /// <summary> | ||
| /// Row count as reported by the backing table. | ||
| /// </summary> | ||
| public long RowCount | ||
| { | ||
| get { return _table.RowCount; } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Column count as reported by the backing table. | ||
| /// </summary> | ||
| public int ColumnCount | ||
| { | ||
| get { return _table.ColumnCount; } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Names of all columns, in column-index order. A new list is built on every access. | ||
| /// </summary> | ||
| public IList<string> Columns | ||
| { | ||
| get | ||
| { | ||
| var names = new List<string>(ColumnCount); | ||
| int columnIndex = 0; | ||
| while (columnIndex < ColumnCount) | ||
| { | ||
| BaseDataFrameColumn column = _table.Column(columnIndex); | ||
| names.Add(column.Name); | ||
| columnIndex++; | ||
| } | ||
| return names; | ||
| } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Returns the column at <paramref name="index"/> by forwarding to the backing table. | ||
| /// </summary> | ||
| /// <param name="index">Index of the column to fetch.</param> | ||
| public BaseDataFrameColumn Column(int index) | ||
| { | ||
| return _table.Column(index); | ||
| } | ||
pgovind marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| /// <summary> | ||
| /// Forwards to the backing table's <c>InsertColumn</c> with the same arguments. | ||
| /// </summary> | ||
| /// <param name="columnIndex">Index passed through to the table.</param> | ||
| /// <param name="column">Column passed through to the table.</param> | ||
| public void InsertColumn(int columnIndex, BaseDataFrameColumn column) | ||
| { | ||
| _table.InsertColumn(columnIndex, column); | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Forwards to the backing table's <c>SetColumn</c> with the same arguments. | ||
| /// </summary> | ||
| /// <param name="columnIndex">Index passed through to the table.</param> | ||
| /// <param name="column">Column passed through to the table.</param> | ||
| public void SetColumn(int columnIndex, BaseDataFrameColumn column) | ||
| { | ||
| _table.SetColumn(columnIndex, column); | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Forwards to the backing table's index-based <c>RemoveColumn</c>. | ||
| /// </summary> | ||
| /// <param name="columnIndex">Index passed through to the table.</param> | ||
| public void RemoveColumn(int columnIndex) | ||
| { | ||
| _table.RemoveColumn(columnIndex); | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Forwards to the backing table's name-based <c>RemoveColumn</c>. | ||
| /// </summary> | ||
| /// <param name="columnName">Name passed through to the table.</param> | ||
| public void RemoveColumn(string columnName) | ||
| { | ||
| _table.RemoveColumn(columnName); | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Reads or writes one cell: the column is looked up by <paramref name="columnIndex"/> on the backing table, | ||
| /// then indexed by <paramref name="rowIndex"/>. | ||
| /// </summary> | ||
| /// <param name="rowIndex">Row of the cell.</param> | ||
| /// <param name="columnIndex">Column of the cell.</param> | ||
| public object this[long rowIndex, int columnIndex] | ||
pgovind marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| { | ||
| get | ||
| { | ||
| return _table.Column(columnIndex)[rowIndex]; | ||
| } | ||
| set | ||
| { | ||
| _table.Column(columnIndex)[rowIndex] = value; | ||
| } | ||
| } | ||
|
|
||
| #region Operators | ||
| /// <summary> | ||
| /// Returns the row at <paramref name="rowIndex"/> as produced by the backing table's <c>GetRow</c>. Read-only for now. | ||
| /// </summary> | ||
| /// <param name="rowIndex">Index of the row to fetch.</param> | ||
| public IList<object> this[long rowIndex] | ||
| { | ||
| get => _table.GetRow(rowIndex); | ||
| //TODO?: set? | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Returns the column whose name is <paramref name="columnName"/>. | ||
| /// </summary> | ||
| /// <param name="columnName">Name of the column to look up on the backing table.</param> | ||
| /// <exception cref="ArgumentException">The table reports index -1 for <paramref name="columnName"/>, i.e. no such column.</exception> | ||
| public object this[string columnName] | ||
| { | ||
| get | ||
| { | ||
| int columnIndex = _table.GetColumnIndex(columnName); | ||
| // -1 is the table's "not found" result; name the offending parameter in the exception. | ||
| if (columnIndex == -1) | ||
| { | ||
| throw new ArgumentException($"{columnName} does not exist", nameof(columnName)); | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not throwing yet because I want to support things like: df["Int3"] = df["Int1"] + df["Int2"]; where df["Int3"] will be created if it doesn't exist. So, I'm not throwing yet. I don't like returning -1 either, so if I can write the setter (when I get around to it) without the -1, I'll add code to throw then. |
||
| return _table.Column(columnIndex); | ||
| } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Returns the first <paramref name="numberOfRows"/> rows, or every row when fewer are available. | ||
| /// </summary> | ||
| /// <param name="numberOfRows">Maximum number of rows to return; zero or negative yields an empty list.</param> | ||
| /// <returns>A new list holding the selected rows in order.</returns> | ||
| public IList<IList<object>> Head(int numberOfRows) | ||
| { | ||
| // Clamp to [0, RowCount] so a request larger than the frame never indexes past the last row. | ||
| int count = (int)Math.Max(0L, Math.Min((long)numberOfRows, RowCount)); | ||
| var rows = new List<IList<object>>(count); | ||
| for (long rowIndex = 0; rowIndex < count; rowIndex++) | ||
| { | ||
| rows.Add(this[rowIndex]); | ||
| } | ||
| return rows; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Returns the last <paramref name="numberOfRows"/> rows, or every row when fewer are available. | ||
| /// </summary> | ||
| /// <param name="numberOfRows">Maximum number of rows to return; zero or negative yields an empty list.</param> | ||
| /// <returns>A new list holding the selected rows in order.</returns> | ||
| public IList<IList<object>> Tail(int numberOfRows) | ||
| { | ||
| long total = RowCount; | ||
| // Without the clamp, numberOfRows > RowCount would start the loop at a negative row index. | ||
| long start = Math.Max(0L, total - numberOfRows); | ||
| var rows = new List<IList<object>>(); | ||
| for (long rowIndex = start; rowIndex < total; rowIndex++) | ||
| { | ||
| rows.Add(this[rowIndex]); | ||
| } | ||
| return rows; | ||
| } | ||
| // TODO: Add strongly typed versions of these APIs | ||
| #endregion | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"atleast" is two words.