diff --git a/src/Microsoft.Data.Analysis/ArrowStringDataFrameColumn.cs b/src/Microsoft.Data.Analysis/ArrowStringDataFrameColumn.cs index 56041711e3..d0b9479e17 100644 --- a/src/Microsoft.Data.Analysis/ArrowStringDataFrameColumn.cs +++ b/src/Microsoft.Data.Analysis/ArrowStringDataFrameColumn.cs @@ -213,9 +213,9 @@ private void Append(ReadOnlySpan value) _offsetsBuffers.Add(mutableOffsetsBuffer); mutableOffsetsBuffer.Append(0); } - mutableDataBuffer.EnsureCapacity(value.Length); - value.CopyTo(mutableDataBuffer.RawSpan.Slice(mutableDataBuffer.Length)); - mutableDataBuffer.Length += value.Length; + var startIndex = mutableDataBuffer.Length; + mutableDataBuffer.IncreaseSize(value.Length); + value.CopyTo(mutableDataBuffer.RawSpan.Slice(startIndex)); mutableOffsetsBuffer.Append(mutableOffsetsBuffer[mutableOffsetsBuffer.Length - 1] + value.Length); } SetValidityBit(Length - 1, value != default); diff --git a/src/Microsoft.Data.Analysis/DataFrameBuffer.cs b/src/Microsoft.Data.Analysis/DataFrameBuffer.cs index e4ee20f9b6..fd7f23964f 100644 --- a/src/Microsoft.Data.Analysis/DataFrameBuffer.cs +++ b/src/Microsoft.Data.Analysis/DataFrameBuffer.cs @@ -15,6 +15,8 @@ namespace Microsoft.Data.Analysis internal class DataFrameBuffer : ReadOnlyDataFrameBuffer where T : unmanaged { + private const int MinCapacity = 8; + private Memory _memory; public override ReadOnlyMemory ReadOnlyBuffer => _memory; @@ -36,24 +38,35 @@ public Span RawSpan get => MemoryMarshal.Cast(Buffer.Span); } - public DataFrameBuffer(int numberOfValues = 8) : base(numberOfValues) { } + public DataFrameBuffer(int capacity = 0) + { + if ((long)capacity * Size > MaxCapacity) + { + throw new ArgumentException($"{capacity} exceeds buffer capacity", nameof(capacity)); + } + + _memory = new byte[Math.Max(capacity, MinCapacity)]; + } - internal DataFrameBuffer(ReadOnlyMemory buffer, int length) : base(buffer, length) + internal DataFrameBuffer(ReadOnlyMemory buffer, int length) { _memory = new byte[buffer.Length]; buffer.CopyTo(_memory); + Length = length; } public void Append(T value) { - if (Length == MaxCapacity) - { - throw new ArgumentException("Current buffer is full", nameof(value)); - } EnsureCapacity(1); - if (Length < MaxCapacity) - ++Length; - Span[Length - 1] = value; + + RawSpan[Length] = value; + Length++; + } + + public void IncreaseSize(int numberOfValues) + { + EnsureCapacity(numberOfValues); + Length += numberOfValues; } public void EnsureCapacity(int numberOfValues) diff --git a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs index 1e37ac2206..8c9700fb0e 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs @@ -67,9 +67,8 @@ public PrimitiveColumnContainer(ReadOnlyMemory buffer, ReadOnlyMemory dataBuffer; if (buffer.IsEmpty) { - DataFrameBuffer mutableBuffer = new DataFrameBuffer(); - mutableBuffer.EnsureCapacity(length); - mutableBuffer.Length = length; + DataFrameBuffer mutableBuffer = new DataFrameBuffer(length); + mutableBuffer.IncreaseSize(length); mutableBuffer.RawSpan.Fill(default(T)); dataBuffer = mutableBuffer; } @@ -172,15 +171,12 @@ public void AppendMany(T? value, long count) //Calculate how many values we can additionaly allocate and not exceed the MaxCapacity int allocatable = (int)Math.Min(remaining, ReadOnlyDataFrameBuffer.MaxCapacity - mutableLastBuffer.Length); - mutableLastBuffer.EnsureCapacity(allocatable); + mutableLastBuffer.IncreaseSize(allocatable); DataFrameBuffer lastNullBitMapBuffer = NullBitMapBuffers.GetOrCreateMutable(NullBitMapBuffers.Count - 1); int nullBufferAllocatable = (allocatable + 7) / 8; - lastNullBitMapBuffer.EnsureCapacity(nullBufferAllocatable); + lastNullBitMapBuffer.IncreaseSize(nullBufferAllocatable); - - mutableLastBuffer.Length += allocatable; - lastNullBitMapBuffer.Length += nullBufferAllocatable; Length += allocatable; if (value.HasValue) @@ -436,13 +432,8 @@ private List> CloneNullBitMapBuffers() List> ret = new List>(); foreach (ReadOnlyDataFrameBuffer buffer in NullBitMapBuffers) { - DataFrameBuffer newBuffer = new DataFrameBuffer(); + DataFrameBuffer newBuffer = new DataFrameBuffer(buffer.ReadOnlyBuffer, buffer.Length); ret.Add(newBuffer); - ReadOnlySpan span = buffer.ReadOnlySpan; - for (int i = 0; i < span.Length; i++) - { - newBuffer.Append(span[i]); - } } return ret; } @@ -518,14 +509,9 @@ public PrimitiveColumnContainer Clone() var ret = new PrimitiveColumnContainer(); foreach (ReadOnlyDataFrameBuffer buffer in Buffers) { - DataFrameBuffer newBuffer = new DataFrameBuffer(); + DataFrameBuffer newBuffer = new DataFrameBuffer(buffer.ReadOnlyBuffer, buffer.Length); ret.Buffers.Add(newBuffer); - ReadOnlySpan span = buffer.ReadOnlySpan; ret.Length += buffer.Length; - for (int i = 0; i < span.Length; i++) - { - newBuffer.Append(span[i]); - } } ret.NullBitMapBuffers = CloneNullBitMapBuffers(); ret.NullCount = NullCount; @@ -537,9 +523,10 @@ internal PrimitiveColumnContainer CloneAsBoolContainer() var ret = new PrimitiveColumnContainer(); foreach (ReadOnlyDataFrameBuffer buffer in Buffers) { - DataFrameBuffer newBuffer = new DataFrameBuffer(); + DataFrameBuffer newBuffer = new DataFrameBuffer(buffer.Length); ret.Buffers.Add(newBuffer); - newBuffer.EnsureCapacity(buffer.Length); + newBuffer.IncreaseSize(buffer.Length); + if (typeof(T) == typeof(bool)) { var localBuffer = buffer; @@ -550,7 +537,6 @@ internal PrimitiveColumnContainer CloneAsBoolContainer() { newBuffer.Span.Fill(false); } - newBuffer.Length = buffer.Length; ret.Length += buffer.Length; } ret.NullBitMapBuffers = CloneNullBitMapBuffers(); @@ -564,9 +550,8 @@ internal PrimitiveColumnContainer CloneAsByteContainer() foreach (ReadOnlyDataFrameBuffer buffer in Buffers) { ret.Length += buffer.Length; - DataFrameBuffer newBuffer = new DataFrameBuffer(); + DataFrameBuffer newBuffer = new DataFrameBuffer(buffer.Length); ret.Buffers.Add(newBuffer); - newBuffer.EnsureCapacity(buffer.Length); ReadOnlySpan span = buffer.ReadOnlySpan; for (int i = 0; i < span.Length; i++) { @@ -584,9 +569,8 @@ internal PrimitiveColumnContainer CloneAsSByteContainer() foreach (ReadOnlyDataFrameBuffer buffer in Buffers) { ret.Length += buffer.Length; - DataFrameBuffer newBuffer = new DataFrameBuffer(); + DataFrameBuffer newBuffer = new DataFrameBuffer(buffer.Length); ret.Buffers.Add(newBuffer); - newBuffer.EnsureCapacity(buffer.Length); ReadOnlySpan span = buffer.ReadOnlySpan; for (int i = 0; i < span.Length; i++) { @@ -604,9 +588,8 @@ internal PrimitiveColumnContainer CloneAsDoubleContainer() foreach (ReadOnlyDataFrameBuffer buffer in Buffers) { ret.Length += buffer.Length; - DataFrameBuffer newBuffer = new DataFrameBuffer(); + DataFrameBuffer newBuffer = new DataFrameBuffer(buffer.Length); ret.Buffers.Add(newBuffer); - newBuffer.EnsureCapacity(buffer.Length); ReadOnlySpan span = buffer.ReadOnlySpan; for (int i = 0; i < span.Length; i++) { @@ -624,9 +607,8 @@ internal PrimitiveColumnContainer CloneAsDecimalContainer() foreach (ReadOnlyDataFrameBuffer buffer in Buffers) { ret.Length += buffer.Length; - DataFrameBuffer newBuffer = new DataFrameBuffer(); + DataFrameBuffer newBuffer = new DataFrameBuffer(buffer.Length); ret.Buffers.Add(newBuffer); - newBuffer.EnsureCapacity(buffer.Length); ReadOnlySpan span = buffer.ReadOnlySpan; for (int i = 0; i < span.Length; i++) { @@ -644,9 +626,8 @@ internal PrimitiveColumnContainer CloneAsShortContainer() foreach (ReadOnlyDataFrameBuffer buffer in Buffers) { ret.Length += buffer.Length; - DataFrameBuffer newBuffer = new DataFrameBuffer(); + DataFrameBuffer newBuffer = new DataFrameBuffer(buffer.Length); ret.Buffers.Add(newBuffer); - newBuffer.EnsureCapacity(buffer.Length); ReadOnlySpan span = buffer.ReadOnlySpan; for (int i = 0; i < span.Length; i++) { @@ -664,9 +645,8 @@ internal PrimitiveColumnContainer CloneAsUShortContainer() foreach (ReadOnlyDataFrameBuffer buffer in Buffers) { ret.Length += buffer.Length; - DataFrameBuffer newBuffer = new DataFrameBuffer(); + DataFrameBuffer newBuffer = new DataFrameBuffer(buffer.Length); ret.Buffers.Add(newBuffer); - newBuffer.EnsureCapacity(buffer.Length); ReadOnlySpan span = buffer.ReadOnlySpan; for (int i = 0; i < span.Length; i++) { @@ -684,9 +664,8 @@ internal PrimitiveColumnContainer CloneAsIntContainer() foreach (ReadOnlyDataFrameBuffer buffer in Buffers) { ret.Length += buffer.Length; - DataFrameBuffer newBuffer = new DataFrameBuffer(); + DataFrameBuffer newBuffer = new DataFrameBuffer(buffer.Length); ret.Buffers.Add(newBuffer); - newBuffer.EnsureCapacity(buffer.Length); ReadOnlySpan span = buffer.ReadOnlySpan; for (int i = 0; i < span.Length; i++) { @@ -704,9 +683,8 @@ internal PrimitiveColumnContainer CloneAsUIntContainer() foreach (ReadOnlyDataFrameBuffer buffer in Buffers) { ret.Length += buffer.Length; - DataFrameBuffer newBuffer = new DataFrameBuffer(); + DataFrameBuffer newBuffer = new DataFrameBuffer(buffer.Length); ret.Buffers.Add(newBuffer); - newBuffer.EnsureCapacity(buffer.Length); ReadOnlySpan span = buffer.ReadOnlySpan; for (int i = 0; i < span.Length; i++) { @@ -724,9 +702,8 @@ internal PrimitiveColumnContainer CloneAsLongContainer() foreach (ReadOnlyDataFrameBuffer buffer in Buffers) { ret.Length += buffer.Length; - DataFrameBuffer newBuffer = new DataFrameBuffer(); + DataFrameBuffer newBuffer = new DataFrameBuffer(buffer.Length); ret.Buffers.Add(newBuffer); - newBuffer.EnsureCapacity(buffer.Length); ReadOnlySpan span = buffer.ReadOnlySpan; for (int i = 0; i < span.Length; i++) { @@ -744,9 +721,8 @@ internal PrimitiveColumnContainer CloneAsULongContainer() foreach (ReadOnlyDataFrameBuffer buffer in Buffers) { ret.Length += buffer.Length; - DataFrameBuffer newBuffer = new DataFrameBuffer(); + DataFrameBuffer newBuffer = new DataFrameBuffer(buffer.Length); ret.Buffers.Add(newBuffer); - newBuffer.EnsureCapacity(buffer.Length); ReadOnlySpan span = buffer.ReadOnlySpan; for (int i = 0; i < span.Length; i++) { @@ -764,9 +740,8 @@ internal PrimitiveColumnContainer CloneAsFloatContainer() foreach (ReadOnlyDataFrameBuffer buffer in Buffers) { ret.Length += buffer.Length; - DataFrameBuffer newBuffer = new DataFrameBuffer(); + DataFrameBuffer newBuffer = new DataFrameBuffer(buffer.Length); ret.Buffers.Add(newBuffer); - newBuffer.EnsureCapacity(buffer.Length); ReadOnlySpan span = buffer.ReadOnlySpan; for (int i = 0; i < span.Length; i++) { diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperationImplementations.Exploded.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperationImplementations.Exploded.cs index ac8035f0ba..6a54acb85e 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperationImplementations.Exploded.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperationImplementations.Exploded.cs @@ -19,7 +19,7 @@ internal DecimalDataFrameColumn AddImplementation(DecimalDataFrameColumn column, { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - DecimalDataFrameColumn newColumn = inPlace ? this : CloneAsDecimalColumn(); + DecimalDataFrameColumn newColumn = inPlace ? this : (DecimalDataFrameColumn)Clone(); newColumn.ColumnContainer.Add(column.ColumnContainer); return newColumn; } @@ -32,7 +32,7 @@ internal DoubleDataFrameColumn AddImplementation(DoubleDataFrameColumn column, b { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - DoubleDataFrameColumn newColumn = inPlace ? this : CloneAsDoubleColumn(); + DoubleDataFrameColumn newColumn = inPlace ? this : (DoubleDataFrameColumn)Clone(); newColumn.ColumnContainer.Add(column.ColumnContainer); return newColumn; } @@ -45,7 +45,7 @@ internal SingleDataFrameColumn AddImplementation(SingleDataFrameColumn column, b { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - SingleDataFrameColumn newColumn = inPlace ? this : CloneAsSingleColumn(); + SingleDataFrameColumn newColumn = inPlace ? this : (SingleDataFrameColumn)Clone(); newColumn.ColumnContainer.Add(column.ColumnContainer); return newColumn; } @@ -58,7 +58,7 @@ internal Int32DataFrameColumn AddImplementation(Int32DataFrameColumn column, boo { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - Int32DataFrameColumn newColumn = inPlace ? this : CloneAsInt32Column(); + Int32DataFrameColumn newColumn = inPlace ? this : (Int32DataFrameColumn)Clone(); newColumn.ColumnContainer.Add(column.ColumnContainer); return newColumn; } @@ -71,7 +71,7 @@ internal Int64DataFrameColumn AddImplementation(Int64DataFrameColumn column, boo { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - Int64DataFrameColumn newColumn = inPlace ? this : CloneAsInt64Column(); + Int64DataFrameColumn newColumn = inPlace ? this : (Int64DataFrameColumn)Clone(); newColumn.ColumnContainer.Add(column.ColumnContainer); return newColumn; } @@ -84,7 +84,7 @@ internal UInt32DataFrameColumn AddImplementation(UInt32DataFrameColumn column, b { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - UInt32DataFrameColumn newColumn = inPlace ? this : CloneAsUInt32Column(); + UInt32DataFrameColumn newColumn = inPlace ? this : (UInt32DataFrameColumn)Clone(); newColumn.ColumnContainer.Add(column.ColumnContainer); return newColumn; } @@ -97,7 +97,7 @@ internal UInt64DataFrameColumn AddImplementation(UInt64DataFrameColumn column, b { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - UInt64DataFrameColumn newColumn = inPlace ? this : CloneAsUInt64Column(); + UInt64DataFrameColumn newColumn = inPlace ? this : (UInt64DataFrameColumn)Clone(); newColumn.ColumnContainer.Add(column.ColumnContainer); return newColumn; } @@ -250,7 +250,7 @@ internal DecimalDataFrameColumn SubtractImplementation(DecimalDataFrameColumn co { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - DecimalDataFrameColumn newColumn = inPlace ? this : CloneAsDecimalColumn(); + DecimalDataFrameColumn newColumn = inPlace ? this : (DecimalDataFrameColumn)Clone(); newColumn.ColumnContainer.Subtract(column.ColumnContainer); return newColumn; } @@ -263,7 +263,7 @@ internal DoubleDataFrameColumn SubtractImplementation(DoubleDataFrameColumn colu { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - DoubleDataFrameColumn newColumn = inPlace ? this : CloneAsDoubleColumn(); + DoubleDataFrameColumn newColumn = inPlace ? this : (DoubleDataFrameColumn)Clone(); newColumn.ColumnContainer.Subtract(column.ColumnContainer); return newColumn; } @@ -276,7 +276,7 @@ internal SingleDataFrameColumn SubtractImplementation(SingleDataFrameColumn colu { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - SingleDataFrameColumn newColumn = inPlace ? this : CloneAsSingleColumn(); + SingleDataFrameColumn newColumn = inPlace ? this : (SingleDataFrameColumn)Clone(); newColumn.ColumnContainer.Subtract(column.ColumnContainer); return newColumn; } @@ -289,7 +289,7 @@ internal Int32DataFrameColumn SubtractImplementation(Int32DataFrameColumn column { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - Int32DataFrameColumn newColumn = inPlace ? this : CloneAsInt32Column(); + Int32DataFrameColumn newColumn = inPlace ? this : (Int32DataFrameColumn)Clone(); newColumn.ColumnContainer.Subtract(column.ColumnContainer); return newColumn; } @@ -302,7 +302,7 @@ internal Int64DataFrameColumn SubtractImplementation(Int64DataFrameColumn column { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - Int64DataFrameColumn newColumn = inPlace ? this : CloneAsInt64Column(); + Int64DataFrameColumn newColumn = inPlace ? this : (Int64DataFrameColumn)Clone(); newColumn.ColumnContainer.Subtract(column.ColumnContainer); return newColumn; } @@ -315,7 +315,7 @@ internal UInt32DataFrameColumn SubtractImplementation(UInt32DataFrameColumn colu { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - UInt32DataFrameColumn newColumn = inPlace ? this : CloneAsUInt32Column(); + UInt32DataFrameColumn newColumn = inPlace ? this : (UInt32DataFrameColumn)Clone(); newColumn.ColumnContainer.Subtract(column.ColumnContainer); return newColumn; } @@ -328,7 +328,7 @@ internal UInt64DataFrameColumn SubtractImplementation(UInt64DataFrameColumn colu { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - UInt64DataFrameColumn newColumn = inPlace ? this : CloneAsUInt64Column(); + UInt64DataFrameColumn newColumn = inPlace ? this : (UInt64DataFrameColumn)Clone(); newColumn.ColumnContainer.Subtract(column.ColumnContainer); return newColumn; } @@ -481,7 +481,7 @@ internal DecimalDataFrameColumn MultiplyImplementation(DecimalDataFrameColumn co { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - DecimalDataFrameColumn newColumn = inPlace ? this : CloneAsDecimalColumn(); + DecimalDataFrameColumn newColumn = inPlace ? this : (DecimalDataFrameColumn)Clone(); newColumn.ColumnContainer.Multiply(column.ColumnContainer); return newColumn; } @@ -494,7 +494,7 @@ internal DoubleDataFrameColumn MultiplyImplementation(DoubleDataFrameColumn colu { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - DoubleDataFrameColumn newColumn = inPlace ? this : CloneAsDoubleColumn(); + DoubleDataFrameColumn newColumn = inPlace ? this : (DoubleDataFrameColumn)Clone(); newColumn.ColumnContainer.Multiply(column.ColumnContainer); return newColumn; } @@ -507,7 +507,7 @@ internal SingleDataFrameColumn MultiplyImplementation(SingleDataFrameColumn colu { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - SingleDataFrameColumn newColumn = inPlace ? this : CloneAsSingleColumn(); + SingleDataFrameColumn newColumn = inPlace ? this : (SingleDataFrameColumn)Clone(); newColumn.ColumnContainer.Multiply(column.ColumnContainer); return newColumn; } @@ -520,7 +520,7 @@ internal Int32DataFrameColumn MultiplyImplementation(Int32DataFrameColumn column { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - Int32DataFrameColumn newColumn = inPlace ? this : CloneAsInt32Column(); + Int32DataFrameColumn newColumn = inPlace ? this : (Int32DataFrameColumn)Clone(); newColumn.ColumnContainer.Multiply(column.ColumnContainer); return newColumn; } @@ -533,7 +533,7 @@ internal Int64DataFrameColumn MultiplyImplementation(Int64DataFrameColumn column { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - Int64DataFrameColumn newColumn = inPlace ? this : CloneAsInt64Column(); + Int64DataFrameColumn newColumn = inPlace ? this : (Int64DataFrameColumn)Clone(); newColumn.ColumnContainer.Multiply(column.ColumnContainer); return newColumn; } @@ -546,7 +546,7 @@ internal UInt32DataFrameColumn MultiplyImplementation(UInt32DataFrameColumn colu { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - UInt32DataFrameColumn newColumn = inPlace ? this : CloneAsUInt32Column(); + UInt32DataFrameColumn newColumn = inPlace ? this : (UInt32DataFrameColumn)Clone(); newColumn.ColumnContainer.Multiply(column.ColumnContainer); return newColumn; } @@ -559,7 +559,7 @@ internal UInt64DataFrameColumn MultiplyImplementation(UInt64DataFrameColumn colu { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - UInt64DataFrameColumn newColumn = inPlace ? this : CloneAsUInt64Column(); + UInt64DataFrameColumn newColumn = inPlace ? this : (UInt64DataFrameColumn)Clone(); newColumn.ColumnContainer.Multiply(column.ColumnContainer); return newColumn; } @@ -712,7 +712,7 @@ internal DecimalDataFrameColumn DivideImplementation(DecimalDataFrameColumn colu { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - DecimalDataFrameColumn newColumn = inPlace ? this : CloneAsDecimalColumn(); + DecimalDataFrameColumn newColumn = inPlace ? this : (DecimalDataFrameColumn)Clone(); newColumn.ColumnContainer.Divide(column.ColumnContainer); return newColumn; } @@ -725,7 +725,7 @@ internal DoubleDataFrameColumn DivideImplementation(DoubleDataFrameColumn column { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - DoubleDataFrameColumn newColumn = inPlace ? this : CloneAsDoubleColumn(); + DoubleDataFrameColumn newColumn = inPlace ? this : (DoubleDataFrameColumn)Clone(); newColumn.ColumnContainer.Divide(column.ColumnContainer); return newColumn; } @@ -738,7 +738,7 @@ internal SingleDataFrameColumn DivideImplementation(SingleDataFrameColumn column { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - SingleDataFrameColumn newColumn = inPlace ? this : CloneAsSingleColumn(); + SingleDataFrameColumn newColumn = inPlace ? this : (SingleDataFrameColumn)Clone(); newColumn.ColumnContainer.Divide(column.ColumnContainer); return newColumn; } @@ -751,7 +751,7 @@ internal Int32DataFrameColumn DivideImplementation(Int32DataFrameColumn column, { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - Int32DataFrameColumn newColumn = inPlace ? this : CloneAsInt32Column(); + Int32DataFrameColumn newColumn = inPlace ? this : (Int32DataFrameColumn)Clone(); newColumn.ColumnContainer.Divide(column.ColumnContainer); return newColumn; } @@ -764,7 +764,7 @@ internal Int64DataFrameColumn DivideImplementation(Int64DataFrameColumn column, { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - Int64DataFrameColumn newColumn = inPlace ? this : CloneAsInt64Column(); + Int64DataFrameColumn newColumn = inPlace ? this : (Int64DataFrameColumn)Clone(); newColumn.ColumnContainer.Divide(column.ColumnContainer); return newColumn; } @@ -777,7 +777,7 @@ internal UInt32DataFrameColumn DivideImplementation(UInt32DataFrameColumn column { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - UInt32DataFrameColumn newColumn = inPlace ? this : CloneAsUInt32Column(); + UInt32DataFrameColumn newColumn = inPlace ? this : (UInt32DataFrameColumn)Clone(); newColumn.ColumnContainer.Divide(column.ColumnContainer); return newColumn; } @@ -790,7 +790,7 @@ internal UInt64DataFrameColumn DivideImplementation(UInt64DataFrameColumn column { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - UInt64DataFrameColumn newColumn = inPlace ? this : CloneAsUInt64Column(); + UInt64DataFrameColumn newColumn = inPlace ? this : (UInt64DataFrameColumn)Clone(); newColumn.ColumnContainer.Divide(column.ColumnContainer); return newColumn; } @@ -943,7 +943,7 @@ internal DecimalDataFrameColumn ModuloImplementation(DecimalDataFrameColumn colu { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - DecimalDataFrameColumn newColumn = inPlace ? this : CloneAsDecimalColumn(); + DecimalDataFrameColumn newColumn = inPlace ? this : (DecimalDataFrameColumn)Clone(); newColumn.ColumnContainer.Modulo(column.ColumnContainer); return newColumn; } @@ -956,7 +956,7 @@ internal DoubleDataFrameColumn ModuloImplementation(DoubleDataFrameColumn column { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - DoubleDataFrameColumn newColumn = inPlace ? this : CloneAsDoubleColumn(); + DoubleDataFrameColumn newColumn = inPlace ? this : (DoubleDataFrameColumn)Clone(); newColumn.ColumnContainer.Modulo(column.ColumnContainer); return newColumn; } @@ -969,7 +969,7 @@ internal SingleDataFrameColumn ModuloImplementation(SingleDataFrameColumn column { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - SingleDataFrameColumn newColumn = inPlace ? this : CloneAsSingleColumn(); + SingleDataFrameColumn newColumn = inPlace ? this : (SingleDataFrameColumn)Clone(); newColumn.ColumnContainer.Modulo(column.ColumnContainer); return newColumn; } @@ -982,7 +982,7 @@ internal Int32DataFrameColumn ModuloImplementation(Int32DataFrameColumn column, { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - Int32DataFrameColumn newColumn = inPlace ? this : CloneAsInt32Column(); + Int32DataFrameColumn newColumn = inPlace ? this : (Int32DataFrameColumn)Clone(); newColumn.ColumnContainer.Modulo(column.ColumnContainer); return newColumn; } @@ -995,7 +995,7 @@ internal Int64DataFrameColumn ModuloImplementation(Int64DataFrameColumn column, { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - Int64DataFrameColumn newColumn = inPlace ? this : CloneAsInt64Column(); + Int64DataFrameColumn newColumn = inPlace ? this : (Int64DataFrameColumn)Clone(); newColumn.ColumnContainer.Modulo(column.ColumnContainer); return newColumn; } @@ -1008,7 +1008,7 @@ internal UInt32DataFrameColumn ModuloImplementation(UInt32DataFrameColumn column { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - UInt32DataFrameColumn newColumn = inPlace ? this : CloneAsUInt32Column(); + UInt32DataFrameColumn newColumn = inPlace ? this : (UInt32DataFrameColumn)Clone(); newColumn.ColumnContainer.Modulo(column.ColumnContainer); return newColumn; } @@ -1021,7 +1021,7 @@ internal UInt64DataFrameColumn ModuloImplementation(UInt64DataFrameColumn column { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - UInt64DataFrameColumn newColumn = inPlace ? this : CloneAsUInt64Column(); + UInt64DataFrameColumn newColumn = inPlace ? this : (UInt64DataFrameColumn)Clone(); newColumn.ColumnContainer.Modulo(column.ColumnContainer); return newColumn; } diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperationImplementations.Exploded.tt b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperationImplementations.Exploded.tt index 001aab32b1..89eede1a0c 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperationImplementations.Exploded.tt +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperationImplementations.Exploded.tt @@ -76,7 +76,7 @@ void GenerateAllBinaryCombinationsForMethod(string inputMethodName) { throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column)); } - <#=fullReturnType#> newColumn = inPlace ? this : CloneAs<#=capitalizedReturnType#>Column(); + <#=fullReturnType#> newColumn = inPlace ? this : (<#=fullReturnType#>)Clone(); newColumn.ColumnContainer.<#=inputMethodName#>(column.ColumnContainer); return newColumn; } diff --git a/src/Microsoft.Data.Analysis/ReadOnlyDataFrameBuffer.cs b/src/Microsoft.Data.Analysis/ReadOnlyDataFrameBuffer.cs index a6a41089e7..069aa3e94a 100644 --- a/src/Microsoft.Data.Analysis/ReadOnlyDataFrameBuffer.cs +++ b/src/Microsoft.Data.Analysis/ReadOnlyDataFrameBuffer.cs @@ -48,15 +48,16 @@ public ReadOnlySpan ReadOnlySpan get => (MemoryMarshal.Cast(ReadOnlyBuffer.Span)).Slice(0, Length); } - public int Length { get; internal set; } + public int Length { get; protected set; } - public ReadOnlyDataFrameBuffer(int numberOfValues = 8) + public ReadOnlyDataFrameBuffer(int length = 0) { - if ((long)numberOfValues * Size > MaxCapacity) + if ((long)length * Size > MaxCapacity) { - throw new ArgumentException($"{numberOfValues} exceeds buffer capacity", nameof(numberOfValues)); + throw new ArgumentException($"{length} exceeds buffer capacity", nameof(length)); } - _readOnlyBuffer = new byte[numberOfValues * Size]; + _readOnlyBuffer = new byte[length * Size]; + Length = length; } public ReadOnlyDataFrameBuffer(ReadOnlyMemory buffer, int length) diff --git a/test/Microsoft.Data.Analysis.Tests/BufferTests.cs b/test/Microsoft.Data.Analysis.Tests/BufferTests.cs index bc8f66d822..3a88e2eddc 100644 --- a/test/Microsoft.Data.Analysis.Tests/BufferTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/BufferTests.cs @@ -79,7 +79,6 @@ public void TestNullCounts() Assert.Equal(2, intColumn[3]); Assert.Null(intColumn[4]); Assert.Equal(3, intColumn[5]); - } [Fact] @@ -134,6 +133,20 @@ public void TestAppendMany() Assert.False(intColumn.IsValid(7)); } + [Fact] + public void TestClone() + { + PrimitiveDataFrameColumn intColumn = new PrimitiveDataFrameColumn("Int1", new int?[] { 1, 2, 3, 4, null }); + var copy = intColumn.Clone(); + + Assert.Equal(intColumn.Name, copy.Name); + Assert.Equal(intColumn.Length, copy.Length); + Assert.Equal(intColumn.DataType, copy.DataType); + + for (int i = 0; i < intColumn.Length; i++) + Assert.Equal(intColumn[i], copy[i]); + } + [Fact] public void TestBasicArrowStringColumn() { diff --git a/test/Microsoft.ML.Fairlearn.Tests/GridSearchTest.cs b/test/Microsoft.ML.Fairlearn.Tests/GridSearchTest.cs index e539c10907..35994e6a44 100644 --- a/test/Microsoft.ML.Fairlearn.Tests/GridSearchTest.cs +++ b/test/Microsoft.ML.Fairlearn.Tests/GridSearchTest.cs @@ -45,7 +45,7 @@ private DataFrame CreateDummyDataset() } // Data generated so it is identical from Binary_Classification.ipynb from Fairlearn.github on Github - private DataFrame CreateGridScearhDataset() + private DataFrame CreateGridSearchDataset() { float[] score_feature = new float[52]; int index = 0; @@ -89,7 +89,7 @@ public void TestGridSearchTrialRunner2() } }; var experiment = context.Auto().CreateExperiment(); - var df = CreateGridScearhDataset(); + var df = CreateGridSearchDataset(); var shuffledDataset = context.Data.ShuffleRows(df); var trainTestSplit = context.Data.TrainTestSplit(shuffledDataset, 0.2); var pipeline = context.Transforms.Categorical.OneHotHashEncoding("sensitiveFeature_encode", "sensitiveFeature")