From c77f5f7297118cc05dfe9507a452c356ab9afe38 Mon Sep 17 00:00:00 2001 From: Aleksei Smirnov Date: Mon, 16 Oct 2023 15:01:42 +0300 Subject: [PATCH 1/2] Avoid Boxing/Unboxing on accessing elements of VBufferDataFrameColumn --- .../PrimitiveDataFrameColumn.cs | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs index 54fc1744a2..7991d6b728 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs @@ -250,17 +250,7 @@ protected override void SetValue(long rowIndex, object value) public new T? this[long rowIndex] { get => GetTypedValue(rowIndex); - set - { - if (value == null || value.GetType() == typeof(T)) - { - _columnContainer[rowIndex] = value; - } - else - { - throw new ArgumentException(string.Format(Strings.MismatchedValueType, DataType), nameof(value)); - } - } + set => _columnContainer[rowIndex] = value; } public override double Median() From f0ac9544a3ae7c277c11ea2d33856a78b884f324 Mon Sep 17 00:00:00 2001 From: Aleksei Smirnov Date: Tue, 17 Oct 2023 13:32:40 +0300 Subject: [PATCH 2/2] Avoid boxing for vbuffer column --- .../VBufferDataFrameColumn.cs | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/src/Microsoft.Data.Analysis/VBufferDataFrameColumn.cs b/src/Microsoft.Data.Analysis/VBufferDataFrameColumn.cs index fc15c873cb..064f9cf433 100644 --- a/src/Microsoft.Data.Analysis/VBufferDataFrameColumn.cs +++ b/src/Microsoft.Data.Analysis/VBufferDataFrameColumn.cs @@ -18,7 +18,6 @@ namespace Microsoft.Data.Analysis /// public partial class VBufferDataFrameColumn : DataFrameColumn, IEnumerable> { - public static int MaxCapacity = ArrayUtility.ArrayMaxSize / Unsafe.SizeOf>(); private readonly List>> _vBuffers = new List>>(); // To store more than intMax number of vbuffers @@ -56,9 +55,7 @@ public VBufferDataFrameColumn(string name, IEnumerable> values) : bas } } - private long _nullCount; - - public override long NullCount => _nullCount; + public override long NullCount => 0; protected internal override void Resize(long length) { @@ -94,6 +91,11 @@ private int GetBufferIndexContainingRowIndex(long rowIndex) } protected override object GetValue(long rowIndex) + { + return GetTypedValue(rowIndex); + } + + protected VBuffer GetTypedValue(long rowIndex) { int bufferIndex = GetBufferIndexContainingRowIndex(rowIndex); return _vBuffers[bufferIndex][(int)(rowIndex % MaxCapacity)]; @@ -118,19 +120,13 @@ protected override IReadOnlyList GetValues(long startIndex, int length) protected override void SetValue(long rowIndex, object value) { - if (value == null || value is VBuffer) + if (value == null) { - int bufferIndex = GetBufferIndexContainingRowIndex(rowIndex); - int bufferOffset = (int)(rowIndex % MaxCapacity); - var oldValue = _vBuffers[bufferIndex][bufferOffset]; - _vBuffers[bufferIndex][bufferOffset] = (VBuffer)value; - if (!oldValue.Equals((VBuffer)value)) - { - if (value == null) - _nullCount++; - if (oldValue.Length == 0 && _nullCount > 0) - _nullCount--; - } + throw new NotSupportedException("Null values are not supported by VBufferDataFrameColumn"); + } + else if (value is VBuffer vbuffer) + { + SetTypedValue(rowIndex, vbuffer); } else { @@ -138,10 +134,16 @@ protected override void SetValue(long rowIndex, object value) } } + protected void SetTypedValue(long rowIndex, VBuffer value) + { + int bufferIndex = GetBufferIndexContainingRowIndex(rowIndex); + _vBuffers[bufferIndex][(int)(rowIndex % MaxCapacity)] = value; + } + public new VBuffer this[long rowIndex] { - get => (VBuffer)GetValue(rowIndex); - set => SetValue(rowIndex, value); + get => GetTypedValue(rowIndex); + set => SetTypedValue(rowIndex, value); } ///