From 871bed6087d7d3c751c7530714e585c7369d1e94 Mon Sep 17 00:00:00 2001 From: Jake Radzikowski Date: Fri, 5 May 2023 03:36:37 -0700 Subject: [PATCH 1/8] Try to clean up PrimitiveColumnContainer --- .../DataFrameBuffer.cs | 2 +- ...imitiveColumnContainer.BinaryOperations.cs | 2 +- .../PrimitiveColumnContainer.cs | 165 +++++++++--------- .../PrimitiveColumnContainerHelpers.cs | 31 ++++ .../PrimitiveDataFrameColumnArithmetic.cs | 6 +- .../PrimitiveDataFrameColumnComputations.cs | 6 +- .../ReadOnlyDataFrameBuffer.cs | 2 +- 7 files changed, 123 insertions(+), 91 deletions(-) create mode 100644 src/Microsoft.Data.Analysis/PrimitiveColumnContainerHelpers.cs diff --git a/src/Microsoft.Data.Analysis/DataFrameBuffer.cs b/src/Microsoft.Data.Analysis/DataFrameBuffer.cs index 44a8d3faaa..352b853ddc 100644 --- a/src/Microsoft.Data.Analysis/DataFrameBuffer.cs +++ b/src/Microsoft.Data.Analysis/DataFrameBuffer.cs @@ -13,7 +13,7 @@ namespace Microsoft.Data.Analysis /// /// internal class DataFrameBuffer : ReadOnlyDataFrameBuffer - where T : struct + where T : unmanaged { private Memory _memory; diff --git a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.BinaryOperations.cs b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.BinaryOperations.cs index 4020c6de7e..64b12c0607 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.BinaryOperations.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.BinaryOperations.cs @@ -8,7 +8,7 @@ namespace Microsoft.Data.Analysis { internal partial class PrimitiveColumnContainer - where T : struct + where T : unmanaged { public PrimitiveColumnContainer Add(PrimitiveColumnContainer right) { diff --git a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs index 063a9b50af..f23f963ac0 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs @@ -17,7 +17,7 @@ namespace Microsoft.Data.Analysis /// /// internal partial class PrimitiveColumnContainer : IEnumerable - where T : struct + where T : unmanaged { public IList> Buffers = new List>(); @@ -90,6 +90,7 @@ public PrimitiveColumnContainer(ReadOnlyMemory buffer, ReadOnlyMemory(buffer, length); } Buffers.Add(dataBuffer); + int bitMapBufferLength = (length + 7) / 8; ReadOnlyDataFrameBuffer nullDataFrameBuffer; if (nullBitMap.IsEmpty) @@ -127,31 +128,40 @@ public PrimitiveColumnContainer(ReadOnlyMemory buffer, ReadOnlyMemory 0) + /* + var remaining = length; + while (remaining > 0) { if (Buffers.Count == 0) { Buffers.Add(new DataFrameBuffer()); NullBitMapBuffers.Add(new DataFrameBuffer()); } - DataFrameBuffer lastBuffer = (DataFrameBuffer)Buffers[Buffers.Count - 1]; - if (lastBuffer.Length == ReadOnlyDataFrameBuffer.MaxCapacity) + + if (Buffers[Buffers.Count - 1].Length == ReadOnlyDataFrameBuffer.MaxCapacity) { - lastBuffer = new DataFrameBuffer(); - Buffers.Add(lastBuffer); + Buffers.Add(new DataFrameBuffer()); NullBitMapBuffers.Add(new DataFrameBuffer()); } - int allocatable = (int)Math.Min(length, ReadOnlyDataFrameBuffer.MaxCapacity); - lastBuffer.EnsureCapacity(allocatable); - DataFrameBuffer lastNullBitMapBuffer = (DataFrameBuffer)(NullBitMapBuffers[NullBitMapBuffers.Count - 1]); + + DataFrameBuffer mutableLastBuffer = Buffers.GetMutable(Buffers.Count - 1); + + int allocatable = (int)Math.Min(remaining, ReadOnlyDataFrameBuffer.MaxCapacity); + + mutableLastBuffer.EnsureCapacity(allocatable); + mutableLastBuffer.Length = allocatable; + + DataFrameBuffer lastNullBitMapBuffer = NullBitMapBuffers.GetMutable(NullBitMapBuffers.Count - 1); int nullBufferAllocatable = (allocatable + 7) / 8; lastNullBitMapBuffer.EnsureCapacity(nullBufferAllocatable); - lastBuffer.Length = allocatable; lastNullBitMapBuffer.Length = nullBufferAllocatable; - length -= allocatable; - Length += lastBuffer.Length; - NullCount += lastBuffer.Length; - } + + remaining -= allocatable; + Length += mutableLastBuffer.Length; + NullCount += mutableLastBuffer.Length; + }*/ + + AppendMany(null, length); } public void Resize(long length) @@ -168,16 +178,14 @@ public void Append(T? value) Buffers.Add(new DataFrameBuffer()); NullBitMapBuffers.Add(new DataFrameBuffer()); } - int bufferIndex = Buffers.Count - 1; - ReadOnlyDataFrameBuffer lastBuffer = Buffers[bufferIndex]; - if (lastBuffer.Length == ReadOnlyDataFrameBuffer.MaxCapacity) + + if (Buffers[Buffers.Count - 1].Length == ReadOnlyDataFrameBuffer.MaxCapacity) { - lastBuffer = new DataFrameBuffer(); - Buffers.Add(lastBuffer); + Buffers.Add(new DataFrameBuffer()); NullBitMapBuffers.Add(new DataFrameBuffer()); } - DataFrameBuffer mutableLastBuffer = DataFrameBuffer.GetMutableBuffer(lastBuffer); - Buffers[bufferIndex] = mutableLastBuffer; + + DataFrameBuffer mutableLastBuffer = Buffers.GetMutable(Buffers.Count - 1); mutableLastBuffer.Append(value ?? default); SetValidityBit(Length, value.HasValue); Length++; @@ -190,42 +198,50 @@ public void AppendMany(T? value, long count) NullCount += count; } - while (count > 0) + var remaining = count; + while (remaining > 0) { if (Buffers.Count == 0) { Buffers.Add(new DataFrameBuffer()); NullBitMapBuffers.Add(new DataFrameBuffer()); } - int bufferIndex = Buffers.Count - 1; - ReadOnlyDataFrameBuffer lastBuffer = Buffers[bufferIndex]; - if (lastBuffer.Length == ReadOnlyDataFrameBuffer.MaxCapacity) + + if (Buffers[Buffers.Count - 1].Length == ReadOnlyDataFrameBuffer.MaxCapacity) { - lastBuffer = new DataFrameBuffer(); - Buffers.Add(lastBuffer); + Buffers.Add(new DataFrameBuffer()); NullBitMapBuffers.Add(new DataFrameBuffer()); } - DataFrameBuffer mutableLastBuffer = DataFrameBuffer.GetMutableBuffer(lastBuffer); - Buffers[bufferIndex] = mutableLastBuffer; - int allocatable = (int)Math.Min(count, ReadOnlyDataFrameBuffer.MaxCapacity); + + + DataFrameBuffer mutableLastBuffer = Buffers.GetMutable(Buffers.Count - 1); + + int allocatable = (int)Math.Min(remaining, ReadOnlyDataFrameBuffer.MaxCapacity); + mutableLastBuffer.EnsureCapacity(allocatable); - mutableLastBuffer.RawSpan.Slice(lastBuffer.Length, allocatable).Fill(value ?? default); - mutableLastBuffer.Length += allocatable; - Length += allocatable; + mutableLastBuffer.Length = allocatable; + + DataFrameBuffer lastNullBitMapBuffer = NullBitMapBuffers.GetMutable(NullBitMapBuffers.Count - 1); + int nullBufferAllocatable = (allocatable + 7) / 8; + lastNullBitMapBuffer.EnsureCapacity(nullBufferAllocatable); + lastNullBitMapBuffer.Length = nullBufferAllocatable; - int nullBitMapBufferIndex = NullBitMapBuffers.Count - 1; - ReadOnlyDataFrameBuffer lastNullBitMapBuffer = NullBitMapBuffers[nullBitMapBufferIndex]; - DataFrameBuffer mutableLastNullBitMapBuffer = DataFrameBuffer.GetMutableBuffer(lastNullBitMapBuffer); - NullBitMapBuffers[nullBitMapBufferIndex] = mutableLastNullBitMapBuffer; - int nullBitMapAllocatable = (int)(((uint)allocatable) / 8) + 1; - mutableLastNullBitMapBuffer.EnsureCapacity(nullBitMapAllocatable); - _modifyNullCountWhileIndexing = false; - for (long i = Length - count; i < Length; i++) + remaining -= allocatable; + Length += mutableLastBuffer.Length; + + // PR Question: Does this need to be called if it's value is null/doesn't have value? + if (value.HasValue) { - SetValidityBit(i, value.HasValue ? true : false); + mutableLastBuffer.RawSpan.Slice(mutableLastBuffer.Length, allocatable).Fill(value ?? default); + + _modifyNullCountWhileIndexing = false; + for (long i = Length - remaining; i < Length; i++) + { + SetValidityBit(i, value.HasValue); + } + _modifyNullCountWhileIndexing = true; } - _modifyNullCountWhileIndexing = true; - count -= allocatable; + } } @@ -233,21 +249,18 @@ public void ApplyElementwise(Func func) { for (int b = 0; b < Buffers.Count; b++) { - ReadOnlyDataFrameBuffer buffer = Buffers[b]; long prevLength = checked(Buffers[0].Length * b); - DataFrameBuffer mutableBuffer = DataFrameBuffer.GetMutableBuffer(buffer); - Buffers[b] = mutableBuffer; - Span span = mutableBuffer.Span; - DataFrameBuffer mutableNullBitMapBuffer = DataFrameBuffer.GetMutableBuffer(NullBitMapBuffers[b]); - NullBitMapBuffers[b] = mutableNullBitMapBuffer; - Span nullBitMapSpan = mutableNullBitMapBuffer.Span; - for (int i = 0; i < span.Length; i++) + + Span mutableBuffer = Buffers.GetMutable(b).Span; + Span mutableNullBitMapBuffer = NullBitMapBuffers.GetMutable(b).Span; + + for (int i = 0; i < mutableBuffer.Length; i++) { long curIndex = i + prevLength; - bool isValid = IsValid(nullBitMapSpan, i); - T? value = func(isValid ? span[i] : default(T?), curIndex); - span[i] = value.GetValueOrDefault(); - SetValidityBit(nullBitMapSpan, i, value != null); + bool isValid = IsValid(mutableNullBitMapBuffer, i); + T? value = func(isValid ? mutableBuffer[i] : default(T?), curIndex); + mutableBuffer[i] = value.GetValueOrDefault(); + SetValidityBit(mutableNullBitMapBuffer, i, value != null); } } } @@ -257,31 +270,20 @@ public void Apply(Func func, PrimitiveColumnContainer buffer = Buffers[b]; long prevLength = checked(Buffers[0].Length * b); - DataFrameBuffer mutableBuffer = DataFrameBuffer.GetMutableBuffer(buffer); - Buffers[b] = mutableBuffer; - Span span = mutableBuffer.Span; - DataFrameBuffer mutableNullBitMapBuffer = DataFrameBuffer.GetMutableBuffer(NullBitMapBuffers[b]); - NullBitMapBuffers[b] = mutableNullBitMapBuffer; - Span nullBitMapSpan = mutableNullBitMapBuffer.Span; - - ReadOnlyDataFrameBuffer resultBuffer = resultContainer.Buffers[b]; - long resultPrevLength = checked(resultContainer.Buffers[0].Length * b); - DataFrameBuffer resultMutableBuffer = DataFrameBuffer.GetMutableBuffer(resultBuffer); - resultContainer.Buffers[b] = resultMutableBuffer; - Span resultSpan = resultMutableBuffer.Span; - DataFrameBuffer resultMutableNullBitMapBuffer = DataFrameBuffer.GetMutableBuffer(resultContainer.NullBitMapBuffers[b]); - resultContainer.NullBitMapBuffers[b] = resultMutableNullBitMapBuffer; - Span resultNullBitMapSpan = resultMutableNullBitMapBuffer.Span; + var sourceBuffer = Buffers[b]; + var sourceNullBitMap = NullBitMapBuffers[b].ReadOnlySpan; - for (int i = 0; i < span.Length; i++) + Span mutableResultBuffer = resultContainer.Buffers.GetMutable(b).Span; + Span mutableResultNullBitMapBuffers = resultContainer.NullBitMapBuffers.GetMutable(b).Span; + + for (int i = 0; i < sourceBuffer.Length; i++) { long curIndex = i + prevLength; - bool isValid = IsValid(nullBitMapSpan, i); - TResult? value = func(isValid ? span[i] : default(T?)); - resultSpan[i] = value.GetValueOrDefault(); - SetValidityBit(resultNullBitMapSpan, i, value != null); + bool isValid = IsValid(sourceNullBitMap, i); + TResult? value = func(isValid ? sourceBuffer[i] : default(T?)); + mutableResultBuffer[i] = value.GetValueOrDefault(); + resultContainer.SetValidityBit(mutableResultNullBitMapBuffers, i, value != null); } } } @@ -448,11 +450,10 @@ public T? this[long rowIndex] { int arrayIndex = GetArrayContainingRowIndex(rowIndex); rowIndex = rowIndex - arrayIndex * ReadOnlyDataFrameBuffer.MaxCapacity; - ReadOnlyDataFrameBuffer buffer = Buffers[arrayIndex]; - DataFrameBuffer mutableBuffer = DataFrameBuffer.GetMutableBuffer(buffer); - Buffers[arrayIndex] = mutableBuffer; - DataFrameBuffer mutableNullBuffer = DataFrameBuffer.GetMutableBuffer(NullBitMapBuffers[arrayIndex]); - NullBitMapBuffers[arrayIndex] = mutableNullBuffer; + + Buffers.EnsureMutable(arrayIndex); + NullBitMapBuffers.EnsureMutable(arrayIndex); + if (value.HasValue) { Buffers[arrayIndex][(int)rowIndex] = value.Value; diff --git a/src/Microsoft.Data.Analysis/PrimitiveColumnContainerHelpers.cs b/src/Microsoft.Data.Analysis/PrimitiveColumnContainerHelpers.cs new file mode 100644 index 0000000000..50b862d7cf --- /dev/null +++ b/src/Microsoft.Data.Analysis/PrimitiveColumnContainerHelpers.cs @@ -0,0 +1,31 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Collections.Generic; + +namespace Microsoft.Data.Analysis +{ + internal static class PrimitiveColumnContainerHelpers + { + internal static DataFrameBuffer GetMutable(this IList> bufferList, int index) + where T : unmanaged + { + ReadOnlyDataFrameBuffer sourceBuffer = bufferList[index]; + DataFrameBuffer mutableBuffer = sourceBuffer as DataFrameBuffer; + + if (mutableBuffer == null) + { + mutableBuffer = DataFrameBuffer.GetMutableBuffer(sourceBuffer); + } + + return mutableBuffer; + } + + internal static void EnsureMutable(this IList> bufferList, int index) + where T : unmanaged + { + bufferList.GetMutable(index); + } + } +} diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs index f432805185..5498e9bae0 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs @@ -11,7 +11,7 @@ namespace Microsoft.Data.Analysis { internal interface IPrimitiveDataFrameColumnArithmetic - where T : struct + where T : unmanaged { void Add(PrimitiveColumnContainer left, PrimitiveColumnContainer right); void Add(PrimitiveColumnContainer column, T scalar); @@ -54,7 +54,7 @@ internal interface IPrimitiveDataFrameColumnArithmetic } internal static class PrimitiveDataFrameColumnArithmetic - where T : struct + where T : unmanaged { public static IPrimitiveDataFrameColumnArithmetic Instance { get; } = PrimitiveDataFrameColumnArithmetic.GetArithmetic(); } @@ -62,7 +62,7 @@ internal static class PrimitiveDataFrameColumnArithmetic internal static class PrimitiveDataFrameColumnArithmetic { public static IPrimitiveDataFrameColumnArithmetic GetArithmetic() - where T : struct + where T : unmanaged { if (typeof(T) == typeof(bool)) { diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnComputations.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnComputations.cs index a4c5d73b0d..4349caf4b7 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnComputations.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnComputations.cs @@ -12,7 +12,7 @@ namespace Microsoft.Data.Analysis { internal interface IPrimitiveColumnComputation - where T : struct + where T : unmanaged { void Abs(PrimitiveColumnContainer column); void All(PrimitiveColumnContainer column, out bool ret); @@ -37,7 +37,7 @@ internal interface IPrimitiveColumnComputation } internal static class PrimitiveColumnComputation - where T : struct + where T : unmanaged { public static IPrimitiveColumnComputation Instance { get; } = PrimitiveColumnComputation.GetComputation(); } @@ -45,7 +45,7 @@ internal static class PrimitiveColumnComputation internal static class PrimitiveColumnComputation { public static IPrimitiveColumnComputation GetComputation() - where T : struct + where T : unmanaged { if (typeof(T) == typeof(bool)) { diff --git a/src/Microsoft.Data.Analysis/ReadOnlyDataFrameBuffer.cs b/src/Microsoft.Data.Analysis/ReadOnlyDataFrameBuffer.cs index 59394eed8e..2bc41ebe51 100644 --- a/src/Microsoft.Data.Analysis/ReadOnlyDataFrameBuffer.cs +++ b/src/Microsoft.Data.Analysis/ReadOnlyDataFrameBuffer.cs @@ -15,7 +15,7 @@ namespace Microsoft.Data.Analysis /// /// internal class ReadOnlyDataFrameBuffer - where T : struct + where T : unmanaged { private readonly ReadOnlyMemory _readOnlyBuffer; From ec3fb29b35c6398fe006667083ae2cd11786ad52 Mon Sep 17 00:00:00 2001 From: Jake Radzikowski Date: Fri, 5 May 2023 19:59:01 -0700 Subject: [PATCH 2/8] Fix helper --- src/Microsoft.Data.Analysis/PrimitiveColumnContainerHelpers.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Microsoft.Data.Analysis/PrimitiveColumnContainerHelpers.cs b/src/Microsoft.Data.Analysis/PrimitiveColumnContainerHelpers.cs index 50b862d7cf..6abe5f755c 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveColumnContainerHelpers.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveColumnContainerHelpers.cs @@ -17,8 +17,10 @@ internal static DataFrameBuffer GetMutable(this IList.GetMutableBuffer(sourceBuffer); + bufferList[index] = mutableBuffer; } + return mutableBuffer; } From 1d7a2b31fe9280a60c36f363d121cd021fcad9a9 Mon Sep 17 00:00:00 2001 From: Jake Radzikowski Date: Fri, 5 May 2023 20:02:31 -0700 Subject: [PATCH 3/8] Remove duplicate --- .../PrimitiveColumnContainer.cs | 33 ------------------- 1 file changed, 33 deletions(-) diff --git a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs index f23f963ac0..709ce47dd6 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs @@ -128,39 +128,6 @@ public PrimitiveColumnContainer(ReadOnlyMemory buffer, ReadOnlyMemory 0) - { - if (Buffers.Count == 0) - { - Buffers.Add(new DataFrameBuffer()); - NullBitMapBuffers.Add(new DataFrameBuffer()); - } - - if (Buffers[Buffers.Count - 1].Length == ReadOnlyDataFrameBuffer.MaxCapacity) - { - Buffers.Add(new DataFrameBuffer()); - NullBitMapBuffers.Add(new DataFrameBuffer()); - } - - DataFrameBuffer mutableLastBuffer = Buffers.GetMutable(Buffers.Count - 1); - - int allocatable = (int)Math.Min(remaining, ReadOnlyDataFrameBuffer.MaxCapacity); - - mutableLastBuffer.EnsureCapacity(allocatable); - mutableLastBuffer.Length = allocatable; - - DataFrameBuffer lastNullBitMapBuffer = NullBitMapBuffers.GetMutable(NullBitMapBuffers.Count - 1); - int nullBufferAllocatable = (allocatable + 7) / 8; - lastNullBitMapBuffer.EnsureCapacity(nullBufferAllocatable); - lastNullBitMapBuffer.Length = nullBufferAllocatable; - - remaining -= allocatable; - Length += mutableLastBuffer.Length; - NullCount += mutableLastBuffer.Length; - }*/ - AppendMany(null, length); } From 1fac8ab1637c5de1e177a19970aaeeae30449f81 Mon Sep 17 00:00:00 2001 From: Jake Radzikowski Date: Fri, 5 May 2023 20:46:29 -0700 Subject: [PATCH 4/8] More clean up --- .../PrimitiveColumnContainer.cs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs index 709ce47dd6..d1a0b5cc0a 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs @@ -182,33 +182,32 @@ public void AppendMany(T? value, long count) DataFrameBuffer mutableLastBuffer = Buffers.GetMutable(Buffers.Count - 1); - int allocatable = (int)Math.Min(remaining, ReadOnlyDataFrameBuffer.MaxCapacity); - mutableLastBuffer.EnsureCapacity(allocatable); - mutableLastBuffer.Length = allocatable; DataFrameBuffer lastNullBitMapBuffer = NullBitMapBuffers.GetMutable(NullBitMapBuffers.Count - 1); int nullBufferAllocatable = (allocatable + 7) / 8; lastNullBitMapBuffer.EnsureCapacity(nullBufferAllocatable); - lastNullBitMapBuffer.Length = nullBufferAllocatable; - remaining -= allocatable; - Length += mutableLastBuffer.Length; - // PR Question: Does this need to be called if it's value is null/doesn't have value? + mutableLastBuffer.Length += allocatable; + lastNullBitMapBuffer.Length += nullBufferAllocatable; + Length += allocatable; + if (value.HasValue) { - mutableLastBuffer.RawSpan.Slice(mutableLastBuffer.Length, allocatable).Fill(value ?? default); + mutableLastBuffer.RawSpan.Slice(mutableLastBuffer.Length - allocatable - 1, allocatable).Fill(value ?? default); _modifyNullCountWhileIndexing = false; - for (long i = Length - remaining; i < Length; i++) + for (long i = Length - allocatable; i < Length; i++) { SetValidityBit(i, value.HasValue); } _modifyNullCountWhileIndexing = true; } + + remaining -= allocatable; } } From 1c91301777cd703b114e6ce5d7ec0f4b45811794 Mon Sep 17 00:00:00 2001 From: Jake Radzikowski Date: Fri, 5 May 2023 20:49:59 -0700 Subject: [PATCH 5/8] Fix slice --- src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs index d1a0b5cc0a..ef6d7b7d46 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs @@ -196,7 +196,7 @@ public void AppendMany(T? value, long count) if (value.HasValue) { - mutableLastBuffer.RawSpan.Slice(mutableLastBuffer.Length - allocatable - 1, allocatable).Fill(value ?? default); + mutableLastBuffer.RawSpan.Slice(mutableLastBuffer.Length - allocatable, allocatable).Fill(value ?? default); _modifyNullCountWhileIndexing = false; for (long i = Length - allocatable; i < Length; i++) From 7a6d91176213ac3ae2f60a0550d2ba378d023d17 Mon Sep 17 00:00:00 2001 From: Jake Radzikowski Date: Fri, 5 May 2023 20:52:33 -0700 Subject: [PATCH 6/8] Update name to GetOrCreateMutable --- .../PrimitiveColumnContainer.cs | 18 +++++++++--------- .../PrimitiveColumnContainerHelpers.cs | 8 +------- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs index ef6d7b7d46..70a3615d3d 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs @@ -152,7 +152,7 @@ public void Append(T? value) NullBitMapBuffers.Add(new DataFrameBuffer()); } - DataFrameBuffer mutableLastBuffer = Buffers.GetMutable(Buffers.Count - 1); + DataFrameBuffer mutableLastBuffer = Buffers.GetOrCreateMutable(Buffers.Count - 1); mutableLastBuffer.Append(value ?? default); SetValidityBit(Length, value.HasValue); Length++; @@ -181,11 +181,11 @@ public void AppendMany(T? value, long count) } - DataFrameBuffer mutableLastBuffer = Buffers.GetMutable(Buffers.Count - 1); + DataFrameBuffer mutableLastBuffer = Buffers.GetOrCreateMutable(Buffers.Count - 1); int allocatable = (int)Math.Min(remaining, ReadOnlyDataFrameBuffer.MaxCapacity); mutableLastBuffer.EnsureCapacity(allocatable); - DataFrameBuffer lastNullBitMapBuffer = NullBitMapBuffers.GetMutable(NullBitMapBuffers.Count - 1); + DataFrameBuffer lastNullBitMapBuffer = NullBitMapBuffers.GetOrCreateMutable(NullBitMapBuffers.Count - 1); int nullBufferAllocatable = (allocatable + 7) / 8; lastNullBitMapBuffer.EnsureCapacity(nullBufferAllocatable); @@ -217,8 +217,8 @@ public void ApplyElementwise(Func func) { long prevLength = checked(Buffers[0].Length * b); - Span mutableBuffer = Buffers.GetMutable(b).Span; - Span mutableNullBitMapBuffer = NullBitMapBuffers.GetMutable(b).Span; + Span mutableBuffer = Buffers.GetOrCreateMutable(b).Span; + Span mutableNullBitMapBuffer = NullBitMapBuffers.GetOrCreateMutable(b).Span; for (int i = 0; i < mutableBuffer.Length; i++) { @@ -240,8 +240,8 @@ public void Apply(Func func, PrimitiveColumnContainer mutableResultBuffer = resultContainer.Buffers.GetMutable(b).Span; - Span mutableResultNullBitMapBuffers = resultContainer.NullBitMapBuffers.GetMutable(b).Span; + Span mutableResultBuffer = resultContainer.Buffers.GetOrCreateMutable(b).Span; + Span mutableResultNullBitMapBuffers = resultContainer.NullBitMapBuffers.GetOrCreateMutable(b).Span; for (int i = 0; i < sourceBuffer.Length; i++) { @@ -417,8 +417,8 @@ public T? this[long rowIndex] int arrayIndex = GetArrayContainingRowIndex(rowIndex); rowIndex = rowIndex - arrayIndex * ReadOnlyDataFrameBuffer.MaxCapacity; - Buffers.EnsureMutable(arrayIndex); - NullBitMapBuffers.EnsureMutable(arrayIndex); + Buffers.GetOrCreateMutable(arrayIndex); + NullBitMapBuffers.GetOrCreateMutable(arrayIndex); if (value.HasValue) { diff --git a/src/Microsoft.Data.Analysis/PrimitiveColumnContainerHelpers.cs b/src/Microsoft.Data.Analysis/PrimitiveColumnContainerHelpers.cs index 6abe5f755c..22ef32d979 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveColumnContainerHelpers.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveColumnContainerHelpers.cs @@ -8,7 +8,7 @@ namespace Microsoft.Data.Analysis { internal static class PrimitiveColumnContainerHelpers { - internal static DataFrameBuffer GetMutable(this IList> bufferList, int index) + internal static DataFrameBuffer GetOrCreateMutable(this IList> bufferList, int index) where T : unmanaged { ReadOnlyDataFrameBuffer sourceBuffer = bufferList[index]; @@ -23,11 +23,5 @@ internal static DataFrameBuffer GetMutable(this IList(this IList> bufferList, int index) - where T : unmanaged - { - bufferList.GetMutable(index); - } } } From 0b8575d09ac980edf61c47ef267fef06ad5c2358 Mon Sep 17 00:00:00 2001 From: Jake Radzikowski Date: Fri, 5 May 2023 20:56:05 -0700 Subject: [PATCH 7/8] Use null instead of default for nullable values. --- src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs index 70a3615d3d..989883d2a7 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs @@ -224,7 +224,7 @@ public void ApplyElementwise(Func func) { long curIndex = i + prevLength; bool isValid = IsValid(mutableNullBitMapBuffer, i); - T? value = func(isValid ? mutableBuffer[i] : default(T?), curIndex); + T? value = func(isValid ? mutableBuffer[i] : null, curIndex); mutableBuffer[i] = value.GetValueOrDefault(); SetValidityBit(mutableNullBitMapBuffer, i, value != null); } @@ -247,7 +247,7 @@ public void Apply(Func func, PrimitiveColumnContainer Date: Fri, 5 May 2023 21:57:59 -0700 Subject: [PATCH 8/8] Clean up Apply methods. --- src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs index 989883d2a7..830440445e 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs @@ -180,7 +180,6 @@ public void AppendMany(T? value, long count) NullBitMapBuffers.Add(new DataFrameBuffer()); } - DataFrameBuffer mutableLastBuffer = Buffers.GetOrCreateMutable(Buffers.Count - 1); int allocatable = (int)Math.Min(remaining, ReadOnlyDataFrameBuffer.MaxCapacity); mutableLastBuffer.EnsureCapacity(allocatable); @@ -213,9 +212,10 @@ public void AppendMany(T? value, long count) public void ApplyElementwise(Func func) { + var bufferMaxCapacity = ReadOnlyDataFrameBuffer.MaxCapacity; for (int b = 0; b < Buffers.Count; b++) { - long prevLength = checked(Buffers[0].Length * b); + long prevLength = checked(bufferMaxCapacity * b); Span mutableBuffer = Buffers.GetOrCreateMutable(b).Span; Span mutableNullBitMapBuffer = NullBitMapBuffers.GetOrCreateMutable(b).Span; @@ -234,9 +234,10 @@ public void ApplyElementwise(Func func) public void Apply(Func func, PrimitiveColumnContainer resultContainer) where TResult : unmanaged { + var bufferMaxCapacity = ReadOnlyDataFrameBuffer.MaxCapacity; for (int b = 0; b < Buffers.Count; b++) { - long prevLength = checked(Buffers[0].Length * b); + long prevLength = checked(bufferMaxCapacity * b); var sourceBuffer = Buffers[b]; var sourceNullBitMap = NullBitMapBuffers[b].ReadOnlySpan; @@ -245,7 +246,6 @@ public void Apply(Func func, PrimitiveColumnContainer