Skip to content

Commit 0ccca6a

Browse files
committed
Fix DataFrame incorrectly sets column value for index higher than Buffer.MaxCapacity
1 parent 7fe293d commit 0ccca6a

File tree

2 files changed

+26
-11
lines changed

2 files changed

+26
-11
lines changed

src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -359,13 +359,13 @@ private byte SetBit(byte curBitMap, int index, bool value)
359359
}
360360

361361
// private function. Faster to use when we already have a span since it avoids indexing
362-
private void SetValidityBit(Span<byte> bitMapBufferSpan, int index, bool value)
362+
private void SetValidityBit(Span<byte> validitySpan, int index, bool value)
363363
{
364364
int bitMapBufferIndex = (int)((uint)index / 8);
365-
Debug.Assert(bitMapBufferSpan.Length >= bitMapBufferIndex);
366-
byte curBitMap = bitMapBufferSpan[bitMapBufferIndex];
365+
Debug.Assert(validitySpan.Length >= bitMapBufferIndex);
366+
byte curBitMap = validitySpan[bitMapBufferIndex];
367367
byte newBitMap = SetBit(curBitMap, index, value);
368-
bitMapBufferSpan[bitMapBufferIndex] = newBitMap;
368+
validitySpan[bitMapBufferIndex] = newBitMap;
369369
}
370370

371371
/// <summary>
@@ -457,25 +457,25 @@ public T? this[long rowIndex]
457457
return null;
458458
}
459459
int arrayIndex = GetArrayContainingRowIndex(rowIndex);
460-
rowIndex = rowIndex - arrayIndex * ReadOnlyDataFrameBuffer<T>.MaxCapacity;
461-
return Buffers[arrayIndex][(int)rowIndex];
460+
var bufferOffset = (int)(rowIndex % ReadOnlyDataFrameBuffer<T>.MaxCapacity);
461+
return Buffers[arrayIndex][bufferOffset];
462462
}
463463
set
464464
{
465465
int arrayIndex = GetArrayContainingRowIndex(rowIndex);
466-
rowIndex = rowIndex - arrayIndex * ReadOnlyDataFrameBuffer<T>.MaxCapacity;
466+
var bufferOffset = (int)(rowIndex % ReadOnlyDataFrameBuffer<T>.MaxCapacity);
467467

468468
Buffers.GetOrCreateMutable(arrayIndex);
469469
NullBitMapBuffers.GetOrCreateMutable(arrayIndex);
470470

471471
if (value.HasValue)
472472
{
473-
Buffers[arrayIndex][(int)rowIndex] = value.Value;
473+
Buffers[arrayIndex][bufferOffset] = value.Value;
474474
SetValidityBit(rowIndex, true);
475475
}
476476
else
477477
{
478-
Buffers[arrayIndex][(int)rowIndex] = default;
478+
Buffers[arrayIndex][bufferOffset] = default;
479479
SetValidityBit(rowIndex, false);
480480
}
481481
}

test/Microsoft.Data.Analysis.Tests/BufferTests.cs

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,27 @@
88
using System.Text;
99
using Apache.Arrow;
1010
using Microsoft.ML.TestFramework.Attributes;
11+
using Newtonsoft.Json.Linq;
1112
using Xunit;
1213

1314
namespace Microsoft.Data.Analysis.Tests
1415
{
1516
public class BufferTests
1617
{
18+
[X64Fact("32-bit doesn't allow to allocate more than 2 Gb")]
19+
public void TestGetterAndSetterForColumnsGreaterThanMaxCapacity()
20+
{
21+
const int MaxCapacityInBytes = 2147483591;
22+
23+
var length = MaxCapacityInBytes + 5;
24+
var column = new PrimitiveDataFrameColumn<byte>("LargeColumn", length);
25+
var index = length - 1;
26+
column[index] = 33;
27+
28+
Assert.Equal((byte)33, column[index]);
29+
Assert.Null(column[index % MaxCapacityInBytes]);
30+
}
31+
1732
[Fact]
1833
public void TestNullCounts()
1934
{
@@ -446,15 +461,15 @@ public void TestArrowStringColumnClone()
446461
Assert.Null(clone[i]);
447462
}
448463

449-
[X64Fact("32-bit dosn't allow to allocate more than 2 Gb")]
464+
[X64Fact("32-bit doesn't allow to allocate more than 2 Gb")]
450465
public void TestAppend_SizeMoreThanMaxBufferCapacity()
451466
{
452467
//Check appending value, than can increase buffer size over MaxCapacity (default strategy is to double buffer capacity)
453468
PrimitiveDataFrameColumn<byte> intColumn = new PrimitiveDataFrameColumn<byte>("Byte1", int.MaxValue / 2 - 1);
454469
intColumn.Append(10);
455470
}
456471

457-
[X64Fact("32-bit dosn't allow to allocate more than 2 Gb")]
472+
[X64Fact("32-bit doesn't allow to allocate more than 2 Gb")]
458473
public void TestAppendMany_SizeMoreThanMaxBufferCapacity()
459474
{
460475
const int MaxCapacityInBytes = 2147483591;

0 commit comments

Comments
 (0)