Skip to content

Commit dcfeceb

Browse files
adamreeve authored and raulcd committed
GH-41140: [C#] Account for offset and length in union arrays (#41165)
### Rationale for this change

See #41140. This makes a sliced union array behave as expected without having to manually account for the array offset unless accessing the underlying buffers.

### What changes are included in this PR?

Accounts for the offset and length when getting type ids, value offsets and field arrays for sparse and dense union arrays.

### Are these changes tested?

Yes, I've updated the union array tests to cover this.

### Are there any user-facing changes?

Yes, this is a user-facing bug fix.

* GitHub Issue: #41140

Authored-by: Adam Reeve <[email protected]>
Signed-off-by: Curt Hagenlocher <[email protected]>
1 parent b28633c commit dcfeceb

File tree

4 files changed

+85
-19
lines changed

4 files changed

+85
-19
lines changed

csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ public class DenseUnionArray : UnionArray
2424
{
2525
public ArrowBuffer ValueOffsetBuffer => Data.Buffers[1];
2626

27-
public ReadOnlySpan<int> ValueOffsets => ValueOffsetBuffer.Span.CastTo<int>();
27+
public ReadOnlySpan<int> ValueOffsets => ValueOffsetBuffer.Span.CastTo<int>().Slice(Offset, Length);
2828

2929
public DenseUnionArray(
3030
IArrowType dataType,
@@ -38,7 +38,6 @@ public DenseUnionArray(
3838
dataType, length, nullCount, offset, new[] { typeIds, valuesOffsetBuffer },
3939
children.Select(child => child.Data)))
4040
{
41-
_fields = children.ToArray();
4241
ValidateMode(UnionMode.Dense, Type.Mode);
4342
}
4443

csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,6 @@ public SparseUnionArray(
3232
dataType, length, nullCount, offset, new[] { typeIds },
3333
children.Select(child => child.Data)))
3434
{
35-
_fields = children.ToArray();
3635
ValidateMode(UnionMode.Sparse, Type.Mode);
3736
}
3837

csharp/src/Apache.Arrow/Arrays/UnionArray.cs

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ public abstract class UnionArray : IArrowArray
2525
protected IReadOnlyList<IArrowArray> _fields;
2626

2727
public IReadOnlyList<IArrowArray> Fields =>
28-
LazyInitializer.EnsureInitialized(ref _fields, () => InitializeFields());
28+
LazyInitializer.EnsureInitialized(ref _fields, InitializeFields);
2929

3030
public ArrayData Data { get; }
3131

@@ -35,7 +35,7 @@ public abstract class UnionArray : IArrowArray
3535

3636
public ArrowBuffer TypeBuffer => Data.Buffers[0];
3737

38-
public ReadOnlySpan<byte> TypeIds => TypeBuffer.Span;
38+
public ReadOnlySpan<byte> TypeIds => TypeBuffer.Span.Slice(Offset, Length);
3939

4040
public int Length => Data.Length;
4141

@@ -106,7 +106,14 @@ private IReadOnlyList<IArrowArray> InitializeFields()
106106
IArrowArray[] result = new IArrowArray[Data.Children.Length];
107107
for (int i = 0; i < Data.Children.Length; i++)
108108
{
109-
result[i] = ArrowArrayFactory.BuildArray(Data.Children[i]);
109+
var childData = Data.Children[i];
110+
if (Mode == UnionMode.Sparse && (Data.Offset != 0 || childData.Length != Data.Length))
111+
{
112+
// We only slice the child data for sparse mode,
113+
// so that the sliced value offsets remain valid in dense mode
114+
childData = childData.Slice(Data.Offset, Data.Length);
115+
}
116+
result[i] = ArrowArrayFactory.BuildArray(childData);
110117
}
111118
return result;
112119
}

csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs

Lines changed: 74 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
// See the License for the specific language governing permissions and
1414
// limitations under the License.
1515

16+
using System;
1617
using System.Linq;
1718
using Apache.Arrow.Types;
1819
using Xunit;
@@ -24,7 +25,7 @@ public class UnionArrayTests
2425
[Theory]
2526
[InlineData(UnionMode.Sparse)]
2627
[InlineData(UnionMode.Dense)]
27-
public void UnionArray_IsNull(UnionMode mode)
28+
public void UnionArrayIsNull(UnionMode mode)
2829
{
2930
var (array, expectedNull) = BuildUnionArray(mode, 100);
3031

@@ -38,40 +39,100 @@ public void UnionArray_IsNull(UnionMode mode)
3839
[Theory]
3940
[InlineData(UnionMode.Sparse)]
4041
[InlineData(UnionMode.Dense)]
41-
public void UnionArray_Slice(UnionMode mode)
42+
public void UnionArraySlice(UnionMode mode)
4243
{
4344
var (array, expectedNull) = BuildUnionArray(mode, 10);
4445

4546
for (var offset = 0; offset < array.Length; ++offset)
4647
{
4748
for (var length = 0; length < array.Length - offset; ++length)
4849
{
49-
var slicedArray = ArrowArrayFactory.Slice(array, offset, length);
50+
var slicedArray = (UnionArray)ArrowArrayFactory.Slice(array, offset, length);
5051

5152
var nullCount = 0;
5253
for (var i = 0; i < slicedArray.Length; ++i)
5354
{
54-
// TODO: Shouldn't need to add offset in IsNull/IsValid calls,
55-
// see https://github.com/apache/arrow/issues/41140
56-
Assert.Equal(expectedNull[offset + i], slicedArray.IsNull(offset + i));
57-
Assert.Equal(!expectedNull[offset + i], slicedArray.IsValid(offset + i));
55+
Assert.Equal(expectedNull[offset + i], slicedArray.IsNull(i));
56+
Assert.Equal(!expectedNull[offset + i], slicedArray.IsValid(i));
5857
nullCount += expectedNull[offset + i] ? 1 : 0;
58+
59+
CompareValue(array, offset + i, slicedArray, i);
5960
}
6061

61-
Assert.True(nullCount == slicedArray.NullCount, $"offset = {offset}, length = {length}");
6262
Assert.Equal(nullCount, slicedArray.NullCount);
6363
}
6464
}
6565
}
6666

67-
private static (UnionArray array, bool[] isNull) BuildUnionArray(UnionMode mode, int length)
67+
[Theory]
68+
[InlineData(UnionMode.Sparse)]
69+
[InlineData(UnionMode.Dense)]
70+
public void UnionArrayConstructedWithOffset(UnionMode mode)
71+
{
72+
const int length = 10;
73+
var (array, expectedNull) = BuildUnionArray(mode, length);
74+
75+
for (var offset = 0; offset < array.Length; ++offset)
76+
{
77+
var (slicedArray, _) = BuildUnionArray(mode, length, offset);
78+
79+
var nullCount = 0;
80+
for (var i = 0; i < slicedArray.Length; ++i)
81+
{
82+
Assert.Equal(expectedNull[offset + i], slicedArray.IsNull(i));
83+
Assert.Equal(!expectedNull[offset + i], slicedArray.IsValid(i));
84+
nullCount += expectedNull[offset + i] ? 1 : 0;
85+
86+
CompareValue(array, offset + i, slicedArray, i);
87+
}
88+
89+
Assert.Equal(nullCount, slicedArray.NullCount);
90+
}
91+
}
92+
93+
private static void CompareValue(UnionArray originalArray, int originalIndex, UnionArray slicedArray, int sliceIndex)
94+
{
95+
var typeId = originalArray.TypeIds[originalIndex];
96+
var sliceTypeId = slicedArray.TypeIds[sliceIndex];
97+
Assert.Equal(typeId, sliceTypeId);
98+
99+
switch (typeId)
100+
{
101+
case 0:
102+
CompareFieldValue<int, Int32Array>(typeId, originalArray, originalIndex, slicedArray, sliceIndex);
103+
break;
104+
case 1:
105+
CompareFieldValue<float, FloatArray>(typeId, originalArray, originalIndex, slicedArray, sliceIndex);
106+
break;
107+
default:
108+
throw new Exception($"Unexpected type id {typeId}");
109+
}
110+
}
111+
112+
private static void CompareFieldValue<T, TArray>(byte typeId, UnionArray originalArray, int originalIndex, UnionArray slicedArray, int sliceIndex)
113+
where T: struct
114+
where TArray : PrimitiveArray<T>
115+
{
116+
if (originalArray is DenseUnionArray denseOriginalArray)
117+
{
118+
Assert.IsType<DenseUnionArray>(slicedArray);
119+
120+
originalIndex = denseOriginalArray.ValueOffsets[originalIndex];
121+
sliceIndex = ((DenseUnionArray)slicedArray).ValueOffsets[sliceIndex];
122+
}
123+
var originalValue = ((TArray)originalArray.Fields[typeId]).GetValue(originalIndex);
124+
var sliceValue = ((TArray)slicedArray.Fields[typeId]).GetValue(sliceIndex);
125+
Assert.Equal(originalValue, sliceValue);
126+
}
127+
128+
private static (UnionArray array, bool[] isNull) BuildUnionArray(UnionMode mode, int length, int offset=0)
68129
{
69130
var fields = new Field[]
70131
{
71132
new Field("field0", new Int32Type(), true),
72133
new Field("field1", new FloatType(), true),
73134
};
74-
var typeIds = fields.Select(f => (int) f.DataType.TypeId).ToArray();
135+
var typeIds = new[] { 0, 1 };
75136
var type = new UnionType(fields, typeIds, mode);
76137

77138
var nullCount = 0;
@@ -85,7 +146,7 @@ private static (UnionArray array, bool[] isNull) BuildUnionArray(UnionMode mode,
85146
{
86147
var isNull = i % 3 == 0;
87148
expectedNull[i] = isNull;
88-
nullCount += isNull ? 1 : 0;
149+
nullCount += (isNull && i >= offset) ? 1 : 0;
89150

90151
if (i % 2 == 0)
91152
{
@@ -140,8 +201,8 @@ private static (UnionArray array, bool[] isNull) BuildUnionArray(UnionMode mode,
140201
};
141202

142203
UnionArray array = mode == UnionMode.Dense
143-
? new DenseUnionArray(type, length, children, typeIdsBuffer, valuesOffsetBuffer, nullCount)
144-
: new SparseUnionArray(type, length, children, typeIdsBuffer, nullCount);
204+
? new DenseUnionArray(type, length - offset, children, typeIdsBuffer, valuesOffsetBuffer, nullCount, offset)
205+
: new SparseUnionArray(type, length - offset, children, typeIdsBuffer, nullCount, offset);
145206

146207
return (array, expectedNull);
147208
}

0 commit comments

Comments
 (0)