Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions vector/src/main/codegen/templates/UnionMapWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -243,4 +243,16 @@ public ExtensionWriter extension(ArrowType type) {
return super.extension(type);
}
}

@Override
public FixedSizeBinaryWriter fixedSizeBinary() {
switch (mode) {
case KEY:
return entryWriter.fixedSizeBinary(MapVector.KEY_NAME);
case VALUE:
return entryWriter.fixedSizeBinary(MapVector.VALUE_NAME);
default:
return this;
}
}
}
214 changes: 214 additions & 0 deletions vector/src/test/java/org/apache/arrow/vector/TestMapVector.java
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose it technically works but I find it very confusing to recycle the test, especially since the map value was always BigInt before. I'd rather see a new test that exercises both the key/value paths explicitly and is explicitly marked as a regression test.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lidavidm , thank you for looking into it! Sorry for the confusion. Let me introduce a separate unit test.

--

Could you please clarify what do you mean by

explicitly marked as a regression test

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/** Regression test for https://github.com/apache/arrow-java/issues/586 */
@Test
void mapFixedSizeBinary() {
  // ...

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lidavidm , I've implemented a separate unit test for fixedSizeBinary.

However, I came across an issue. When I call writer.key().fixedSizeBinary() for the first time, it fails with NPE, as it falls back to NullableStructWriter which requires writer to be not-null (checkout the testFixedSizeBinaryWriterInitializationError test).
It work when I first initialize writers for both key and value fields with other writes, e.g. bigInt (checkout testFixedSizeBinaryWriter test).

Could you help me here? What would be the right way to initialize writes for key and value fields for the UnionMapWriter?

Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@
*/
package org.apache.arrow.vector;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.nio.ByteBuffer;
Expand All @@ -41,6 +43,7 @@
import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
import org.apache.arrow.vector.complex.writer.FieldWriter;
import org.apache.arrow.vector.holder.UuidHolder;
import org.apache.arrow.vector.holders.FixedSizeBinaryHolder;
import org.apache.arrow.vector.types.Types.MinorType;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
Expand Down Expand Up @@ -1359,4 +1362,215 @@ public void testCopyFromForExtensionType() throws Exception {
assertEquals(u2, actualUuid);
}
}

private FixedSizeBinaryHolder getFixedSizeBinaryHolder(byte[] array) {
FixedSizeBinaryHolder holder = new FixedSizeBinaryHolder();
holder.byteWidth = array.length;
holder.buffer = allocator.buffer(array.length);
for (int i = 0; i < array.length; i++) {
holder.buffer.setByte(i, array[i]);
}

return holder;
}

/**
* Regression test for GH-586: UnionMapWriter.fixedSizeBinary() should properly delegate to the
* entry writer for both key and value paths.
*/
@Test
public void testFixedSizeBinaryWriter() {
try (MapVector mapVector = MapVector.empty("map_vector", allocator, false)) {
UnionMapWriter writer = mapVector.getWriter();
writer.allocate();

// populate input vector with the following records
// {1 -> 2, 2 -> 3} - is used to initialize `key` and `value` writers - todo: this needs to be
// removed (test should work without it)
// {1 -> [11, 22], 2 -> [32, 21]}
// null
// {[11, 22] -> 1, [32, 21] -> 2}
// {[11, 22] -> null}
// {null -> [32, 21]} - wrong "for a given entry, the "key" is non-nullable" - todo: it
// shouldn't work. Should it?
writer.setPosition(0); // optional
writer.startMap();
writer.startEntry();
writer.key().bigInt().writeBigInt(1);
writer.value().bigInt().writeBigInt(2);
writer.endEntry();
writer.startEntry();
writer.key().bigInt().writeBigInt(2);
writer.value().bigInt().writeBigInt(3);
writer.endEntry();
writer.endMap();

// {1 -> [11, 22], 2 -> [32, 21]}
FixedSizeBinaryHolder holder1 = getFixedSizeBinaryHolder(new byte[] {11, 22});
FixedSizeBinaryHolder holder2 = getFixedSizeBinaryHolder(new byte[] {32, 21});
writer.setPosition(1);
writer.startMap();
writer.startEntry();
writer.key().bigInt().writeBigInt(1);
writer.value().fixedSizeBinary().write(holder1);
writer.endEntry();
holder1.buffer.close();
writer.startEntry();
writer.key().bigInt().writeBigInt(2);
writer.value().fixedSizeBinary().write(holder2);
writer.endEntry();
writer.endMap();
holder2.buffer.close();

// {[11, 22] -> 1, [32, 21] -> 2}
holder1 = getFixedSizeBinaryHolder(new byte[] {11, 22});
holder2 = getFixedSizeBinaryHolder(new byte[] {32, 21});
writer.setPosition(3);
writer.startMap();
writer.startEntry();
writer.key().fixedSizeBinary().write(holder1);
writer.value().bigInt().writeBigInt(1);
writer.endEntry();
holder1.buffer.close();
writer.startEntry();
writer.key().fixedSizeBinary().write(holder2);
writer.value().bigInt().writeBigInt(2);
writer.endEntry();
writer.endMap();
holder2.buffer.close();

// {[11, 22] -> null}
holder1 = getFixedSizeBinaryHolder(new byte[] {11, 22});
writer.setPosition(4);
writer.startMap();
writer.startEntry();
writer.key().fixedSizeBinary().write(holder1);
writer.endEntry();
writer.endMap();
holder1.buffer.close();

// {null -> [32, 21]}
holder2 = getFixedSizeBinaryHolder(new byte[] {32, 21});
writer.setPosition(5);
writer.startMap();
writer.startEntry();
writer.value().fixedSizeBinary().write(holder2);
writer.endEntry();
writer.endMap();
holder2.buffer.close();

writer.setValueCount(6);

// assert the output vector is correct
FieldReader reader = mapVector.getReader();
assertTrue(reader.isSet(), "shouldn't be null");
reader.setPosition(1);
assertTrue(reader.isSet(), "shouldn't be null");
reader.setPosition(2);
assertFalse(reader.isSet(), "should be null");
reader.setPosition(3);
assertTrue(reader.isSet(), "shouldn't be null");
reader.setPosition(4);
assertTrue(reader.isSet(), "shouldn't be null");
reader.setPosition(5);
assertTrue(reader.isSet(), "shouldn't be null");

/* index 0 */
Object result = mapVector.getObject(0);
ArrayList<?> resultSet = (ArrayList<?>) result;
assertEquals(2, resultSet.size());
Map<?, ?> resultStruct = (Map<?, ?>) resultSet.get(0);
assertEquals(1L, getResultKey(resultStruct));
assertEquals(2L, getResultValue(resultStruct));
resultStruct = (Map<?, ?>) resultSet.get(1);
assertEquals(2L, getResultKey(resultStruct));
assertEquals(3L, getResultValue(resultStruct));

/* index 1 */
result = mapVector.getObject(1);
resultSet = (ArrayList<?>) result;
assertEquals(2, resultSet.size());
resultStruct = (Map<?, ?>) resultSet.get(0);
assertEquals(1L, getResultKey(resultStruct));
assertTrue(resultStruct.containsKey(MapVector.VALUE_NAME));
assertArrayEquals(new byte[] {11, 22}, (byte[]) resultStruct.get(MapVector.VALUE_NAME));
resultStruct = (Map<?, ?>) resultSet.get(1);
assertEquals(2L, getResultKey(resultStruct));
assertTrue(resultStruct.containsKey(MapVector.VALUE_NAME));
assertArrayEquals(new byte[] {32, 21}, (byte[]) resultStruct.get(MapVector.VALUE_NAME));

/* index 2 */
result = mapVector.getObject(2);
assertNull(result);

/* index 3 */
result = mapVector.getObject(3);
resultSet = (ArrayList<?>) result;
assertEquals(2, resultSet.size());
resultStruct = (Map<?, ?>) resultSet.get(0);
assertTrue(resultStruct.containsKey(MapVector.KEY_NAME));
assertArrayEquals(new byte[] {11, 22}, (byte[]) resultStruct.get(MapVector.KEY_NAME));
assertEquals(1L, getResultValue(resultStruct));
resultStruct = (Map<?, ?>) resultSet.get(1);
assertTrue(resultStruct.containsKey(MapVector.KEY_NAME));
assertArrayEquals(new byte[] {32, 21}, (byte[]) resultStruct.get(MapVector.KEY_NAME));
assertEquals(2L, getResultValue(resultStruct));

/* index 4 */
result = mapVector.getObject(4);
resultSet = (ArrayList<?>) result;
assertEquals(1, resultSet.size());
resultStruct = (Map<?, ?>) resultSet.get(0);
assertTrue(resultStruct.containsKey(MapVector.KEY_NAME));
assertArrayEquals(new byte[] {11, 22}, (byte[]) resultStruct.get(MapVector.KEY_NAME));
assertFalse(resultStruct.containsKey(MapVector.VALUE_NAME));

/* index 5 */
result = mapVector.getObject(5);
resultSet = (ArrayList<?>) result;
assertEquals(1, resultSet.size());
resultStruct = (Map<?, ?>) resultSet.get(0);
assertFalse(resultStruct.containsKey(MapVector.KEY_NAME));
assertTrue(resultStruct.containsKey(MapVector.VALUE_NAME));
assertArrayEquals(new byte[] {32, 21}, (byte[]) resultStruct.get(MapVector.VALUE_NAME));
}
}

@Test
public void testFixedSizeBinaryWriterInitializationError() {
try (MapVector mapVector = MapVector.empty("map_vector", allocator, false)) {
UnionMapWriter writer = mapVector.getWriter();
writer.allocate();

// populate input vector with the following records
// {[11, 22] -> [32, 21]} - todo: it shouldn't throw. Should it?
FixedSizeBinaryHolder holder1 = getFixedSizeBinaryHolder(new byte[] {11, 22});
FixedSizeBinaryHolder holder2 = getFixedSizeBinaryHolder(new byte[] {32, 21});

writer.setPosition(0); // optional
writer.startMap();
writer.startEntry();
assertThrows(NullPointerException.class, () -> writer.key().fixedSizeBinary().write(holder1));
assertThrows(
NullPointerException.class, () -> writer.value().fixedSizeBinary().write(holder2));
writer.endEntry();
holder1.buffer.close();
holder2.buffer.close();
writer.endMap();

writer.setValueCount(1);

// assert the output vector is correct
FieldReader reader = mapVector.getReader();
assertTrue(reader.isSet(), "shouldn't be null");

/* index 0 */
Object result = mapVector.getObject(0);
ArrayList<?> resultSet = (ArrayList<?>) result;
assertEquals(1, resultSet.size());
Map<?, ?> resultStruct = (Map<?, ?>) resultSet.get(0);
assertFalse(resultStruct.containsKey(MapVector.KEY_NAME));
assertFalse(resultStruct.containsKey(MapVector.VALUE_NAME));
}
}
}