Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
@InterfaceAudience.Public
public class FuzzyRowFilter extends FilterBase implements HintingFilter {
private static final boolean UNSAFE_UNALIGNED = HBasePlatformDependent.unaligned();
private List<Pair<byte[], byte[]>> fuzzyKeysData;
private final List<Pair<byte[], byte[]>> fuzzyKeysData;
// Used to record whether we want to skip the current row.
// Usually we should use filterRowKey here but in the current scan implementation, if filterRowKey
// returns true, we will just skip to next row, instead of calling getNextCellHint to determine
Expand All @@ -89,7 +89,7 @@ public class FuzzyRowFilter extends FilterBase implements HintingFilter {
/**
* Row tracker (keeps all next rows after SEEK_NEXT_USING_HINT was returned)
*/
private RowTracker tracker;
private final RowTracker tracker;

public FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData) {
List<Pair<byte[], byte[]>> fuzzyKeyDataCopy = new ArrayList<>(fuzzyKeysData.size());
Expand Down Expand Up @@ -200,7 +200,7 @@ public boolean filterRow() throws IOException {

@Override
public ReturnCode filterCell(final Cell c) {
final int startIndex = lastFoundIndex >= 0 ? lastFoundIndex : 0;
final int startIndex = Math.max(lastFoundIndex, 0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have checked, this compiles to the same inline, so it's not a performance problem.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Many thanks. 👍
Yes, this change was not made for performance reasons. IntelliJ IDEA simply suggested the replacement to make the line easier to read. It can be reverted if needed.

final int size = fuzzyKeysData.size();
for (int i = startIndex; i < size + startIndex; i++) {
final int index = i % size;
Expand All @@ -226,7 +226,7 @@ public ReturnCode filterCell(final Cell c) {
@Override
public Cell getNextCellHint(Cell currentCell) {
boolean result = tracker.updateTracker(currentCell);
if (result == false) {
if (!result) {
done = true;
return null;
}
Expand Down Expand Up @@ -574,25 +574,29 @@ public static Order orderFor(boolean reverse) {
}

/**
* @return greater byte array than given (row) which satisfies the fuzzy rule if it exists, null
* otherwise
* Find the closest next byte array that satisfies the fuzzy rule and is after the given one. In
* the reverse case the last non-max byte of the result is increased to make sure that the proper
* row is selected next.
* @return byte array which is after the given row and which satisfies the fuzzy rule if it
* exists, null otherwise
*/
static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int length,
byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
// To find out the next "smallest" byte array that satisfies fuzzy rule and "greater" than
// the given one we do the following:
// To find out the closest next byte array that satisfies fuzzy rule and is after the given one
// we do the following:
// 1. setting values on all "fixed" positions to the values from fuzzyKeyBytes
// 2. if during the first step given row did not increase, then we increase the value at
// the first "non-fixed" position (where it is not maximum already)

// It is easier to perform this by using fuzzyKeyBytes copy and setting "non-fixed" position
// values than otherwise.
byte[] result =
Arrays.copyOf(fuzzyKeyBytes, length > fuzzyKeyBytes.length ? length : fuzzyKeyBytes.length);
if (reverse && length > fuzzyKeyBytes.length) {
// we need trailing 0xff's instead of trailing 0x00's
for (int i = fuzzyKeyBytes.length; i < result.length; i++) {
result[i] = (byte) 0xFF;
byte[] result = Arrays.copyOf(fuzzyKeyBytes, Math.max(length, fuzzyKeyBytes.length));
if (reverse) {
// we need 0xff's instead of 0x00's
for (int i = 0; i < result.length; i++) {
if (result[i] == 0) {
result[i] = (byte) 0xFF;
}
}
}
int toInc = -1;
Expand Down Expand Up @@ -638,7 +642,14 @@ static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int l
}
}

return reverse ? result : trimTrailingZeroes(result, fuzzyKeyMeta, toInc);
byte[] trailingZerosTrimmed = trimTrailingZeroes(result, fuzzyKeyMeta, toInc);
if (reverse) {
// In the reverse case we increase last non-max byte to make sure that the proper row is
// selected next.
return PrivateCellUtil.increaseLastNonMaxByte(trailingZerosTrimmed);
} else {
return trailingZerosTrimmed;
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
package org.apache.hadoop.hbase.filter;

import java.util.ArrayList;
import java.util.Arrays;
import org.apache.hadoop.hbase.ByteBufferExtendedCell;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.PrivateCellUtil;
Expand Down Expand Up @@ -57,7 +56,7 @@ private void createCellHints() {
return;
}
// On reversed scan hint should be the prefix with last byte incremented
byte[] reversedHintBytes = increaseLastNonMaxByte(this.prefix);
byte[] reversedHintBytes = PrivateCellUtil.increaseLastNonMaxByte(this.prefix);
this.reversedNextCellHint =
PrivateCellUtil.createFirstOnRow(reversedHintBytes, 0, (short) reversedHintBytes.length);
// On forward scan hint should be the prefix
Expand Down Expand Up @@ -132,18 +131,6 @@ public Cell getNextCellHint(Cell cell) {
}
}

/**
 * Returns a copy of the given array in which the last byte that is not already the maximum
 * unsigned value ({@code 0xFF}) is increased by one. The input array is never modified.
 * <p>
 * Bytes are treated as unsigned: {@code 0x7F} becomes {@code 0x80}, and {@code 0xFF} is skipped
 * rather than wrapped around to {@code 0x00} (which would make the result sort before the
 * input).
 * @param bytes the array to copy and increase; must not be null
 * @return a new array of the same length; identical in content to the input when the input is
 *         empty or consists solely of {@code 0xFF} bytes
 */
private byte[] increaseLastNonMaxByte(byte[] bytes) {
  byte[] result = Arrays.copyOf(bytes, bytes.length);
  for (int i = result.length - 1; i >= 0; i--) {
    // Java bytes are signed, so compare against 0xFF explicitly instead of Byte.MAX_VALUE (0x7F).
    if (result[i] != (byte) 0xFF) {
      result[i]++;
      break;
    }
  }
  return result;
}

public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments) {
Preconditions.checkArgument(filterArguments.size() == 1, "Expected 1 but got: %s",
filterArguments.size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
Expand Down Expand Up @@ -3095,4 +3096,16 @@ public static long getSequenceId(Cell c) {
return HConstants.NO_SEQNUM;
}
}

/**
 * Returns a copy of the given array in which the last byte that is not already the maximum
 * unsigned value ({@code 0xFF}) is increased by one. The input array is never modified.
 * <p>
 * Bytes are treated as unsigned: {@code 0x7F} becomes {@code 0x80}, and {@code 0xFF} is skipped
 * rather than wrapped around to {@code 0x00} (which would make the result sort before the
 * input).
 * @param bytes the array to copy and increase; must not be null
 * @return a new array of the same length; identical in content to the input when the input is
 *         empty or consists solely of {@code 0xFF} bytes
 */
public static byte[] increaseLastNonMaxByte(byte[] bytes) {
  byte[] result = Arrays.copyOf(bytes, bytes.length);
  for (int i = result.length - 1; i >= 0; i--) {
    // Java bytes are signed, so compare against 0xFF explicitly instead of Byte.MAX_VALUE (0x7F).
    if (result[i] != (byte) 0xFF) {
      result[i]++;
      break;
    }
  }
  return result;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -164,14 +164,20 @@ public void testGetNextForFuzzyRuleForward() {
new byte[] { 1, 0, 2, 0, 1 }, // current
new byte[] { 1, 1, 0, 2 }); // expected next

assertNext(false, new byte[] { 1, 0, 1 }, new byte[] { -1, 0, -1 },
new byte[] { 1, (byte) 128, 2, 0, 1 }, new byte[] { 1, (byte) 129, 1 });
assertNext(false, new byte[] { 1, 0, 1 }, // fuzzy row
new byte[] { -1, 0, -1 }, // mask
new byte[] { 1, (byte) 128, 2, 0, 1 }, // current
new byte[] { 1, (byte) 129, 1 }); // expected next

assertNext(false, new byte[] { 0, 1, 0, 1 }, new byte[] { 0, -1, 0, -1 },
new byte[] { 5, 1, 0, 1 }, new byte[] { 5, 1, 1, 1 });
assertNext(false, new byte[] { 0, 1, 0, 1 }, // fuzzy row
new byte[] { 0, -1, 0, -1 }, // mask
new byte[] { 5, 1, 0, 1 }, // current
new byte[] { 5, 1, 1, 1 }); // expected next

assertNext(false, new byte[] { 0, 1, 0, 1 }, new byte[] { 0, -1, 0, -1 },
new byte[] { 5, 1, 0, 1, 1 }, new byte[] { 5, 1, 0, 1, 2 });
assertNext(false, new byte[] { 0, 1, 0, 1 }, // fuzzy row
new byte[] { 0, -1, 0, -1 }, // mask
new byte[] { 5, 1, 0, 1, 1 }, // current
new byte[] { 5, 1, 0, 1, 2 }); // expected next

assertNext(false, new byte[] { 0, 1, 0, 0 }, // fuzzy row
new byte[] { 0, -1, 0, 0 }, // mask
Expand All @@ -188,23 +194,35 @@ public void testGetNextForFuzzyRuleForward() {
new byte[] { 5, 1, (byte) 255, 0 }, // current
new byte[] { 5, 1, (byte) 255, 1 }); // expected next

assertNext(false, new byte[] { 5, 1, 1, 0 }, new byte[] { -1, -1, 0, 0 },
new byte[] { 5, 1, (byte) 255, 1 }, new byte[] { 5, 1, (byte) 255, 2 });
assertNext(false, new byte[] { 5, 1, 1, 0 }, // fuzzy row
new byte[] { -1, -1, 0, 0 }, // mask
new byte[] { 5, 1, (byte) 255, 1 }, // current
new byte[] { 5, 1, (byte) 255, 2 }); // expected next

assertNext(false, new byte[] { 1, 1, 1, 1 }, new byte[] { -1, -1, 0, 0 },
new byte[] { 1, 1, 2, 2 }, new byte[] { 1, 1, 2, 3 });
assertNext(false, new byte[] { 1, 1, 1, 1 }, // fuzzy row
new byte[] { -1, -1, 0, 0 }, // mask
new byte[] { 1, 1, 2, 2 }, // current
new byte[] { 1, 1, 2, 3 }); // expected next

assertNext(false, new byte[] { 1, 1, 1, 1 }, new byte[] { -1, -1, 0, 0 },
new byte[] { 1, 1, 3, 2 }, new byte[] { 1, 1, 3, 3 });
assertNext(false, new byte[] { 1, 1, 1, 1 }, // fuzzy row
new byte[] { -1, -1, 0, 0 }, // mask
new byte[] { 1, 1, 3, 2 }, // current
new byte[] { 1, 1, 3, 3 }); // expected next

assertNext(false, new byte[] { 1, 1, 1, 1 }, new byte[] { 0, 0, 0, 0 },
new byte[] { 1, 1, 2, 3 }, new byte[] { 1, 1, 2, 4 });
assertNext(false, new byte[] { 1, 1, 1, 1 }, // fuzzy row
new byte[] { 0, 0, 0, 0 }, // mask
new byte[] { 1, 1, 2, 3 }, // current
new byte[] { 1, 1, 2, 4 }); // expected next

assertNext(false, new byte[] { 1, 1, 1, 1 }, new byte[] { 0, 0, 0, 0 },
new byte[] { 1, 1, 3, 2 }, new byte[] { 1, 1, 3, 3 });
assertNext(false, new byte[] { 1, 1, 1, 1 }, // fuzzy row
new byte[] { 0, 0, 0, 0 }, // mask
new byte[] { 1, 1, 3, 2 }, // current
new byte[] { 1, 1, 3, 3 }); // expected next

assertNext(false, new byte[] { 1, 1, 0, 0 }, new byte[] { -1, -1, 0, 0 },
new byte[] { 0, 1, 3, 2 }, new byte[] { 1, 1 });
assertNext(false, new byte[] { 1, 1, 0, 0 }, // fuzzy row
new byte[] { -1, -1, 0, 0 }, // mask
new byte[] { 0, 1, 3, 2 }, // current
new byte[] { 1, 1 }); // expected next

// No next for this one
Assert.assertNull(FuzzyRowFilter.getNextForFuzzyRule(new byte[] { 2, 3, 1, 1, 1 }, // row to
Expand All @@ -221,94 +239,100 @@ public void testGetNextForFuzzyRuleForward() {

@Test
public void testGetNextForFuzzyRuleReverse() {
// In these reverse cases for the next row key the last non-max byte should be increased
// to make sure that the proper row is selected next by the scanner.
// For example:
// fuzzy row: 0,1,2
// mask: 0,-1,-1
// current: 1,2,1,0,1
// next would be: 1,1,2
// this has to be increased to 1,1,3 to make sure that the proper row is selected next.
assertNext(true, new byte[] { 0, 1, 2 }, // fuzzy row
new byte[] { 0, -1, -1 }, // mask
new byte[] { 1, 2, 1, 0, 1 }, // current
// TODO: should be {1, 1, 3} ?
new byte[] { 1, 1, 2, (byte) 0xFF, (byte) 0xFF }); // expected next
new byte[] { 1, 1, 3 }); // expected next

assertNext(true, new byte[] { 0, 1, 0, 2, 0 }, // fuzzy row
new byte[] { 0, -1, 0, -1, 0 }, // mask
new byte[] { 1, 2, 1, 3, 1 }, // current
// TODO: should be {1, 1, 1, 3} ?
new byte[] { 1, 1, 0, 2, 0 }); // expected next
new byte[] { 1, 1, (byte) 255, 3 }); // expected next

assertNext(true, new byte[] { 1, 0, 1 }, new byte[] { -1, 0, -1 },
new byte[] { 1, (byte) 128, 2, 0, 1 },
// TODO: should be {1, (byte) 128, 2} ?
new byte[] { 1, (byte) 128, 1, (byte) 0xFF, (byte) 0xFF });
assertNext(true, new byte[] { 1, 0, 1 }, // fuzzy row
new byte[] { -1, 0, -1 }, // mask
new byte[] { 1, (byte) 128, 2, 0, 1 }, // current
new byte[] { 1, (byte) 128, 2 }); // expected next

assertNext(true, new byte[] { 0, 1, 0, 1 }, new byte[] { 0, -1, 0, -1 },
new byte[] { 5, 1, 0, 2, 1 },
// TODO: should be {5, 1, 0, 2} ?
new byte[] { 5, 1, 0, 1, (byte) 0xFF });
assertNext(true, new byte[] { 0, 1, 0, 1 }, // fuzzy row
new byte[] { 0, -1, 0, -1 }, // mask
new byte[] { 5, 1, 0, 2, 1 }, // current
new byte[] { 5, 1, 0, 2 }); // expected next

assertNext(true, new byte[] { 0, 1, 0, 0 }, // fuzzy row
new byte[] { 0, -1, 0, 0 }, // mask
new byte[] { 5, 1, (byte) 255, 1 }, // current
new byte[] { 5, 1, (byte) 255, 0 }); // expected next
new byte[] { 5, 1, (byte) 255, 1 }); // expected next

assertNext(true, new byte[] { 0, 1, 0, 1 }, // fuzzy row
new byte[] { 0, -1, 0, -1 }, // mask
new byte[] { 5, 1, 0, 1 }, // current
new byte[] { 4, 1, (byte) 255, 1 }); // expected next
new byte[] { 4, 1, (byte) 255, 2 }); // expected next

assertNext(true, new byte[] { 0, 1, 0, 1 }, // fuzzy row
new byte[] { 0, -1, 0, -1 }, // mask
new byte[] { 5, 1, (byte) 255, 0 }, // current
new byte[] { 5, 1, (byte) 254, 1 }); // expected next
new byte[] { 5, 1, (byte) 254, 2 }); // expected next

assertNext(true, new byte[] { 1, 1, 0, 0 }, new byte[] { -1, -1, 0, 0 },
new byte[] { 2, 1, 3, 2 },
// TODO: should be {1, 0} ?
new byte[] { 1, 1, 0, 0 });
assertNext(true, new byte[] { 1, 1, 0, 0 }, // fuzzy row
new byte[] { -1, -1, 0, 0 }, // mask
new byte[] { 2, 1, 3, 2 }, // current
new byte[] { 1, 2 }); // expected next

assertNext(true, new byte[] { 1, 0, 1 }, // fuzzy row
new byte[] { -1, 0, -1 }, // mask
new byte[] { 2, 3, 1, 1, 1 }, // row to check
// TODO: should be {1, (byte) 0xFF, 2} ?
new byte[] { 1, 0, 1, (byte) 0xFF, (byte) 0xFF });

assertNext(true, new byte[] { 1, 1, 0, 3 }, new byte[] { -1, -1, 0, -1 },
new byte[] { 1, (byte) 245, 1, 3, 0 },
// TODO: should be {1, 1, (byte) 255, 4} ?
new byte[] { 1, 1, 0, 3, (byte) 0xFF });

assertNext(true, new byte[] { 1, 2, 0, 3 }, new byte[] { -1, -1, 0, -1 },
new byte[] { 1, 3, 1, 3, 0 },
// TODO: should be 1, 2, (byte) 255, 4 ?
new byte[] { 1, 2, 0, 3, (byte) 0xFF });

assertNext(true, new byte[] { 1, 2, 0, 3 }, new byte[] { -1, -1, 0, -1 },
new byte[] { 2, 1, 1, 1, 0 },
// TODO: should be {1, 2, (byte) 255, 4} ?
new byte[] { 1, 2, 0, 3, (byte) 0xFF });

assertNext(true,
// TODO: should be null?
new byte[] { 1, 0, 1 }, new byte[] { -1, 0, -1 }, new byte[] { 1, (byte) 128, 2 },
new byte[] { 1, (byte) 128, 1 });

assertNext(true,
// TODO: should be null?
new byte[] { 0, 1, 0, 1 }, new byte[] { 0, -1, 0, -1 }, new byte[] { 5, 1, 0, 2 },
new byte[] { 5, 1, 0, 1 });

assertNext(true,
// TODO: should be null?
new byte[] { 5, 1, 1, 0 }, new byte[] { -1, -1, 0, 0 }, new byte[] { 5, 1, (byte) 0xFF, 1 },
new byte[] { 5, 1, (byte) 0xFF, 0 });

assertNext(true,
// TODO: should be null?
new byte[] { 1, 1, 1, 1 }, new byte[] { -1, -1, 0, 0 }, new byte[] { 1, 1, 2, 2 },
new byte[] { 1, 1, 2, 1 });

assertNext(true,
// TODO: should be null?
new byte[] { 1, 1, 1, 1 }, new byte[] { 0, 0, 0, 0 }, new byte[] { 1, 1, 2, 3 },
new byte[] { 1, 1, 2, 2 });
new byte[] { 1, (byte) 255, 2 }); // expected next

assertNext(true, new byte[] { 1, 1, 0, 3 }, // fuzzy row
new byte[] { -1, -1, 0, -1 }, // mask
new byte[] { 1, (byte) 245, 1, 3, 0 }, // row to check
new byte[] { 1, 1, (byte) 255, 4 }); // expected next

assertNext(true, new byte[] { 1, 2, 0, 3 }, // fuzzy row
new byte[] { -1, -1, 0, -1 }, // mask
new byte[] { 1, 3, 1, 3, 0 }, // row to check
new byte[] { 1, 2, (byte) 255, 4 }); // expected next

assertNext(true, new byte[] { 1, 2, 0, 3 }, // fuzzy row
new byte[] { -1, -1, 0, -1 }, // mask
new byte[] { 2, 1, 1, 1, 0 }, // row to check
new byte[] { 1, 2, (byte) 255, 4 }); // expected next

assertNext(true, new byte[] { 1, 0, 1 }, // fuzzy row
new byte[] { -1, 0, -1 }, // mask
new byte[] { 1, (byte) 128, 2 }, // row to check
new byte[] { 1, (byte) 128, 2 }); // expected next

assertNext(true, new byte[] { 0, 1, 0, 1 }, // fuzzy row
new byte[] { 0, -1, 0, -1 }, // mask
new byte[] { 5, 1, 0, 2 }, // row to check
new byte[] { 5, 1, 0, 2 }); // expected next

assertNext(true, new byte[] { 5, 1, 1, 0 }, // fuzzy row
new byte[] { -1, -1, 0, 0 }, // mask
new byte[] { 5, 1, (byte) 0xFF, 1 }, // row to check
new byte[] { 5, 1, (byte) 0xFF, 1 }); // expected next

assertNext(true, new byte[] { 1, 1, 1, 1 }, // fuzzy row
new byte[] { -1, -1, 0, 0 }, // mask
new byte[] { 1, 1, 2, 2 }, // row to check
new byte[] { 1, 1, 2, 2 }); // expected next

assertNext(true, new byte[] { 1, 1, 1, 1 }, // fuzzy row
new byte[] { 0, 0, 0, 0 }, // mask
new byte[] { 1, 1, 2, 3 }, // row to check
new byte[] { 1, 1, 2, 3 }); // expected next

// no cell before the current one satisfies the fuzzy rule -> null
Assert.assertNull(FuzzyRowFilter.getNextForFuzzyRule(true, new byte[] { 1, 1, 1, 3, 0 },
new byte[] { 1, 2, 0, 3 }, new byte[] { -1, -1, 0, -1 }));
}
Expand Down
Loading