Skip to content

Commit abe3e67

Browse files
PDavidstoty
authored andcommitted
HBASE-28634 Fix FuzzyRowFilter may not return data on reverse scans (#6457)
Signed-off-by: Istvan Toth <[email protected]> (cherry picked from commit a735eff)
1 parent 581d8c6 commit abe3e67

File tree

5 files changed

+207
-108
lines changed

5 files changed

+207
-108
lines changed

hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@
6767
@InterfaceAudience.Public
6868
public class FuzzyRowFilter extends FilterBase implements HintingFilter {
6969
private static final boolean UNSAFE_UNALIGNED = HBasePlatformDependent.unaligned();
70-
private List<Pair<byte[], byte[]>> fuzzyKeysData;
70+
private final List<Pair<byte[], byte[]>> fuzzyKeysData;
7171
// Used to record whether we want to skip the current row.
7272
// Usually we should use filterRowKey here but in the current scan implementation, if filterRowKey
7373
// returns true, we will just skip to next row, instead of calling getNextCellHint to determine
@@ -89,7 +89,7 @@ public class FuzzyRowFilter extends FilterBase implements HintingFilter {
8989
/**
9090
* Row tracker (keeps all next rows after SEEK_NEXT_USING_HINT was returned)
9191
*/
92-
private RowTracker tracker;
92+
private final RowTracker tracker;
9393

9494
public FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData) {
9595
List<Pair<byte[], byte[]>> fuzzyKeyDataCopy = new ArrayList<>(fuzzyKeysData.size());
@@ -200,7 +200,7 @@ public boolean filterRow() throws IOException {
200200

201201
@Override
202202
public ReturnCode filterCell(final Cell c) {
203-
final int startIndex = lastFoundIndex >= 0 ? lastFoundIndex : 0;
203+
final int startIndex = Math.max(lastFoundIndex, 0);
204204
final int size = fuzzyKeysData.size();
205205
for (int i = startIndex; i < size + startIndex; i++) {
206206
final int index = i % size;
@@ -226,7 +226,7 @@ public ReturnCode filterCell(final Cell c) {
226226
@Override
227227
public Cell getNextCellHint(Cell currentCell) {
228228
boolean result = tracker.updateTracker(currentCell);
229-
if (result == false) {
229+
if (!result) {
230230
done = true;
231231
return null;
232232
}
@@ -574,25 +574,29 @@ public static Order orderFor(boolean reverse) {
574574
}
575575

576576
/**
577-
* @return greater byte array than given (row) which satisfies the fuzzy rule if it exists, null
578-
* otherwise
577+
* Find out the closes next byte array that satisfies fuzzy rule and is after the given one. In
578+
* the reverse case it returns increased byte array to make sure that the proper row is selected
579+
* next.
580+
* @return byte array which is after the given row and which satisfies the fuzzy rule if it
581+
* exists, null otherwise
579582
*/
580583
static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int length,
581584
byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
582-
// To find out the next "smallest" byte array that satisfies fuzzy rule and "greater" than
583-
// the given one we do the following:
585+
// To find out the closest next byte array that satisfies fuzzy rule and is after the given one
586+
// we do the following:
584587
// 1. setting values on all "fixed" positions to the values from fuzzyKeyBytes
585588
// 2. if during the first step given row did not increase, then we increase the value at
586589
// the first "non-fixed" position (where it is not maximum already)
587590

588591
// It is easier to perform this by using fuzzyKeyBytes copy and setting "non-fixed" position
589592
// values than otherwise.
590-
byte[] result =
591-
Arrays.copyOf(fuzzyKeyBytes, length > fuzzyKeyBytes.length ? length : fuzzyKeyBytes.length);
592-
if (reverse && length > fuzzyKeyBytes.length) {
593-
// we need trailing 0xff's instead of trailing 0x00's
594-
for (int i = fuzzyKeyBytes.length; i < result.length; i++) {
595-
result[i] = (byte) 0xFF;
593+
byte[] result = Arrays.copyOf(fuzzyKeyBytes, Math.max(length, fuzzyKeyBytes.length));
594+
if (reverse) {
595+
// we need 0xff's instead of 0x00's
596+
for (int i = 0; i < result.length; i++) {
597+
if (result[i] == 0) {
598+
result[i] = (byte) 0xFF;
599+
}
596600
}
597601
}
598602
int toInc = -1;
@@ -638,7 +642,14 @@ static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int l
638642
}
639643
}
640644

641-
return reverse ? result : trimTrailingZeroes(result, fuzzyKeyMeta, toInc);
645+
byte[] trailingZerosTrimmed = trimTrailingZeroes(result, fuzzyKeyMeta, toInc);
646+
if (reverse) {
647+
// In the reverse case we increase last non-max byte to make sure that the proper row is
648+
// selected next.
649+
return PrivateCellUtil.increaseLastNonMaxByte(trailingZerosTrimmed);
650+
} else {
651+
return trailingZerosTrimmed;
652+
}
642653
}
643654

644655
/**

hbase-client/src/main/java/org/apache/hadoop/hbase/filter/PrefixFilter.java

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
package org.apache.hadoop.hbase.filter;
1919

2020
import java.util.ArrayList;
21-
import java.util.Arrays;
2221
import org.apache.hadoop.hbase.ByteBufferExtendedCell;
2322
import org.apache.hadoop.hbase.Cell;
2423
import org.apache.hadoop.hbase.PrivateCellUtil;
@@ -57,7 +56,7 @@ private void createCellHints() {
5756
return;
5857
}
5958
// On reversed scan hint should be the prefix with last byte incremented
60-
byte[] reversedHintBytes = increaseLastNonMaxByte(this.prefix);
59+
byte[] reversedHintBytes = PrivateCellUtil.increaseLastNonMaxByte(this.prefix);
6160
this.reversedNextCellHint =
6261
PrivateCellUtil.createFirstOnRow(reversedHintBytes, 0, (short) reversedHintBytes.length);
6362
// On forward scan hint should be the prefix
@@ -132,18 +131,6 @@ public Cell getNextCellHint(Cell cell) {
132131
}
133132
}
134133

135-
private byte[] increaseLastNonMaxByte(byte[] bytes) {
136-
byte[] result = Arrays.copyOf(bytes, bytes.length);
137-
for (int i = bytes.length - 1; i >= 0; i--) {
138-
byte b = bytes[i];
139-
if (b < Byte.MAX_VALUE) {
140-
result[i] = (byte) (b + 1);
141-
break;
142-
}
143-
}
144-
return result;
145-
}
146-
147134
public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments) {
148135
Preconditions.checkArgument(filterArguments.size() == 1, "Expected 1 but got: %s",
149136
filterArguments.size());

hbase-common/src/main/java/org/apache/hadoop/hbase/PrivateCellUtil.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import java.math.BigDecimal;
2828
import java.nio.ByteBuffer;
2929
import java.util.ArrayList;
30+
import java.util.Arrays;
3031
import java.util.Iterator;
3132
import java.util.List;
3233
import java.util.Map.Entry;
@@ -3095,4 +3096,16 @@ public static long getSequenceId(Cell c) {
30953096
return HConstants.NO_SEQNUM;
30963097
}
30973098
}
3099+
3100+
public static byte[] increaseLastNonMaxByte(byte[] bytes) {
3101+
byte[] result = Arrays.copyOf(bytes, bytes.length);
3102+
for (int i = bytes.length - 1; i >= 0; i--) {
3103+
byte b = bytes[i];
3104+
if (b < Byte.MAX_VALUE) {
3105+
result[i] = (byte) (b + 1);
3106+
break;
3107+
}
3108+
}
3109+
return result;
3110+
}
30983111
}

hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java

Lines changed: 103 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -164,14 +164,20 @@ public void testGetNextForFuzzyRuleForward() {
164164
new byte[] { 1, 0, 2, 0, 1 }, // current
165165
new byte[] { 1, 1, 0, 2 }); // expected next
166166

167-
assertNext(false, new byte[] { 1, 0, 1 }, new byte[] { -1, 0, -1 },
168-
new byte[] { 1, (byte) 128, 2, 0, 1 }, new byte[] { 1, (byte) 129, 1 });
167+
assertNext(false, new byte[] { 1, 0, 1 }, // fuzzy row
168+
new byte[] { -1, 0, -1 }, // mask
169+
new byte[] { 1, (byte) 128, 2, 0, 1 }, // current
170+
new byte[] { 1, (byte) 129, 1 }); // expected next
169171

170-
assertNext(false, new byte[] { 0, 1, 0, 1 }, new byte[] { 0, -1, 0, -1 },
171-
new byte[] { 5, 1, 0, 1 }, new byte[] { 5, 1, 1, 1 });
172+
assertNext(false, new byte[] { 0, 1, 0, 1 }, // fuzzy row
173+
new byte[] { 0, -1, 0, -1 }, // mask
174+
new byte[] { 5, 1, 0, 1 }, // current
175+
new byte[] { 5, 1, 1, 1 }); // expected next
172176

173-
assertNext(false, new byte[] { 0, 1, 0, 1 }, new byte[] { 0, -1, 0, -1 },
174-
new byte[] { 5, 1, 0, 1, 1 }, new byte[] { 5, 1, 0, 1, 2 });
177+
assertNext(false, new byte[] { 0, 1, 0, 1 }, // fuzzy row
178+
new byte[] { 0, -1, 0, -1 }, // mask
179+
new byte[] { 5, 1, 0, 1, 1 }, // current
180+
new byte[] { 5, 1, 0, 1, 2 }); // expected next
175181

176182
assertNext(false, new byte[] { 0, 1, 0, 0 }, // fuzzy row
177183
new byte[] { 0, -1, 0, 0 }, // mask
@@ -188,23 +194,35 @@ public void testGetNextForFuzzyRuleForward() {
188194
new byte[] { 5, 1, (byte) 255, 0 }, // current
189195
new byte[] { 5, 1, (byte) 255, 1 }); // expected next
190196

191-
assertNext(false, new byte[] { 5, 1, 1, 0 }, new byte[] { -1, -1, 0, 0 },
192-
new byte[] { 5, 1, (byte) 255, 1 }, new byte[] { 5, 1, (byte) 255, 2 });
197+
assertNext(false, new byte[] { 5, 1, 1, 0 }, // fuzzy row
198+
new byte[] { -1, -1, 0, 0 }, // mask
199+
new byte[] { 5, 1, (byte) 255, 1 }, // current
200+
new byte[] { 5, 1, (byte) 255, 2 }); // expected next
193201

194-
assertNext(false, new byte[] { 1, 1, 1, 1 }, new byte[] { -1, -1, 0, 0 },
195-
new byte[] { 1, 1, 2, 2 }, new byte[] { 1, 1, 2, 3 });
202+
assertNext(false, new byte[] { 1, 1, 1, 1 }, // fuzzy row
203+
new byte[] { -1, -1, 0, 0 }, // mask
204+
new byte[] { 1, 1, 2, 2 }, // current
205+
new byte[] { 1, 1, 2, 3 }); // expected next
196206

197-
assertNext(false, new byte[] { 1, 1, 1, 1 }, new byte[] { -1, -1, 0, 0 },
198-
new byte[] { 1, 1, 3, 2 }, new byte[] { 1, 1, 3, 3 });
207+
assertNext(false, new byte[] { 1, 1, 1, 1 }, // fuzzy row
208+
new byte[] { -1, -1, 0, 0 }, // mask
209+
new byte[] { 1, 1, 3, 2 }, // current
210+
new byte[] { 1, 1, 3, 3 }); // expected next
199211

200-
assertNext(false, new byte[] { 1, 1, 1, 1 }, new byte[] { 0, 0, 0, 0 },
201-
new byte[] { 1, 1, 2, 3 }, new byte[] { 1, 1, 2, 4 });
212+
assertNext(false, new byte[] { 1, 1, 1, 1 }, // fuzzy row
213+
new byte[] { 0, 0, 0, 0 }, // mask
214+
new byte[] { 1, 1, 2, 3 }, // current
215+
new byte[] { 1, 1, 2, 4 }); // expected next
202216

203-
assertNext(false, new byte[] { 1, 1, 1, 1 }, new byte[] { 0, 0, 0, 0 },
204-
new byte[] { 1, 1, 3, 2 }, new byte[] { 1, 1, 3, 3 });
217+
assertNext(false, new byte[] { 1, 1, 1, 1 }, // fuzzy row
218+
new byte[] { 0, 0, 0, 0 }, // mask
219+
new byte[] { 1, 1, 3, 2 }, // current
220+
new byte[] { 1, 1, 3, 3 }); // expected next
205221

206-
assertNext(false, new byte[] { 1, 1, 0, 0 }, new byte[] { -1, -1, 0, 0 },
207-
new byte[] { 0, 1, 3, 2 }, new byte[] { 1, 1 });
222+
assertNext(false, new byte[] { 1, 1, 0, 0 }, // fuzzy row
223+
new byte[] { -1, -1, 0, 0 }, // mask
224+
new byte[] { 0, 1, 3, 2 }, // current
225+
new byte[] { 1, 1 }); // expected next
208226

209227
// No next for this one
210228
Assert.assertNull(FuzzyRowFilter.getNextForFuzzyRule(new byte[] { 2, 3, 1, 1, 1 }, // row to
@@ -221,94 +239,100 @@ public void testGetNextForFuzzyRuleForward() {
221239

222240
@Test
223241
public void testGetNextForFuzzyRuleReverse() {
242+
// In these reverse cases for the next row key the last non-max byte should be increased
243+
// to make sure that the proper row is selected next by the scanner.
244+
// For example:
245+
// fuzzy row: 0,1,2
246+
// mask: 0,-1,-1
247+
// current: 1,2,1,0,1
248+
// next would be: 1,1,2
249+
// this has to be increased to 1,1,3 to make sure that the proper row is selected next.
224250
assertNext(true, new byte[] { 0, 1, 2 }, // fuzzy row
225251
new byte[] { 0, -1, -1 }, // mask
226252
new byte[] { 1, 2, 1, 0, 1 }, // current
227-
// TODO: should be {1, 1, 3} ?
228-
new byte[] { 1, 1, 2, (byte) 0xFF, (byte) 0xFF }); // expected next
253+
new byte[] { 1, 1, 3 }); // expected next
229254

230255
assertNext(true, new byte[] { 0, 1, 0, 2, 0 }, // fuzzy row
231256
new byte[] { 0, -1, 0, -1, 0 }, // mask
232257
new byte[] { 1, 2, 1, 3, 1 }, // current
233-
// TODO: should be {1, 1, 1, 3} ?
234-
new byte[] { 1, 1, 0, 2, 0 }); // expected next
258+
new byte[] { 1, 1, (byte) 255, 3 }); // expected next
235259

236-
assertNext(true, new byte[] { 1, 0, 1 }, new byte[] { -1, 0, -1 },
237-
new byte[] { 1, (byte) 128, 2, 0, 1 },
238-
// TODO: should be {1, (byte) 128, 2} ?
239-
new byte[] { 1, (byte) 128, 1, (byte) 0xFF, (byte) 0xFF });
260+
assertNext(true, new byte[] { 1, 0, 1 }, // fuzzy row
261+
new byte[] { -1, 0, -1 }, // mask
262+
new byte[] { 1, (byte) 128, 2, 0, 1 }, // current
263+
new byte[] { 1, (byte) 128, 2 }); // expected next
240264

241-
assertNext(true, new byte[] { 0, 1, 0, 1 }, new byte[] { 0, -1, 0, -1 },
242-
new byte[] { 5, 1, 0, 2, 1 },
243-
// TODO: should be {5, 1, 0, 2} ?
244-
new byte[] { 5, 1, 0, 1, (byte) 0xFF });
265+
assertNext(true, new byte[] { 0, 1, 0, 1 }, // fuzzy row
266+
new byte[] { 0, -1, 0, -1 }, // mask
267+
new byte[] { 5, 1, 0, 2, 1 }, // current
268+
new byte[] { 5, 1, 0, 2 }); // expected next
245269

246270
assertNext(true, new byte[] { 0, 1, 0, 0 }, // fuzzy row
247271
new byte[] { 0, -1, 0, 0 }, // mask
248272
new byte[] { 5, 1, (byte) 255, 1 }, // current
249-
new byte[] { 5, 1, (byte) 255, 0 }); // expected next
273+
new byte[] { 5, 1, (byte) 255, 1 }); // expected next
250274

251275
assertNext(true, new byte[] { 0, 1, 0, 1 }, // fuzzy row
252276
new byte[] { 0, -1, 0, -1 }, // mask
253277
new byte[] { 5, 1, 0, 1 }, // current
254-
new byte[] { 4, 1, (byte) 255, 1 }); // expected next
278+
new byte[] { 4, 1, (byte) 255, 2 }); // expected next
255279

256280
assertNext(true, new byte[] { 0, 1, 0, 1 }, // fuzzy row
257281
new byte[] { 0, -1, 0, -1 }, // mask
258282
new byte[] { 5, 1, (byte) 255, 0 }, // current
259-
new byte[] { 5, 1, (byte) 254, 1 }); // expected next
283+
new byte[] { 5, 1, (byte) 254, 2 }); // expected next
260284

261-
assertNext(true, new byte[] { 1, 1, 0, 0 }, new byte[] { -1, -1, 0, 0 },
262-
new byte[] { 2, 1, 3, 2 },
263-
// TODO: should be {1, 0} ?
264-
new byte[] { 1, 1, 0, 0 });
285+
assertNext(true, new byte[] { 1, 1, 0, 0 }, // fuzzy row
286+
new byte[] { -1, -1, 0, 0 }, // mask
287+
new byte[] { 2, 1, 3, 2 }, // current
288+
new byte[] { 1, 2 }); // expected next
265289

266290
assertNext(true, new byte[] { 1, 0, 1 }, // fuzzy row
267291
new byte[] { -1, 0, -1 }, // mask
268292
new byte[] { 2, 3, 1, 1, 1 }, // row to check
269-
// TODO: should be {1, (byte) 0xFF, 2} ?
270-
new byte[] { 1, 0, 1, (byte) 0xFF, (byte) 0xFF });
271-
272-
assertNext(true, new byte[] { 1, 1, 0, 3 }, new byte[] { -1, -1, 0, -1 },
273-
new byte[] { 1, (byte) 245, 1, 3, 0 },
274-
// TODO: should be {1, 1, (byte) 255, 4} ?
275-
new byte[] { 1, 1, 0, 3, (byte) 0xFF });
276-
277-
assertNext(true, new byte[] { 1, 2, 0, 3 }, new byte[] { -1, -1, 0, -1 },
278-
new byte[] { 1, 3, 1, 3, 0 },
279-
// TODO: should be 1, 2, (byte) 255, 4 ?
280-
new byte[] { 1, 2, 0, 3, (byte) 0xFF });
281-
282-
assertNext(true, new byte[] { 1, 2, 0, 3 }, new byte[] { -1, -1, 0, -1 },
283-
new byte[] { 2, 1, 1, 1, 0 },
284-
// TODO: should be {1, 2, (byte) 255, 4} ?
285-
new byte[] { 1, 2, 0, 3, (byte) 0xFF });
286-
287-
assertNext(true,
288-
// TODO: should be null?
289-
new byte[] { 1, 0, 1 }, new byte[] { -1, 0, -1 }, new byte[] { 1, (byte) 128, 2 },
290-
new byte[] { 1, (byte) 128, 1 });
291-
292-
assertNext(true,
293-
// TODO: should be null?
294-
new byte[] { 0, 1, 0, 1 }, new byte[] { 0, -1, 0, -1 }, new byte[] { 5, 1, 0, 2 },
295-
new byte[] { 5, 1, 0, 1 });
296-
297-
assertNext(true,
298-
// TODO: should be null?
299-
new byte[] { 5, 1, 1, 0 }, new byte[] { -1, -1, 0, 0 }, new byte[] { 5, 1, (byte) 0xFF, 1 },
300-
new byte[] { 5, 1, (byte) 0xFF, 0 });
301-
302-
assertNext(true,
303-
// TODO: should be null?
304-
new byte[] { 1, 1, 1, 1 }, new byte[] { -1, -1, 0, 0 }, new byte[] { 1, 1, 2, 2 },
305-
new byte[] { 1, 1, 2, 1 });
306-
307-
assertNext(true,
308-
// TODO: should be null?
309-
new byte[] { 1, 1, 1, 1 }, new byte[] { 0, 0, 0, 0 }, new byte[] { 1, 1, 2, 3 },
310-
new byte[] { 1, 1, 2, 2 });
293+
new byte[] { 1, (byte) 255, 2 }); // expected next
294+
295+
assertNext(true, new byte[] { 1, 1, 0, 3 }, // fuzzy row
296+
new byte[] { -1, -1, 0, -1 }, // mask
297+
new byte[] { 1, (byte) 245, 1, 3, 0 }, // row to check
298+
new byte[] { 1, 1, (byte) 255, 4 }); // expected next
299+
300+
assertNext(true, new byte[] { 1, 2, 0, 3 }, // fuzzy row
301+
new byte[] { -1, -1, 0, -1 }, // mask
302+
new byte[] { 1, 3, 1, 3, 0 }, // row to check
303+
new byte[] { 1, 2, (byte) 255, 4 }); // expected next
304+
305+
assertNext(true, new byte[] { 1, 2, 0, 3 }, // fuzzy row
306+
new byte[] { -1, -1, 0, -1 }, // mask
307+
new byte[] { 2, 1, 1, 1, 0 }, // row to check
308+
new byte[] { 1, 2, (byte) 255, 4 }); // expected next
309+
310+
assertNext(true, new byte[] { 1, 0, 1 }, // fuzzy row
311+
new byte[] { -1, 0, -1 }, // mask
312+
new byte[] { 1, (byte) 128, 2 }, // row to check
313+
new byte[] { 1, (byte) 128, 2 }); // expected next
314+
315+
assertNext(true, new byte[] { 0, 1, 0, 1 }, // fuzzy row
316+
new byte[] { 0, -1, 0, -1 }, // mask
317+
new byte[] { 5, 1, 0, 2 }, // row to check
318+
new byte[] { 5, 1, 0, 2 }); // expected next
319+
320+
assertNext(true, new byte[] { 5, 1, 1, 0 }, // fuzzy row
321+
new byte[] { -1, -1, 0, 0 }, // mask
322+
new byte[] { 5, 1, (byte) 0xFF, 1 }, // row to check
323+
new byte[] { 5, 1, (byte) 0xFF, 1 }); // expected next
324+
325+
assertNext(true, new byte[] { 1, 1, 1, 1 }, // fuzzy row
326+
new byte[] { -1, -1, 0, 0 }, // mask
327+
new byte[] { 1, 1, 2, 2 }, // row to check
328+
new byte[] { 1, 1, 2, 2 }); // expected next
329+
330+
assertNext(true, new byte[] { 1, 1, 1, 1 }, // fuzzy row
331+
new byte[] { 0, 0, 0, 0 }, // mask
332+
new byte[] { 1, 1, 2, 3 }, // row to check
333+
new byte[] { 1, 1, 2, 3 }); // expected next
311334

335+
// no before cell than current which satisfies the fuzzy row -> null
312336
Assert.assertNull(FuzzyRowFilter.getNextForFuzzyRule(true, new byte[] { 1, 1, 1, 3, 0 },
313337
new byte[] { 1, 2, 0, 3 }, new byte[] { -1, -1, 0, -1 }));
314338
}

0 commit comments

Comments
 (0)