diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java index 016d503f5eab..a3674dd8cb9b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java @@ -47,6 +47,7 @@ import org.apache.hadoop.hbase.regionserver.querymatcher.CompactionScanQueryMatcher; import org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher; import org.apache.hadoop.hbase.regionserver.querymatcher.UserScanQueryMatcher; +import org.apache.hadoop.hbase.security.visibility.VisibilityUtils; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; @@ -163,6 +164,7 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner protected final long readPt; private boolean topChanged = false; + private final boolean visibilityLabelEnabled; /** An internal constructor. */ private StoreScanner(HStore store, Scan scan, ScanInfo scanInfo, int numColumns, long readPt, @@ -177,6 +179,7 @@ private StoreScanner(HStore store, Scan scan, ScanInfo scanInfo, int numColumns, this.now = EnvironmentEdgeManager.currentTime(); this.oldestUnexpiredTS = scan.isRaw() ? 0L : now - scanInfo.getTtl(); this.minVersions = scanInfo.getMinVersions(); + visibilityLabelEnabled = store != null && VisibilityUtils.isVisibilityLabelEnabled(store.conf); // We look up row-column Bloom filters for multi-column queries as part of // the seek operation. However, we also look the row-column Bloom filter @@ -246,7 +249,7 @@ public StoreScanner(HStore store, ScanInfo scanInfo, Scan scan, NavigableSet outResult, ScannerContext scanner scannerContext.incrementBlockProgress(blockSize); }); - prevCell = cell; scannerContext.setLastPeekedCell(cell); topChanged = false; - ScanQueryMatcher.MatchCode qcode = matcher.match(cell); + ScanQueryMatcher.MatchCode qcode = matcher.match(cell, prevCell); + LOG.trace("next - cell={}, prevCell={}, qCode={}", cell, prevCell, qcode); + prevCell = cell; switch (qcode) { case INCLUDE: case INCLUDE_AND_SEEK_NEXT_ROW: diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/NormalUserScanQueryMatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/NormalUserScanQueryMatcher.java index 9ad3c792345e..66f408a2b8cf 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/NormalUserScanQueryMatcher.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/NormalUserScanQueryMatcher.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hbase.regionserver.querymatcher; import java.io.IOException; +import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.ExtendedCell; import org.apache.hadoop.hbase.KeepDeletedCells; import org.apache.hadoop.hbase.PrivateCellUtil; @@ -40,12 +41,19 @@ public abstract class NormalUserScanQueryMatcher extends UserScanQueryMatcher { /** whether time range queries can see rows "behind" a delete */ protected final boolean seePastDeleteMarkers; + private final int scanMaxVersions; + + private final boolean visibilityLabelEnabled; + protected NormalUserScanQueryMatcher(Scan scan, ScanInfo scanInfo, ColumnTracker columns, - boolean hasNullColumn, DeleteTracker deletes, long oldestUnexpiredTS, long now) { + boolean hasNullColumn, DeleteTracker deletes, long oldestUnexpiredTS, long now, + boolean visibilityLabelEnabled) { super(scan, scanInfo, columns, hasNullColumn, oldestUnexpiredTS, now); this.deletes = deletes; this.get = scan.isGetScan(); this.seePastDeleteMarkers = scanInfo.getKeepDeletedCells() != KeepDeletedCells.FALSE; + this.scanMaxVersions = Math.max(scan.getMaxVersions(), scanInfo.getMaxVersions()); + this.visibilityLabelEnabled = visibilityLabelEnabled; } @Override @@ -56,11 +64,16 @@ public void beforeShipped() throws IOException { @Override public MatchCode match(ExtendedCell cell) throws IOException { + return match(cell, null); + } + + @Override + public MatchCode match(ExtendedCell cell, ExtendedCell prevCell) throws IOException { if (filter != null && filter.filterAllRemaining()) { return MatchCode.DONE_SCAN; } - MatchCode returnCode = preCheck(cell); - if (returnCode != null) { + MatchCode returnCode; + if ((returnCode = preCheck(cell)) != null) { return returnCode; } long timestamp = cell.getTimestamp(); @@ -71,15 +84,42 @@ public MatchCode match(ExtendedCell cell) throws IOException { if (includeDeleteMarker) { this.deletes.add(cell); } - return MatchCode.SKIP; + // In some cases, optimization can not be done + if (!canOptimizeReadDeleteMarkers()) { + return MatchCode.SKIP; + } } - returnCode = checkDeleted(deletes, cell); - if (returnCode != null) { + // optimization when prevCell is Delete or DeleteFamilyVersion + if ((returnCode = checkDeletedEffectively(cell, prevCell)) != null) { + return returnCode; + } + if ((returnCode = checkDeleted(deletes, cell)) != null) { return returnCode; } return matchColumn(cell, timestamp, typeByte); } + // If prevCell is a delete marker and cell is a delete marked Put or delete marker, + // it means the cell is deleted effectively. + // And we can do SEEK_NEXT_COL. + private MatchCode checkDeletedEffectively(ExtendedCell cell, ExtendedCell prevCell) { + if ( + prevCell != null && canOptimizeReadDeleteMarkers() + && CellUtil.matchingRowColumn(prevCell, cell) && CellUtil.matchingTimestamp(prevCell, cell) + && (PrivateCellUtil.isDeleteType(prevCell) + || PrivateCellUtil.isDeleteFamilyVersion(prevCell)) + ) { + return MatchCode.SEEK_NEXT_COL; + } + return null; + } + + private boolean canOptimizeReadDeleteMarkers() { + // for simplicity, optimization works only for these cases + return !seePastDeleteMarkers && scanMaxVersions == 1 && !visibilityLabelEnabled + && getFilter() == null && !(deletes instanceof NewVersionBehaviorTracker); + } + @Override protected void reset() { deletes.reset(); @@ -92,11 +132,11 @@ protected boolean isGet() { public static NormalUserScanQueryMatcher create(Scan scan, ScanInfo scanInfo, ColumnTracker columns, DeleteTracker deletes, boolean hasNullColumn, long oldestUnexpiredTS, - long now) throws IOException { + long now, boolean visibilityLabelEnabled) throws IOException { if (scan.isReversed()) { if (scan.includeStopRow()) { return new NormalUserScanQueryMatcher(scan, scanInfo, columns, hasNullColumn, deletes, - oldestUnexpiredTS, now) { + oldestUnexpiredTS, now, visibilityLabelEnabled) { @Override protected boolean moreRowsMayExistsAfter(int cmpToStopRow) { @@ -105,7 +145,7 @@ protected boolean moreRowsMayExistsAfter(int cmpToStopRow) { }; } else { return new NormalUserScanQueryMatcher(scan, scanInfo, columns, hasNullColumn, deletes, - oldestUnexpiredTS, now) { + oldestUnexpiredTS, now, visibilityLabelEnabled) { @Override protected boolean moreRowsMayExistsAfter(int cmpToStopRow) { @@ -116,7 +156,7 @@ protected boolean moreRowsMayExistsAfter(int cmpToStopRow) { } else { if (scan.includeStopRow()) { return new NormalUserScanQueryMatcher(scan, scanInfo, columns, hasNullColumn, deletes, - oldestUnexpiredTS, now) { + oldestUnexpiredTS, now, visibilityLabelEnabled) { @Override protected boolean moreRowsMayExistsAfter(int cmpToStopRow) { @@ -125,7 +165,7 @@ protected boolean moreRowsMayExistsAfter(int cmpToStopRow) { }; } else { return new NormalUserScanQueryMatcher(scan, scanInfo, columns, hasNullColumn, deletes, - oldestUnexpiredTS, now) { + oldestUnexpiredTS, now, visibilityLabelEnabled) { @Override protected boolean moreRowsMayExistsAfter(int cmpToStopRow) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/ScanQueryMatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/ScanQueryMatcher.java index dc3259f03d3a..044f7e62854f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/ScanQueryMatcher.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/ScanQueryMatcher.java @@ -238,6 +238,25 @@ protected final MatchCode checkDeleted(DeleteTracker deletes, ExtendedCell cell) */ public abstract MatchCode match(ExtendedCell cell) throws IOException; + /** + * Determines if the caller should do one of several things: + * + * @param cell KeyValue to check + * @param prevCell KeyValue checked previously + * @return The match code instance. + * @throws IOException in case there is an internal consistency problem caused by a data + * corruption. + */ + public MatchCode match(ExtendedCell cell, ExtendedCell prevCell) throws IOException { + return match(cell); + } + /** Returns the start key */ public ExtendedCell getStartKey() { return startKey; @@ -284,7 +303,8 @@ public ExtendedCell getKeyForNextColumn(ExtendedCell cell) { // see HBASE-18471 for more details // see TestFromClientSide3#testScanAfterDeletingSpecifiedRow // see TestFromClientSide3#testScanAfterDeletingSpecifiedRowV2 - if (cell.getQualifierLength() == 0) { + // But we can seek to next column if the cell is a type of DeleteFamily. + if (cell.getQualifierLength() == 0 && !PrivateCellUtil.isDeleteFamily(cell)) { ExtendedCell nextKey = PrivateCellUtil.createNextOnRowCol(cell); if (nextKey != cell) { return nextKey; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java index c07b91b77e68..6eda323bfdb6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java @@ -287,7 +287,8 @@ public boolean moreRowsMayExistAfter(ExtendedCell cell) { public static UserScanQueryMatcher create(Scan scan, ScanInfo scanInfo, NavigableSet columns, long oldestUnexpiredTS, long now, - RegionCoprocessorHost regionCoprocessorHost) throws IOException { + RegionCoprocessorHost regionCoprocessorHost, boolean visibilityLabelEnabled) + throws IOException { boolean hasNullColumn = !(columns != null && columns.size() != 0 && columns.first().length != 0); Pair trackers = @@ -299,7 +300,7 @@ public static UserScanQueryMatcher create(Scan scan, ScanInfo scanInfo, oldestUnexpiredTS, now); } else { return NormalUserScanQueryMatcher.create(scan, scanInfo, columnTracker, deleteTracker, - hasNullColumn, oldestUnexpiredTS, now); + hasNullColumn, oldestUnexpiredTS, now, visibilityLabelEnabled); } } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/security/visibility/VisibilityUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/security/visibility/VisibilityUtils.java index e1975bc3b162..43927e41b513 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/security/visibility/VisibilityUtils.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/security/visibility/VisibilityUtils.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.TagType; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.io.util.StreamUtils; @@ -329,6 +330,15 @@ public static List createVisibilityExpTags(String visExpression, return tags; } + public static boolean isVisibilityLabelEnabled(Configuration conf) { + return conf.getInt("hfile.format.version", 0) == 3 + && conf.getBoolean(User.HBASE_SECURITY_AUTHORIZATION_CONF_KEY, false) + && (conf.get(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, "") + .contains(VisibilityController.class.getName()) + || conf.get(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, "") + .contains(VisibilityController.class.getName())); + } + private static void getLabelOrdinals(ExpressionNode node, List labelOrdinals, Set auths, boolean checkAuths, VisibilityLabelOrdinalProvider ordinalProvider) throws IOException, InvalidLabelException { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreScannerDeleteMarkerOptimization.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreScannerDeleteMarkerOptimization.java new file mode 100644 index 000000000000..ae9b3bc1201e --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreScannerDeleteMarkerOptimization.java @@ -0,0 +1,258 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import static org.apache.hadoop.hbase.KeyValueTestUtil.create; +import static org.apache.hadoop.hbase.regionserver.KeyValueScanFixture.scanFixture; +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.ExtendedCell; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeepDeletedCells; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValue.Type; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category({ RegionServerTests.class, SmallTests.class }) +public class TestStoreScannerDeleteMarkerOptimization { + private static final String CF = "cf"; + private static final byte[] CF_BYTES = Bytes.toBytes(CF); + private static final Configuration CONF = HBaseConfiguration.create(); + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestStoreScannerDeleteMarkerOptimization.class); + + private List kvs; + private Pair, Long> result; + + private Pair, Long> scanAll() throws IOException { + return scanAll(KeepDeletedCells.FALSE, 1); + } + + private Pair, Long> scanAll(KeepDeletedCells keepDeletedCells, int maxVersions) + throws IOException { + ScanInfo scanInfo = new ScanInfo(CONF, CF_BYTES, 0, maxVersions, Long.MAX_VALUE, + keepDeletedCells, HConstants.DEFAULT_BLOCKSIZE, 0, CellComparator.getInstance(), false); + List scanners = scanFixture(kvs.toArray(new KeyValue[0])); + Scan scanSpec = new Scan().readVersions(maxVersions); + try (StoreScanner scan = new StoreScanner(scanSpec, scanInfo, null, scanners)) { + List results = new ArrayList<>(); + // noinspection StatementWithEmptyBody + while (scan.next(results)) { + } + return new Pair<>(results, scan.getEstimatedNumberOfKvsScanned()); + } + } + + @BeforeClass + public static void setUpBeforeClass() { + org.apache.logging.log4j.Logger log = + org.apache.logging.log4j.LogManager.getLogger(StoreScanner.class); + org.apache.logging.log4j.core.config.Configurator.setLevel(log.getName(), + org.apache.logging.log4j.Level.TRACE); + } + + @Before + public void setUp() { + kvs = new ArrayList<>(); + } + + @Test + public void testDelete() throws IOException { + kvs.add(create("r", CF, "q", 2, Type.Delete, "")); + kvs.add(create("r", CF, "q", 2, Type.Put, "v")); + kvs.add(create("r", CF, "q", 1, Type.Delete, "")); + kvs.add(create("r", CF, "q", 1, Type.Put, "v")); + kvs.add(create("r", CF, "q1", 1, Type.Put, "v")); + + result = scanAll(); + assertEquals(1, result.getFirst().size()); + assertEquals(kvs.get(4), result.getFirst().get(0)); + // kvs0, kvs2, kvs4 + assertEquals(3, result.getSecond().longValue()); + } + + @Test + public void testDuplicatDelete() throws IOException { + kvs.add(create("r", CF, "q", 3, Type.Delete, "")); + kvs.add(create("r", CF, "q", 3, Type.Delete, "")); + kvs.add(create("r", CF, "q", 3, Type.Delete, "")); + + result = scanAll(); + assertEquals(0, result.getFirst().size()); + // kvs0, kvs1 + assertEquals(2, result.getSecond().longValue()); + } + + @Test + public void testNotDuplicatDelete() throws IOException { + kvs.add(create("r", CF, "q", 3, Type.Delete, "")); + kvs.add(create("r", CF, "q", 2, Type.Delete, "")); + kvs.add(create("r", CF, "q", 1, Type.Put, "v")); + + result = scanAll(); + assertEquals(1, result.getFirst().size()); + assertEquals(kvs.get(2), result.getFirst().get(0)); + assertEquals(kvs.size(), result.getSecond().longValue()); + } + + @Test + public void testDeleteEmptyQualifier() throws IOException { + kvs.add(create("r", CF, "", 2, Type.Delete, "")); + kvs.add(create("r", CF, "", 2, Type.Put, "v")); + kvs.add(create("r", CF, "", 1, Type.Delete, "")); + kvs.add(create("r", CF, "", 1, Type.Put, "v")); + kvs.add(create("r", CF, "q1", 1, Type.Put, "v")); + + result = scanAll(); + assertEquals(1, result.getFirst().size()); + assertEquals(kvs.get(4), result.getFirst().get(0)); + assertEquals(kvs.size(), result.getSecond().longValue()); + } + + @Test + public void testDeleteFamilyVersion() throws IOException { + // DeleteFamilyVersion cannot be optimized + kvs.add(create("r", CF, "", 3, Type.DeleteFamilyVersion, "")); + kvs.add(create("r", CF, "", 2, Type.DeleteFamilyVersion, "")); + kvs.add(create("r", CF, "", 1, Type.DeleteFamilyVersion, "")); + kvs.add(create("r", CF, "q", 3, Type.Put, "v")); + kvs.add(create("r", CF, "q", 2, Type.Put, "v")); + kvs.add(create("r", CF, "q", 1, Type.Put, "v")); + kvs.add(create("r", CF, "q1", 3, Type.Put, "v")); + kvs.add(create("r", CF, "q1", 2, Type.Put, "v")); + kvs.add(create("r", CF, "q1", 1, Type.Put, "v")); + + result = scanAll(); + assertEquals(0, result.getFirst().size()); + assertEquals(kvs.size(), result.getSecond().longValue()); + } + + @Test + public void testDuplicatDeleteFamilyVersion() throws IOException { + kvs.add(create("r", CF, "q", 3, Type.DeleteFamilyVersion, "")); + kvs.add(create("r", CF, "q", 3, Type.DeleteFamilyVersion, "")); + kvs.add(create("r", CF, "q", 3, Type.DeleteFamilyVersion, "")); + + result = scanAll(); + assertEquals(0, result.getFirst().size()); + // kvs0, kvs1 + assertEquals(2, result.getSecond().longValue()); + } + + @Test + public void testNotDuplicatDeleteFamilyVersion() throws IOException { + kvs.add(create("r", CF, "q", 3, Type.DeleteFamilyVersion, "")); + kvs.add(create("r", CF, "q", 2, Type.DeleteFamilyVersion, "")); + kvs.add(create("r", CF, "q", 1, Type.DeleteFamilyVersion, "")); + + result = scanAll(); + assertEquals(0, result.getFirst().size()); + assertEquals(kvs.size(), result.getSecond().longValue()); + } + + @Test + public void testDeleteColumn() throws IOException { + kvs.add(create("r", CF, "q", 3, Type.DeleteColumn, "")); + kvs.add(create("r", CF, "q", 2, Type.DeleteColumn, "")); + kvs.add(create("r", CF, "q", 2, Type.Put, "v")); + kvs.add(create("r", CF, "q", 1, Type.Put, "v")); + kvs.add(create("r", CF, "q1", 1, Type.Put, "v")); + + result = scanAll(); + assertEquals(1, result.getFirst().size()); + assertEquals(kvs.get(4), result.getFirst().get(0)); + // kvs0, kvs4 + assertEquals(2, result.getSecond().longValue()); + } + + @Test + public void testDeleteFamily() throws IOException { + kvs.add(create("r", CF, "", 3, Type.DeleteFamily, "")); + kvs.add(create("r", CF, "", 2, Type.DeleteFamily, "")); + kvs.add(create("r", CF, "", 1, Type.DeleteFamily, "")); + kvs.add(create("r", CF, "q", 4, Type.Put, "v")); + kvs.add(create("r", CF, "q", 3, Type.Put, "v")); + kvs.add(create("r", CF, "q", 2, Type.Put, "v")); + kvs.add(create("r", CF, "q1", 2, Type.Put, "v")); + kvs.add(create("r", CF, "q1", 1, Type.Put, "v")); + + result = scanAll(); + assertEquals(1, result.getFirst().size()); + assertEquals(kvs.get(3), result.getFirst().get(0)); + // kvs0, kvs3, kvs4, kvs6 + assertEquals(4, result.getSecond().longValue()); + } + + @Test + public void testKeepDeletedCells() throws IOException { + kvs.add(create("r", CF, "q", 2, Type.Delete, "")); + kvs.add(create("r", CF, "q", 2, Type.Put, "v")); + kvs.add(create("r", CF, "q", 1, Type.Delete, "")); + kvs.add(create("r", CF, "q", 1, Type.Put, "v")); + kvs.add(create("r", CF, "q1", 1, Type.Put, "v")); + + // optimization does not work. optimization works only for KeepDeletedCells.FALSE + result = scanAll(KeepDeletedCells.TRUE, 1); + assertEquals(1, result.getFirst().size()); + assertEquals(kvs.get(4), result.getFirst().get(0)); + assertEquals(kvs.size(), result.getSecond().longValue()); + + // optimization does not work. optimization works only for KeepDeletedCells.FALSE + result = scanAll(KeepDeletedCells.TTL, 1); + assertEquals(1, result.getFirst().size()); + assertEquals(kvs.get(4), result.getFirst().get(0)); + assertEquals(kvs.size(), result.getSecond().longValue()); + } + + @Test + public void testScanMaxVersions() throws IOException { + kvs.add(create("r", CF, "q", 2, Type.Delete, "")); + kvs.add(create("r", CF, "q", 2, Type.Put, "v")); + kvs.add(create("r", CF, "q", 1, Type.Delete, "")); + kvs.add(create("r", CF, "q", 1, Type.Put, "v")); + + // optimization works only for maxVersions = 1 + result = scanAll(KeepDeletedCells.FALSE, 1); + assertEquals(0, result.getFirst().size()); + // kvs0, kvs1 + assertEquals(2, result.getSecond().longValue()); + + // optimization does not work + result = scanAll(KeepDeletedCells.FALSE, 2); + assertEquals(0, result.getFirst().size()); + assertEquals(kvs.size(), result.getSecond().longValue()); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java index efbb4c77d475..f9f7f5415687 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java @@ -64,7 +64,7 @@ public void testNeverIncludeFakeCell() throws IOException { UserScanQueryMatcher qm = UserScanQueryMatcher.create(scan, new ScanInfo(this.conf, fam2, 10, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), - get.getFamilyMap().get(fam2), now - ttl, now, null); + get.getFamilyMap().get(fam2), now - ttl, now, null, false); ExtendedCell kv = new KeyValue(row1, fam2, col2, 1, data); ExtendedCell cell = PrivateCellUtil.createLastOnRowCol(kv); qm.setToNewRow(kv); @@ -91,7 +91,7 @@ public void testMatchExplicitColumns() throws IOException { UserScanQueryMatcher qm = UserScanQueryMatcher.create( scan, new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), - get.getFamilyMap().get(fam2), now - ttl, now, null); + get.getFamilyMap().get(fam2), now - ttl, now, null, false); List memstore = new ArrayList<>(6); memstore.add(new KeyValue(row1, fam2, col1, 1, data)); @@ -132,9 +132,11 @@ public void testMatch_Wildcard() throws IOException { expected.add(ScanQueryMatcher.MatchCode.DONE); long now = EnvironmentEdgeManager.currentTime(); - UserScanQueryMatcher qm = UserScanQueryMatcher.create(scan, new ScanInfo(this.conf, fam2, 0, 1, - ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), null, - now - ttl, now, null); + UserScanQueryMatcher qm = + UserScanQueryMatcher.create(scan, + new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, + HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), + null, now - ttl, now, null, false); List memstore = new ArrayList<>(6); memstore.add(new KeyValue(row1, fam2, col1, 1, data)); @@ -179,7 +181,7 @@ public void testMatch_ExpiredExplicit() throws IOException { UserScanQueryMatcher qm = UserScanQueryMatcher.create(scan, new ScanInfo(this.conf, fam2, 0, 1, testTTL, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), - get.getFamilyMap().get(fam2), now - testTTL, now, null); + get.getFamilyMap().get(fam2), now - testTTL, now, null, false); KeyValue[] kvs = new KeyValue[] { new KeyValue(row1, fam2, col1, now - 100, data), new KeyValue(row1, fam2, col2, now - 50, data), @@ -218,9 +220,10 @@ public void testMatch_ExpiredWildcard() throws IOException { ScanQueryMatcher.MatchCode.SEEK_NEXT_COL, ScanQueryMatcher.MatchCode.DONE }; long now = EnvironmentEdgeManager.currentTime(); - UserScanQueryMatcher qm = UserScanQueryMatcher.create(scan, new ScanInfo(this.conf, fam2, 0, 1, - testTTL, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), null, - now - testTTL, now, null); + UserScanQueryMatcher qm = UserScanQueryMatcher.create(scan, + new ScanInfo(this.conf, fam2, 0, 1, testTTL, KeepDeletedCells.FALSE, + HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), + null, now - testTTL, now, null, false); KeyValue[] kvs = new KeyValue[] { new KeyValue(row1, fam2, col1, now - 100, data), new KeyValue(row1, fam2, col2, now - 50, data), @@ -264,7 +267,7 @@ public void testMatchWhenFilterReturnsIncludeAndSeekNextRow() throws IOException UserScanQueryMatcher qm = UserScanQueryMatcher.create( scanWithFilter, new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), - get.getFamilyMap().get(fam2), now - ttl, now, null); + get.getFamilyMap().get(fam2), now - ttl, now, null, false); List memstore = new ArrayList<>(); // ColumnTracker will return INCLUDE_AND_SEEK_NEXT_COL , and filter will return @@ -314,7 +317,7 @@ public void testMergeFilterResponseCase1() throws IOException { UserScanQueryMatcher qm = UserScanQueryMatcher.create( scanWithFilter, new ScanInfo(this.conf, fam2, 0, 3, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), - get.getFamilyMap().get(fam2), now - ttl, now, null); + get.getFamilyMap().get(fam2), now - ttl, now, null, false); List memstore = new ArrayList<>(); memstore.add(new KeyValue(row1, fam1, col5, 1, data)); // match code will be INCLUDE @@ -334,7 +337,7 @@ scanWithFilter, new ScanInfo(this.conf, fam2, 0, 3, ttl, KeepDeletedCells.FALSE, qm = UserScanQueryMatcher.create( scanWithFilter, new ScanInfo(this.conf, fam2, 0, 2, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), - get.getFamilyMap().get(fam2), now - ttl, now, null); + get.getFamilyMap().get(fam2), now - ttl, now, null, false); List memstore2 = new ArrayList<>(); memstore2.add(new KeyValue(row2, fam1, col2, 1, data)); // match code will be INCLUDE @@ -374,7 +377,7 @@ public void testMergeFilterResponseCase2() throws Exception { UserScanQueryMatcher qm = UserScanQueryMatcher.create( scanWithFilter, new ScanInfo(this.conf, fam2, 0, 5, ttl, KeepDeletedCells.FALSE, HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), - get.getFamilyMap().get(fam2), now - ttl, now, null); + get.getFamilyMap().get(fam2), now - ttl, now, null, false); List memstore = new ArrayList<>(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcherDeleteMarkerOptimization.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcherDeleteMarkerOptimization.java new file mode 100644 index 000000000000..e7aa5408156a --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcherDeleteMarkerOptimization.java @@ -0,0 +1,281 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver.querymatcher; + +import static org.apache.hadoop.hbase.security.visibility.VisibilityTestUtil.enableVisiblityLabels; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.ExtendedCell; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeepDeletedCells; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValue.Type; +import org.apache.hadoop.hbase.filter.FilterList; +import org.apache.hadoop.hbase.regionserver.ScanInfo; +import org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode; +import org.apache.hadoop.hbase.security.visibility.VisibilityUtils; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.util.Pair; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category({ RegionServerTests.class, SmallTests.class }) +public class TestUserScanQueryMatcherDeleteMarkerOptimization extends AbstractTestScanQueryMatcher { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestUserScanQueryMatcherDeleteMarkerOptimization.class); + + private List> pairs; + + private void verify(List> pairs) throws IOException { + verify(pairs, 1, false, false); + } + + private void verify(List> pairs, int maxVersions, + boolean visibilityEnabled, boolean newVersionBehavior) throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + scan.readVersions(maxVersions); + UserScanQueryMatcher qm = UserScanQueryMatcher.create(scan, + new ScanInfo(this.conf, fam1, 0, maxVersions, ttl, KeepDeletedCells.FALSE, + HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, newVersionBehavior), + get.getFamilyMap().get(fam1), now - ttl, now, null, visibilityEnabled); + + List storedKVs = new ArrayList<>(); + for (Pair Pair : pairs) { + storedKVs.add(Pair.getFirst()); + } + + List scannedKVs = new ArrayList<>(); + qm.setToNewRow(storedKVs.get(0)); + + ExtendedCell prevCell = null; + + for (KeyValue kv : storedKVs) { + MatchCode matchCode = qm.match(kv, prevCell); + prevCell = kv; + scannedKVs.add(matchCode); + } + + assertEquals(pairs.size(), scannedKVs.size()); + for (int i = 0; i < scannedKVs.size(); i++) { + assertEquals("second, index - " + i, pairs.get(i).getSecond(), scannedKVs.get(i)); + } + } + + @Before + public void setUp() throws Exception { + super.setUp(); + pairs = new ArrayList<>(); + } + + private KeyValue createKV(byte[] col, long timestamp, Type delete) { + return new KeyValue(row1, fam1, col, timestamp, delete, data); + } + + @Test + public void testNotDuplicatDelete() throws IOException { + pairs.add(new Pair<>(createKV(col1, 2, Type.Delete), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col1, 1, Type.Delete), MatchCode.SKIP)); + verify(pairs); + } + + @Test + public void testEffectiveDelete() throws IOException { + pairs.add(new Pair<>(createKV(null, 2, Type.Delete), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(null, 2, Type.Put), MatchCode.SEEK_NEXT_COL)); + pairs.add(new Pair<>(createKV(col1, 2, Type.Delete), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col1, 2, Type.Put), MatchCode.SEEK_NEXT_COL)); + verify(pairs); + } + + @Test + public void testDeleteWithMaxVersions() throws IOException { + // For simplicity, do not optimize when maxVersions > 1 + pairs.add(new Pair<>(createKV(col1, 2, Type.Delete), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col1, 2, Type.Put), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col1, 1, Type.Put), MatchCode.INCLUDE)); + verify(pairs, 2, false, false); + } + + @Test + public void testDuplicatDelete() throws IOException { + pairs.add(new Pair<>(createKV(col1, 2, Type.Delete), MatchCode.SKIP)); + // After prevCell is read and it is checked duplicated, the second cell can be SEEK_NEXT_COL + pairs.add(new Pair<>(createKV(col1, 2, Type.Delete), MatchCode.SEEK_NEXT_COL)); + verify(pairs); + } + + @Test + public void testDeleteAfterPut() throws IOException { + pairs.add(new Pair<>(createKV(col1, 2, Type.Put), MatchCode.INCLUDE)); + pairs.add(new Pair<>(createKV(col1, 1, Type.Delete), MatchCode.SKIP)); + verify(pairs); + } + + @Test + public void testChangeColumn() throws IOException { + pairs.add(new Pair<>(createKV(col1, 2, Type.Put), MatchCode.INCLUDE)); + pairs.add(new Pair<>(createKV(col1, 1, Type.Delete), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col2, 2, Type.Put), MatchCode.INCLUDE)); + verify(pairs); + } + + @Test + public void testNotDuplicatDeleteFamilyVersion() throws IOException { + pairs.add(new Pair<>(createKV(null, 2, Type.DeleteFamilyVersion), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(null, 1, Type.DeleteFamilyVersion), MatchCode.SKIP)); + verify(pairs); + } + + @Test + public void testEffectiveDeleteFamilyVersion() throws IOException { + pairs.add(new Pair<>(createKV(null, 2, Type.DeleteFamilyVersion), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(null, 2, Type.Put), MatchCode.SEEK_NEXT_COL)); + verify(pairs); + } + + @Test + public void testDuplicatDeleteFamilyVersion() throws IOException { + pairs.add(new Pair<>(createKV(null, 2, Type.DeleteFamilyVersion), MatchCode.SKIP)); + // After prevCell is read and it is checked duplicated, the second cell can be SEEK_NEXT_COL + pairs.add(new Pair<>(createKV(null, 2, Type.DeleteFamilyVersion), MatchCode.SEEK_NEXT_COL)); + verify(pairs); + } + + @Test + public void testDeleteFamilyVersionAfterPut() throws IOException { + pairs.add(new Pair<>(createKV(null, 2, Type.Put), MatchCode.INCLUDE)); + pairs.add(new Pair<>(createKV(null, 1, Type.DeleteFamilyVersion), MatchCode.SKIP)); + verify(pairs); + } + + @Test + public void testDifferentDeleteFamilyVersion() throws IOException { + // DeleteFamilyVersion cannot be optimized + pairs.add(new Pair<>(createKV(null, 3, Type.DeleteFamilyVersion), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(null, 2, Type.DeleteFamilyVersion), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(null, 1, Type.DeleteFamilyVersion), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col1, 3, Type.Put), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col1, 2, Type.Put), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col1, 1, Type.Put), MatchCode.SKIP)); + verify(pairs); + } + + @Test + public void testDeleteColumn() throws IOException { + pairs.add(new Pair<>(createKV(col1, 2, Type.DeleteColumn), MatchCode.SEEK_NEXT_COL)); + verify(pairs); + } + + @Test + public void testDeleteColumnAfterPut() throws IOException { + pairs.add(new Pair<>(createKV(col1, 2, Type.Put), MatchCode.INCLUDE)); + pairs.add(new Pair<>(createKV(col1, 1, Type.DeleteColumn), MatchCode.SEEK_NEXT_COL)); + verify(pairs); + } + + @Test + public void testDeleteFamily() throws IOException { + pairs.add(new Pair<>(createKV(null, 2, Type.DeleteFamily), MatchCode.SEEK_NEXT_COL)); + pairs.add(new Pair<>(createKV(col1, 1, Type.Put), MatchCode.SEEK_NEXT_COL)); + verify(pairs); + } + + @Test + public void testDeleteFamilyAfterPut() throws IOException { + pairs.add(new Pair<>(createKV(null, 3, Type.Put), MatchCode.INCLUDE)); + pairs.add(new Pair<>(createKV(null, 2, Type.DeleteFamily), MatchCode.SEEK_NEXT_COL)); + verify(pairs); + } + + @Test + public void testKeyForNextColumnForDeleteFamily() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + UserScanQueryMatcher qm = UserScanQueryMatcher.create( + scan, new ScanInfo(this.conf, fam1, 0, 1, ttl, KeepDeletedCells.FALSE, + HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), + get.getFamilyMap().get(fam1), now - ttl, now, null, false); + + KeyValue kv; + ExtendedCell nextColumnKey; + + // empty qualifier should not return the instance of LastOnRowColCell except for DeleteFamily + kv = createKV(null, 2, Type.Put); + nextColumnKey = qm.getKeyForNextColumn(kv); + assertNotEquals("LastOnRowColCell", nextColumnKey.getClass().getSimpleName()); + + // empty qualifier should not return the instance of LastOnRowColCell except for DeleteFamily + kv = createKV(null, 2, Type.Delete); + nextColumnKey = qm.getKeyForNextColumn(kv); + assertNotEquals("LastOnRowColCell", nextColumnKey.getClass().getSimpleName()); + + // empty qualifier should return the instance of LastOnRowColCell only for DeleteFamily + kv = createKV(null, 2, Type.DeleteFamily); + nextColumnKey = qm.getKeyForNextColumn(kv); + assertEquals("LastOnRowColCell", nextColumnKey.getClass().getSimpleName()); + } + + @Test + public void testNoDelete() throws IOException { + pairs.add(new Pair<>(createKV(col1, 2, Type.Put), MatchCode.INCLUDE)); + pairs.add(new Pair<>(createKV(col1, 1, Type.Put), MatchCode.SEEK_NEXT_COL)); + verify(pairs); + } + + @Test + public void testVisibilityLabelEnabled() throws IOException { + Configuration conf = HBaseConfiguration.create(); + enableVisiblityLabels(conf); + pairs.add(new Pair<>(createKV(col1, 3, Type.Delete), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col1, 3, Type.Put), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(null, 2, Type.DeleteFamily), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col1, 1, Type.Put), MatchCode.SEEK_NEXT_COL)); + verify(pairs, 1, VisibilityUtils.isVisibilityLabelEnabled(conf), false); + } + + @Test + public void testScanWithFilter() throws IOException { + scan.setFilter(new FilterList()); + pairs.add(new Pair<>(createKV(col1, 3, Type.Delete), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col1, 3, Type.Put), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(null, 2, Type.DeleteFamily), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col1, 1, Type.Put), MatchCode.SEEK_NEXT_COL)); + verify(pairs); + } + + @Test + public void testNewVersionBehavior() throws IOException { + pairs.add(new Pair<>(createKV(col1, 3, Type.Delete), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col1, 3, Type.Put), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(null, 2, Type.DeleteFamily), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col1, 1, Type.Put), MatchCode.SKIP)); + verify(pairs, 2, false, true); + } +}