@@ -47,6 +47,7 @@
import org.apache.hadoop.hbase.regionserver.querymatcher.CompactionScanQueryMatcher;
import org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher;
import org.apache.hadoop.hbase.regionserver.querymatcher.UserScanQueryMatcher;
import org.apache.hadoop.hbase.security.visibility.VisibilityUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
@@ -163,6 +164,7 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner

protected final long readPt;
private boolean topChanged = false;
private final boolean visibilityLabelEnabled;

/** An internal constructor. */
private StoreScanner(HStore store, Scan scan, ScanInfo scanInfo, int numColumns, long readPt,
@@ -177,6 +179,7 @@ private StoreScanner(HStore store, Scan scan, ScanInfo scanInfo, int numColumns,
this.now = EnvironmentEdgeManager.currentTime();
this.oldestUnexpiredTS = scan.isRaw() ? 0L : now - scanInfo.getTtl();
this.minVersions = scanInfo.getMinVersions();
visibilityLabelEnabled = store != null && VisibilityUtils.isVisibilityLabelEnabled(store.conf);

// We look up row-column Bloom filters for multi-column queries as part of
// the seek operation. However, we also look the row-column Bloom filter
@@ -246,7 +249,7 @@ public StoreScanner(HStore store, ScanInfo scanInfo, Scan scan, NavigableSet<byt
throw new DoNotRetryIOException("Cannot specify any column for a raw scan");
}
matcher = UserScanQueryMatcher.create(scan, scanInfo, columns, oldestUnexpiredTS, now,
store.getCoprocessorHost());
store.getCoprocessorHost(), visibilityLabelEnabled);

store.addChangedReaderObserver(this);

@@ -360,8 +363,8 @@ public StoreScanner(ScanInfo scanInfo, ScanType scanType,
this(null, scan, scanInfo, columns != null ? columns.size() : 0, 0L, scan.getCacheBlocks(),
scanType);
if (scanType == ScanType.USER_SCAN) {
this.matcher =
UserScanQueryMatcher.create(scan, scanInfo, columns, oldestUnexpiredTS, now, null);
this.matcher = UserScanQueryMatcher.create(scan, scanInfo, columns, oldestUnexpiredTS, now,
null, visibilityLabelEnabled);
} else {
this.matcher = CompactionScanQueryMatcher.create(scanInfo, scanType, Long.MAX_VALUE,
PrivateConstants.OLDEST_TIMESTAMP, oldestUnexpiredTS, now, null, null, null);
@@ -375,8 +378,8 @@ public StoreScanner(ScanInfo scanInfo, ScanType scanType,
// 0 is passed as readpoint because the test bypasses Store
this(null, scan, scanInfo, columns != null ? columns.size() : 0, 0L, scan.getCacheBlocks(),
ScanType.USER_SCAN);
this.matcher =
UserScanQueryMatcher.create(scan, scanInfo, columns, oldestUnexpiredTS, now, null);
this.matcher = UserScanQueryMatcher.create(scan, scanInfo, columns, oldestUnexpiredTS, now,
null, visibilityLabelEnabled);
seekAllScanner(scanInfo, scanners);
}

@@ -636,10 +639,11 @@ public boolean next(List<? super ExtendedCell> outResult, ScannerContext scanner
scannerContext.incrementBlockProgress(blockSize);
});

prevCell = cell;
scannerContext.setLastPeekedCell(cell);
topChanged = false;
ScanQueryMatcher.MatchCode qcode = matcher.match(cell);
ScanQueryMatcher.MatchCode qcode = matcher.match(cell, prevCell);
LOG.trace("next - cell={}, prevCell={}, qCode={}", cell, prevCell, qcode);
prevCell = cell;
switch (qcode) {
case INCLUDE:
case INCLUDE_AND_SEEK_NEXT_ROW:
@@ -18,6 +18,7 @@
package org.apache.hadoop.hbase.regionserver.querymatcher;

import java.io.IOException;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.KeepDeletedCells;
import org.apache.hadoop.hbase.PrivateCellUtil;
@@ -40,12 +41,19 @@ public abstract class NormalUserScanQueryMatcher extends UserScanQueryMatcher {
/** whether time range queries can see rows "behind" a delete */
protected final boolean seePastDeleteMarkers;

private final int scanMaxVersions;

private final boolean visibilityLabelEnabled;

protected NormalUserScanQueryMatcher(Scan scan, ScanInfo scanInfo, ColumnTracker columns,
boolean hasNullColumn, DeleteTracker deletes, long oldestUnexpiredTS, long now) {
boolean hasNullColumn, DeleteTracker deletes, long oldestUnexpiredTS, long now,
boolean visibilityLabelEnabled) {
super(scan, scanInfo, columns, hasNullColumn, oldestUnexpiredTS, now);
this.deletes = deletes;
this.get = scan.isGetScan();
this.seePastDeleteMarkers = scanInfo.getKeepDeletedCells() != KeepDeletedCells.FALSE;
this.scanMaxVersions = Math.max(scan.getMaxVersions(), scanInfo.getMaxVersions());
this.visibilityLabelEnabled = visibilityLabelEnabled;
}

@Override
@@ -56,11 +64,16 @@ public void beforeShipped() throws IOException {

@Override
public MatchCode match(ExtendedCell cell) throws IOException {
return match(cell, null);
}

@Override
public MatchCode match(ExtendedCell cell, ExtendedCell prevCell) throws IOException {
if (filter != null && filter.filterAllRemaining()) {
return MatchCode.DONE_SCAN;
}
MatchCode returnCode = preCheck(cell);
if (returnCode != null) {
MatchCode returnCode;
if ((returnCode = preCheck(cell)) != null) {
return returnCode;
}
long timestamp = cell.getTimestamp();
@@ -71,15 +84,42 @@ public MatchCode match(ExtendedCell cell) throws IOException {
if (includeDeleteMarker) {
this.deletes.add(cell);
}
return MatchCode.SKIP;
// In some cases, the optimization cannot be done
if (!canOptimizeReadDeleteMarkers()) {
return MatchCode.SKIP;
}
}
returnCode = checkDeleted(deletes, cell);
if (returnCode != null) {
// optimization when prevCell is Delete or DeleteFamilyVersion
if ((returnCode = checkDeletedEffectively(cell, prevCell)) != null) {
return returnCode;
}
if ((returnCode = checkDeleted(deletes, cell)) != null) {
return returnCode;
}
return matchColumn(cell, timestamp, typeByte);
}

// If prevCell is a delete marker (Delete or DeleteFamilyVersion) and cell is a Put or another
// delete marker with the same row, column and timestamp, the cell is effectively deleted,
// so we can do SEEK_NEXT_COL.
private MatchCode checkDeletedEffectively(ExtendedCell cell, ExtendedCell prevCell) {
if (
prevCell != null && canOptimizeReadDeleteMarkers()
&& CellUtil.matchingRowColumn(prevCell, cell) && CellUtil.matchingTimestamp(prevCell, cell)
&& (PrivateCellUtil.isDeleteType(prevCell)
|| PrivateCellUtil.isDeleteFamilyVersion(prevCell))
) {
return MatchCode.SEEK_NEXT_COL;
}
return null;
}

private boolean canOptimizeReadDeleteMarkers() {
// for simplicity, optimization works only for these cases
return !seePastDeleteMarkers && scanMaxVersions == 1 && !visibilityLabelEnabled
&& getFilter() == null && !(deletes instanceof NewVersionBehaviorTracker);
}
Comment on lines +117 to +121

Contributor:
@EungsopYoo have you also considered Dual File Compaction (#5545)?
Could you also run a perf test comparing Dual File Compaction with this optimization? That might be really helpful.

EungsopYoo (Contributor, Author), Jan 20, 2025:

@virajjasani
I have reviewed the Dual File Compaction approach you mentioned. This PR and Dual File Compaction have something in common, especially in how they handle delete markers, but I think there are some differences.

This PR focuses on accumulated delete markers on the same row or cell, whereas Dual File Compaction handles delete markers across different rows or columns. Also, this PR can optimize reads from both the MemStore and StoreFiles, while Dual File Compaction can only optimize reads from StoreFiles.

So I think they are complementary and can be used together.
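
To make the targeted pattern concrete, here is a minimal client-side sketch of the workload this PR is aimed at: many delete markers accumulating on the same row and column family, followed by a read of that row. The table, row, and family names are made up for illustration; only standard HBase client API calls are used.

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class AccumulatedDeleteMarkersExample {
  public static void main(String[] args) throws Exception {
    try (Connection conn = ConnectionFactory.createConnection();
        Table table = conn.getTable(TableName.valueOf("test"))) {
      long now = System.currentTimeMillis();
      List<Delete> deletes = new ArrayList<>();
      for (int i = 0; i < 10000; i++) {
        // Each delete uses a distinct timestamp, so all markers stay in the store
        // until they are purged by a major compaction.
        deletes.add(new Delete(Bytes.toBytes("row")).addFamily(Bytes.toBytes("c"), now + i));
      }
      table.delete(deletes);
      // Without this PR the matcher walks every accumulated marker on this read;
      // with it, the matcher can answer SEEK_NEXT_COL once it sees the run of markers.
      table.get(new Get(Bytes.toBytes("row")));
    }
  }
}

The shell script later in this thread exercises essentially the same pattern from the HBase shell.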

Contributor:

Thanks @EungsopYoo, this is what I was also expecting.

On the Jira https://issues.apache.org/jira/browse/HBASE-25972, Kadir has also shown how the full-scan improvement was measured using PE (second comment on the Jira). Could you also run the same steps to see how much improvement you observe with this PR?

Contributor (Author):

@virajjasani
PE does not have a test case that does Put, Delete, and Get on the same row. Should I add a new test case and run it?

Contributor:

It's not necessary, because the steps mentioned in the Jira will take care of adding many delete markers, so you can follow the exact same steps. Thank you!

Contributor:

To me, this improvement is only meaningful when the scanned data is in the memstore, assuming that the skip list is used for jumping from one column to the next (I have not looked at the code in detail recently, so I assume that is the case). However, when HBase scans data from an HFile, do we have data structures in place to jump from one column to the next? I think we do not. Not only do we linearly scan the cells within a row, we also linearly scan all rows within an HBase block, don't we? So I do not understand why skipping to the next column would be a significant optimization in general.

Contributor (Author):

> Thanks @EungsopYoo, how about you keep KEEP_DELETED_CELLS false above and see how the perf numbers look with and without your patch?

I have run the same tests again, except with KEEP_DELETED_CELLS set to false.

master

  1. scan with dual file compaction enabled 4047ms
  2. scan with dual file compaction disabled 4277ms
  3. scan with delete markers and dual file compaction disabled 2.5031 seconds
  4. scan with delete markers and dual file compaction enabled 0.0198 seconds

this PR

  1. scan with dual file compaction enabled 4134ms
  2. scan with dual file compaction disabled 3383ms
  3. scan with delete markers and dual file compaction disabled 3.4726 seconds
  4. scan with delete markers and dual file compaction enabled 0.0245 seconds

It looks like there is some performance degradation in result 3. I will dig into it.

EungsopYoo (Contributor, Author), Jan 23, 2025:

> (quoting the comment above questioning whether skipping to the next column helps when reading from HFiles rather than from the MemStore)

https://issues.apache.org/jira/browse/HBASE-29039
The performance tests in the Jira description cover only the case of reading from the MemStore. So I have run new performance tests that read from StoreFiles only, with and without dual file compaction.

create 'test', 'c'

java_import org.apache.hadoop.hbase.client.Delete
java_import org.apache.hadoop.hbase.TableName
java_import java.lang.System

con = @hbase.instance_variable_get(:@connection)
table = con.getTable(TableName.valueOf('test'))

1000.times do |i|
  # batch 10000 deletes with different timestamps every 10 seconds
  now = System.currentTimeMillis()
  dels = 10000.times.map do |j|
    del = Delete.new(Bytes.toBytes('row'))
    del.addFamily(Bytes.toBytes('c'), now + j)
  end
  table.delete(dels)
  sleep(10)
  flush 'test'
  # Manually trigger a minor compaction, because compaction is not triggered automatically here (I don't know why yet)
  compact 'test' if i % 10 == 0
  puts "i - #{i}"
  # Read after flush from StoreFiles
  get 'test', 'row'
end

Get latency for 'row' after each flush (the "Took" time reported by the shell; every 10th iteration shown; every get returned 0 row(s)):

  i     master, no DFC   master, DFC   this PR, no DFC   this PR, DFC
  0     0.0184 s         0.0050 s      0.0061 s          0.0046 s
  10    0.0206 s         0.0175 s      0.0102 s          0.0103 s
  20    0.0398 s         0.0265 s      0.0052 s          0.0074 s
  30    0.0462 s         0.0310 s      0.0073 s          0.0067 s
  40    0.0517 s         0.0205 s      0.0077 s          0.0077 s
  50    0.0730 s         0.0532 s      0.0115 s          0.0091 s
  60    0.0903 s         0.0360 s      0.0101 s          0.0106 s

(DFC = dual file compaction.)

The results show that the optimization of this PR also works when reading from StoreFiles, even without dual file compaction. What do you think about these results?

EungsopYoo (Contributor, Author), Jan 23, 2025:

> (quoting the earlier comment: the KEEP_DELETED_CELLS=false results above, where result 3 regressed from 2.5031 s on master to 3.4726 s with this PR)

https://github.com/EungsopYoo/hbase/blob/63901155caf5c226b02564128669234c08251e8d/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/NormalUserScanQueryMatcher.java#L81-L99

The slight performance degradation is due to removing the early return of MatchCode.SKIP in the normal cases. Because of that removal, the checkDeleted() method is executed more often than before, which adds some computational overhead. I found this by removing the added code blocks one by one.
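
To spell out what moved, here is a toy paraphrase of the control flow (my own simplified names, not the actual HBase classes): the unconditional SKIP for delete markers became conditional, so in the optimized configuration markers now also reach the later checks.

// Toy paraphrase of the match() control flow, not the real HBase code.
public class EarlyReturnParaphrase {
  enum MatchCode { SKIP, SEEK_NEXT_COL, INCLUDE }

  // Old shape: a delete marker always short-circuited with SKIP.
  static MatchCode matchOld(boolean isDeleteMarker) {
    if (isDeleteMarker) {
      return MatchCode.SKIP; // early return
    }
    return checkDeleted();
  }

  // New shape: the early return only happens when the optimization does not apply,
  // so delete markers can fall through to checkDeletedEffectively()/checkDeleted().
  static MatchCode matchNew(boolean isDeleteMarker, boolean canOptimize, boolean deletedEffectively) {
    if (isDeleteMarker && !canOptimize) {
      return MatchCode.SKIP;
    }
    if (deletedEffectively) {
      return MatchCode.SEEK_NEXT_COL; // jump past the rest of the column
    }
    return checkDeleted();
  }

  // Stand-in for the per-cell DeleteTracker bookkeeping that now runs more often.
  static MatchCode checkDeleted() {
    return MatchCode.INCLUDE;
  }

  public static void main(String[] args) {
    System.out.println(matchOld(true));              // SKIP
    System.out.println(matchNew(true, true, false)); // falls through to checkDeleted()
  }
}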

Contributor (Author):

ScanQueryMatcher.MatchCode qcode = matcher.match(cell, prevCell);

        case SEEK_NEXT_COL:
          seekOrSkipToNextColumn(cell);
          NextState stateAfterSeekNextColumn = needToReturn(outResult);
          if (stateAfterSeekNextColumn != null) {
            return scannerContext.setScannerState(stateAfterSeekNextColumn).hasMoreValues();
          }
          break;
        case SKIP:
          this.heap.next();
          break;

With some more digging, I found that the actual cause of the degradation is the return value of matcher.match(). Processing a SKIP return value is very lightweight, but processing a SEEK_NEXT_COL return value is much heavier.


@Override
protected void reset() {
deletes.reset();
@@ -92,11 +132,11 @@ protected boolean isGet() {

public static NormalUserScanQueryMatcher create(Scan scan, ScanInfo scanInfo,
ColumnTracker columns, DeleteTracker deletes, boolean hasNullColumn, long oldestUnexpiredTS,
long now) throws IOException {
long now, boolean visibilityLabelEnabled) throws IOException {
if (scan.isReversed()) {
if (scan.includeStopRow()) {
return new NormalUserScanQueryMatcher(scan, scanInfo, columns, hasNullColumn, deletes,
oldestUnexpiredTS, now) {
oldestUnexpiredTS, now, visibilityLabelEnabled) {

@Override
protected boolean moreRowsMayExistsAfter(int cmpToStopRow) {
@@ -105,7 +145,7 @@ protected boolean moreRowsMayExistsAfter(int cmpToStopRow) {
};
} else {
return new NormalUserScanQueryMatcher(scan, scanInfo, columns, hasNullColumn, deletes,
oldestUnexpiredTS, now) {
oldestUnexpiredTS, now, visibilityLabelEnabled) {

@Override
protected boolean moreRowsMayExistsAfter(int cmpToStopRow) {
@@ -116,7 +156,7 @@ protected boolean moreRowsMayExistsAfter(int cmpToStopRow) {
} else {
if (scan.includeStopRow()) {
return new NormalUserScanQueryMatcher(scan, scanInfo, columns, hasNullColumn, deletes,
oldestUnexpiredTS, now) {
oldestUnexpiredTS, now, visibilityLabelEnabled) {

@Override
protected boolean moreRowsMayExistsAfter(int cmpToStopRow) {
@@ -125,7 +165,7 @@ protected boolean moreRowsMayExistsAfter(int cmpToStopRow) {
};
} else {
return new NormalUserScanQueryMatcher(scan, scanInfo, columns, hasNullColumn, deletes,
oldestUnexpiredTS, now) {
oldestUnexpiredTS, now, visibilityLabelEnabled) {

@Override
protected boolean moreRowsMayExistsAfter(int cmpToStopRow) {
@@ -238,6 +238,25 @@ protected final MatchCode checkDeleted(DeleteTracker deletes, ExtendedCell cell)
*/
public abstract MatchCode match(ExtendedCell cell) throws IOException;

/**
* Determines if the caller should do one of several things:
* <ul>
* <li>seek/skip to the next row (MatchCode.SEEK_NEXT_ROW)</li>
* <li>seek/skip to the next column (MatchCode.SEEK_NEXT_COL)</li>
* <li>include the current KeyValue (MatchCode.INCLUDE)</li>
* <li>ignore the current KeyValue (MatchCode.SKIP)</li>
* <li>go to the next row (MatchCode.DONE)</li>
* </ul>
* @param cell KeyValue to check
* @param prevCell KeyValue checked previously
* @return The match code instance.
* @throws IOException in case there is an internal consistency problem caused by a data
* corruption.
*/
public MatchCode match(ExtendedCell cell, ExtendedCell prevCell) throws IOException {
return match(cell);
}

/** Returns the start key */
public ExtendedCell getStartKey() {
return startKey;
@@ -284,7 +303,8 @@ public ExtendedCell getKeyForNextColumn(ExtendedCell cell) {
// see HBASE-18471 for more details
// see TestFromClientSide3#testScanAfterDeletingSpecifiedRow
// see TestFromClientSide3#testScanAfterDeletingSpecifiedRowV2
if (cell.getQualifierLength() == 0) {
// But we can seek to the next column if the cell is a DeleteFamily marker.
if (cell.getQualifierLength() == 0 && !PrivateCellUtil.isDeleteFamily(cell)) {
ExtendedCell nextKey = PrivateCellUtil.createNextOnRowCol(cell);
if (nextKey != cell) {
return nextKey;
@@ -287,7 +287,8 @@ public boolean moreRowsMayExistAfter(ExtendedCell cell) {

public static UserScanQueryMatcher create(Scan scan, ScanInfo scanInfo,
NavigableSet<byte[]> columns, long oldestUnexpiredTS, long now,
RegionCoprocessorHost regionCoprocessorHost) throws IOException {
RegionCoprocessorHost regionCoprocessorHost, boolean visibilityLabelEnabled)
throws IOException {
boolean hasNullColumn =
!(columns != null && columns.size() != 0 && columns.first().length != 0);
Pair<DeleteTracker, ColumnTracker> trackers =
@@ -299,7 +300,7 @@ public static UserScanQueryMatcher create(Scan scan, ScanInfo scanInfo,
oldestUnexpiredTS, now);
} else {
return NormalUserScanQueryMatcher.create(scan, scanInfo, columnTracker, deleteTracker,
hasNullColumn, oldestUnexpiredTS, now);
hasNullColumn, oldestUnexpiredTS, now, visibilityLabelEnabled);
}
}
}
@@ -39,6 +39,7 @@
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.TagType;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.io.util.StreamUtils;
@@ -329,6 +330,15 @@ public static List<Tag> createVisibilityExpTags(String visExpression,
return tags;
}

public static boolean isVisibilityLabelEnabled(Configuration conf) {
return conf.getInt("hfile.format.version", 0) == 3
&& conf.getBoolean(User.HBASE_SECURITY_AUTHORIZATION_CONF_KEY, false)
&& (conf.get(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, "")
.contains(VisibilityController.class.getName())
|| conf.get(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, "")
.contains(VisibilityController.class.getName()));
}

private static void getLabelOrdinals(ExpressionNode node, List<Integer> labelOrdinals,
Set<Integer> auths, boolean checkAuths, VisibilityLabelOrdinalProvider ordinalProvider)
throws IOException, InvalidLabelException {