4242import java .util .concurrent .atomic .AtomicBoolean ;
4343import java .util .concurrent .atomic .AtomicLong ;
4444import java .util .concurrent .atomic .LongAdder ;
45- import org .apache .commons .lang3 .mutable .MutableObject ;
4645import org .apache .hadoop .conf .Configuration ;
4746import org .apache .hadoop .fs .FileSystem ;
4847import org .apache .hadoop .fs .Path ;
@@ -3282,8 +3281,7 @@ private void checkLimitOfRows(int numOfCompleteRows, int limitOfRows, boolean mo
32823281 // Records on the response builder whether we have more results in region.
32833282 private void scan (HBaseRpcController controller , ScanRequest request , RegionScannerHolder rsh ,
32843283 long maxQuotaResultSize , int maxResults , int limitOfRows , List <Result > results ,
3285- ScanResponse .Builder builder , MutableObject <Object > lastBlock , RpcCall rpcCall )
3286- throws IOException {
3284+ ScanResponse .Builder builder , RpcCall rpcCall ) throws IOException {
32873285 HRegion region = rsh .r ;
32883286 RegionScanner scanner = rsh .s ;
32893287 long maxResultSize ;
@@ -3343,7 +3341,19 @@ private void scan(HBaseRpcController controller, ScanRequest request, RegionScan
33433341 ScannerContext .Builder contextBuilder = ScannerContext .newBuilder (true );
33443342 // maxResultSize - either we can reach this much size for all cells(being read) data or sum
33453343 // of heap size occupied by cells(being read). Cell data means its key and value parts.
3346- contextBuilder .setSizeLimit (sizeScope , maxResultSize , maxResultSize );
3344+ // maxQuotaResultSize - max results just from server side configuration and quotas, without
3345+ // user's specified max. We use this for evaluating limits based on blocks (not cells).
3346+ // We may have accumulated some results in the coprocessor preScannerNext call. We estimate
3347+ // the block and cell size of those using a call to addSize. Update our maximums for the
3348+ // scanner context so we can account for them in the real scan.
3349+ long maxCellSize = maxResultSize ;
3350+ long maxBlockSize = maxQuotaResultSize ;
3351+ if (rpcCall != null ) {
3352+ maxBlockSize -= rpcCall .getResponseBlockSize ();
3353+ maxCellSize -= rpcCall .getResponseCellSize ();
3354+ }
3355+
3356+ contextBuilder .setSizeLimit (sizeScope , maxCellSize , maxCellSize , maxBlockSize );
33473357 contextBuilder .setBatchLimit (scanner .getBatch ());
33483358 contextBuilder .setTimeLimit (timeScope , timeLimit );
33493359 contextBuilder .setTrackMetrics (trackMetrics );
@@ -3398,7 +3408,6 @@ private void scan(HBaseRpcController controller, ScanRequest request, RegionScan
33983408 }
33993409 boolean mayHaveMoreCellsInRow = scannerContext .mayHaveMoreCellsInRow ();
34003410 Result r = Result .create (values , null , stale , mayHaveMoreCellsInRow );
3401- lastBlock .setValue (addSize (rpcCall , r , lastBlock .getValue ()));
34023411 results .add (r );
34033412 numOfResults ++;
34043413 if (!mayHaveMoreCellsInRow && limitOfRows > 0 ) {
@@ -3427,12 +3436,18 @@ private void scan(HBaseRpcController controller, ScanRequest request, RegionScan
34273436 limitReached = sizeLimitReached || timeLimitReached || resultsLimitReached ;
34283437
34293438 if (limitReached || !moreRows ) {
3439+ // With block size limit, we may exceed size limit without collecting any results.
3440+ // In this case we want to send heartbeat and/or cursor. We don't want to send heartbeat
3441+ // or cursor if results were collected, for example for cell size or heap size limits.
3442+ boolean sizeLimitReachedWithoutResults = sizeLimitReached && results .isEmpty ();
34303443 // We only want to mark a ScanResponse as a heartbeat message in the event that
34313444 // there are more values to be read server side. If there aren't more values,
34323445 // marking it as a heartbeat is wasteful because the client will need to issue
34333446 // another ScanRequest only to realize that they already have all the values
3434- if (moreRows && timeLimitReached ) {
3435- // Heartbeat messages occur when the time limit has been reached.
3447+ if (moreRows && (timeLimitReached || sizeLimitReachedWithoutResults )) {
3448+ // Heartbeat messages occur when the time limit has been reached, or size limit has
3449+ // been reached before collecting any results. This can happen for heavily filtered
3450+ // scans which scan over too many blocks.
34363451 builder .setHeartbeatMessage (true );
34373452 if (rsh .needCursor ) {
34383453 Cell cursorCell = scannerContext .getLastPeekedCell ();
@@ -3445,6 +3460,10 @@ private void scan(HBaseRpcController controller, ScanRequest request, RegionScan
34453460 }
34463461 values .clear ();
34473462 }
3463+ if (rpcCall != null ) {
3464+ rpcCall .incrementResponseBlockSize (scannerContext .getBlockSizeProgress ());
3465+ rpcCall .incrementResponseCellSize (scannerContext .getHeapSizeProgress ());
3466+ }
34483467 builder .setMoreResultsInRegion (moreRows );
34493468 // Check to see if the client requested that we track metrics server side. If the
34503469 // client requested metrics, retrieve the metrics from the scanner context.
@@ -3606,7 +3625,6 @@ public ScanResponse scan(final RpcController controller, final ScanRequest reque
36063625 } else {
36073626 limitOfRows = -1 ;
36083627 }
3609- MutableObject <Object > lastBlock = new MutableObject <>();
36103628 boolean scannerClosed = false ;
36113629 try {
36123630 List <Result > results = new ArrayList <>(Math .min (rows , 512 ));
@@ -3616,8 +3634,18 @@ public ScanResponse scan(final RpcController controller, final ScanRequest reque
36163634 if (region .getCoprocessorHost () != null ) {
36173635 Boolean bypass = region .getCoprocessorHost ().preScannerNext (scanner , results , rows );
36183636 if (!results .isEmpty ()) {
3637+ // If scanner CP added results to list, we want to account for cell and block size of
3638+ // that work. We estimate this using addSize, since CP does not get ScannerContext. If
3639+ // !done, the actual scan call below will use more accurate ScannerContext block and
3640+ // cell size tracking for the rest of the request. The two result sets will be added
3641+ // together in the RpcCall accounting.
3642+ // This here is just an estimate (see addSize for more details on estimation). We don't
3643+ // pass lastBlock to the scan call below because the real scan uses ScannerContext,
3644+ // which does not use lastBlock tracking. This may result in over counting by 1 block,
3645+ // but that is unlikely to matter since addSize is already a rough estimate.
3646+ Object lastBlock = null ;
36193647 for (Result r : results ) {
3620- lastBlock . setValue ( addSize (rpcCall , r , lastBlock . getValue ()) );
3648+ lastBlock = addSize (rpcCall , r , lastBlock );
36213649 }
36223650 }
36233651 if (bypass != null && bypass .booleanValue ()) {
@@ -3626,7 +3654,7 @@ public ScanResponse scan(final RpcController controller, final ScanRequest reque
36263654 }
36273655 if (!done ) {
36283656 scan ((HBaseRpcController ) controller , request , rsh , maxQuotaResultSize , rows , limitOfRows ,
3629- results , builder , lastBlock , rpcCall );
3657+ results , builder , rpcCall );
36303658 } else {
36313659 builder .setMoreResultsInRegion (!results .isEmpty ());
36323660 }
0 commit comments