@@ -2302,7 +2302,18 @@ public void acquirePrimaryOperationPermit(ActionListener<Releasable> onPermitAcq
23022302 indexShardOperationPermits .acquire (onPermitAcquired , executorOnDelay , false , debugInfo );
23032303 }
23042304
2305- private <E extends Exception > void bumpPrimaryTerm (long newPrimaryTerm , final CheckedRunnable <E > onBlocked ) {
2305+ /**
2306+ * Acquire all primary operation permits. Once all permits are acquired, the provided ActionListener is called.
2307+ * It is the responsibility of the caller to close the {@link Releasable}.
2308+ */
2309+ public void acquireAllPrimaryOperationsPermits (final ActionListener <Releasable > onPermitAcquired , final TimeValue timeout ) {
2310+ verifyNotClosed ();
2311+ assert shardRouting .primary () : "acquireAllPrimaryOperationsPermits should only be called on primary shard: " + shardRouting ;
2312+
2313+ indexShardOperationPermits .asyncBlockOperations (onPermitAcquired , timeout .duration (), timeout .timeUnit ());
2314+ }
2315+
2316+ private <E extends Exception > void bumpPrimaryTerm (final long newPrimaryTerm , final CheckedRunnable <E > onBlocked ) {
23062317 assert Thread .holdsLock (mutex );
23072318 assert newPrimaryTerm > pendingPrimaryTerm ;
23082319 assert operationPrimaryTerm <= pendingPrimaryTerm ;
@@ -2357,11 +2368,42 @@ public void onResponse(final Releasable releasable) {
23572368 public void acquireReplicaOperationPermit (final long opPrimaryTerm , final long globalCheckpoint , final long maxSeqNoOfUpdatesOrDeletes ,
23582369 final ActionListener <Releasable > onPermitAcquired , final String executorOnDelay ,
23592370 final Object debugInfo ) {
2371+ innerAcquireReplicaOperationPermit (opPrimaryTerm , globalCheckpoint , maxSeqNoOfUpdatesOrDeletes , onPermitAcquired ,
2372+ (listener ) -> indexShardOperationPermits .acquire (listener , executorOnDelay , true , debugInfo ));
2373+ }
2374+
2375+ /**
2376+ * Acquire all replica operation permits whenever the shard is ready for indexing (see
2377+ * {@link #acquireAllPrimaryOperationsPermits(ActionListener, TimeValue)}). If the given primary term is lower than the one in
2378+ * {@link #shardRouting}, the {@link ActionListener#onFailure(Exception)} method of the provided listener is invoked with an
2379+ * {@link IllegalStateException}.
2380+ *
2381+ * @param opPrimaryTerm the operation primary term
2382+ * @param globalCheckpoint the global checkpoint associated with the request
2383+ * @param maxSeqNoOfUpdatesOrDeletes the max seq_no of updates (index operations overwrite Lucene) or deletes captured on the primary
2384+ * after this replication request was executed on it (see {@link #getMaxSeqNoOfUpdatesOrDeletes()})
2385+ * @param onPermitAcquired the listener for permit acquisition
2386+ * @param timeout the maximum time to wait for the in-flight operations block
2387+ */
2388+ public void acquireAllReplicaOperationsPermits (final long opPrimaryTerm ,
2389+ final long globalCheckpoint ,
2390+ final long maxSeqNoOfUpdatesOrDeletes ,
2391+ final ActionListener <Releasable > onPermitAcquired ,
2392+ final TimeValue timeout ) {
2393+ innerAcquireReplicaOperationPermit (opPrimaryTerm , globalCheckpoint , maxSeqNoOfUpdatesOrDeletes , onPermitAcquired ,
2394+ (listener ) -> indexShardOperationPermits .asyncBlockOperations (listener , timeout .duration (), timeout .timeUnit ()));
2395+ }
2396+
2397+ private void innerAcquireReplicaOperationPermit (final long opPrimaryTerm ,
2398+ final long globalCheckpoint ,
2399+ final long maxSeqNoOfUpdatesOrDeletes ,
2400+ final ActionListener <Releasable > onPermitAcquired ,
2401+ final Consumer <ActionListener <Releasable >> consumer ) {
23602402 verifyNotClosed ();
23612403 if (opPrimaryTerm > pendingPrimaryTerm ) {
23622404 synchronized (mutex ) {
23632405 if (opPrimaryTerm > pendingPrimaryTerm ) {
2364- IndexShardState shardState = state ();
2406+ final IndexShardState shardState = state ();
23652407 // only roll translog and update primary term if shard has made it past recovery
23662408 // Having a new primary term here means that the old primary failed and that there is a new primary, which again
23672409 // means that the master will fail this shard as all initializing shards are failed when a primary is selected
@@ -2373,58 +2415,54 @@ public void acquireReplicaOperationPermit(final long opPrimaryTerm, final long g
23732415
23742416 if (opPrimaryTerm > pendingPrimaryTerm ) {
23752417 bumpPrimaryTerm (opPrimaryTerm , () -> {
2376- updateGlobalCheckpointOnReplica (globalCheckpoint , "primary term transition" );
2377- final long currentGlobalCheckpoint = getGlobalCheckpoint ();
2378- final long maxSeqNo = seqNoStats ().getMaxSeqNo ();
2379- logger .info ("detected new primary with primary term [{}], global checkpoint [{}], max_seq_no [{}]" ,
2380- opPrimaryTerm , currentGlobalCheckpoint , maxSeqNo );
2381- if (currentGlobalCheckpoint < maxSeqNo ) {
2382- resetEngineToGlobalCheckpoint ();
2383- } else {
2384- getEngine ().rollTranslogGeneration ();
2385- }
2418+ updateGlobalCheckpointOnReplica (globalCheckpoint , "primary term transition" );
2419+ final long currentGlobalCheckpoint = getGlobalCheckpoint ();
2420+ final long maxSeqNo = seqNoStats ().getMaxSeqNo ();
2421+ logger .info ("detected new primary with primary term [{}], global checkpoint [{}], max_seq_no [{}]" ,
2422+ opPrimaryTerm , currentGlobalCheckpoint , maxSeqNo );
2423+ if (currentGlobalCheckpoint < maxSeqNo ) {
2424+ resetEngineToGlobalCheckpoint ();
2425+ } else {
2426+ getEngine ().rollTranslogGeneration ();
2427+ }
23862428 });
23872429 }
23882430 }
23892431 }
23902432 }
2391-
23922433 assert opPrimaryTerm <= pendingPrimaryTerm
2393- : "operation primary term [" + opPrimaryTerm + "] should be at most [" + pendingPrimaryTerm + "]" ;
2394- indexShardOperationPermits .acquire (
2395- new ActionListener <Releasable >() {
2396- @ Override
2397- public void onResponse (final Releasable releasable ) {
2398- if (opPrimaryTerm < operationPrimaryTerm ) {
2399- releasable .close ();
2400- final String message = String .format (
2401- Locale .ROOT ,
2402- "%s operation primary term [%d] is too old (current [%d])" ,
2403- shardId ,
2404- opPrimaryTerm ,
2405- operationPrimaryTerm );
2406- onPermitAcquired .onFailure (new IllegalStateException (message ));
2407- } else {
2408- assert assertReplicationTarget ();
2409- try {
2410- updateGlobalCheckpointOnReplica (globalCheckpoint , "operation" );
2411- advanceMaxSeqNoOfUpdatesOrDeletes (maxSeqNoOfUpdatesOrDeletes );
2412- } catch (Exception e ) {
2413- releasable .close ();
2414- onPermitAcquired .onFailure (e );
2415- return ;
2416- }
2417- onPermitAcquired .onResponse (releasable );
2418- }
2419- }
2420-
2421- @ Override
2422- public void onFailure (final Exception e ) {
2434+ : "operation primary term [" + opPrimaryTerm + "] should be at most [" + pendingPrimaryTerm + "]" ;
2435+ consumer .accept (new ActionListener <Releasable >() {
2436+ @ Override
2437+ public void onResponse (final Releasable releasable ) {
2438+ if (opPrimaryTerm < operationPrimaryTerm ) {
2439+ releasable .close ();
2440+ final String message = String .format (
2441+ Locale .ROOT ,
2442+ "%s operation primary term [%d] is too old (current [%d])" ,
2443+ shardId ,
2444+ opPrimaryTerm ,
2445+ operationPrimaryTerm );
2446+ onPermitAcquired .onFailure (new IllegalStateException (message ));
2447+ } else {
2448+ assert assertReplicationTarget ();
2449+ try {
2450+ updateGlobalCheckpointOnReplica (globalCheckpoint , "operation" );
2451+ advanceMaxSeqNoOfUpdatesOrDeletes (maxSeqNoOfUpdatesOrDeletes );
2452+ } catch (Exception e ) {
2453+ releasable .close ();
24232454 onPermitAcquired .onFailure (e );
2455+ return ;
24242456 }
2425- },
2426- executorOnDelay ,
2427- true , debugInfo );
2457+ onPermitAcquired .onResponse (releasable );
2458+ }
2459+ }
2460+
2461+ @ Override
2462+ public void onFailure (final Exception e ) {
2463+ onPermitAcquired .onFailure (e );
2464+ }
2465+ });
24282466 }
24292467
24302468 public int getActiveOperationsCount () {
0 commit comments