|
33 | 33 | import org.elasticsearch.Version; |
34 | 34 | import org.elasticsearch.action.ActionListener; |
35 | 35 | import org.elasticsearch.action.StepListener; |
| 36 | +import org.elasticsearch.action.support.PlainActionFuture; |
36 | 37 | import org.elasticsearch.cluster.routing.IndexShardRoutingTable; |
37 | 38 | import org.elasticsearch.cluster.routing.ShardRouting; |
38 | 39 | import org.elasticsearch.common.StopWatch; |
@@ -226,25 +227,27 @@ public void recoverToTarget(ActionListener<RecoveryResponse> listener) { |
226 | 227 | logger.trace("snapshot translog for recovery; current size is [{}]", |
227 | 228 | shard.estimateNumberOfHistoryOperations("peer-recovery", startingSeqNo)); |
228 | 229 | } |
229 | | - final SendSnapshotResult sendSnapshotResult; |
230 | | - try (Translog.Snapshot snapshot = shard.getHistoryOperations("peer-recovery", startingSeqNo)) { |
231 | | - // we can release the retention lock here because the snapshot itself will retain the required operations. |
232 | | - IOUtils.close(retentionLock, () -> resources.remove(retentionLock)); |
233 | | - // we have to capture the max_seen_auto_id_timestamp and the max_seq_no_of_updates to make sure that these values |
234 | | - // are at least as high as the corresponding values on the primary when any of these operations were executed on it. |
235 | | - final long maxSeenAutoIdTimestamp = shard.getMaxSeenAutoIdTimestamp(); |
236 | | - final long maxSeqNoOfUpdatesOrDeletes = shard.getMaxSeqNoOfUpdatesOrDeletes(); |
237 | | - sendSnapshotResult = phase2(startingSeqNo, requiredSeqNoRangeStart, endingSeqNo, snapshot, |
238 | | - maxSeenAutoIdTimestamp, maxSeqNoOfUpdatesOrDeletes); |
239 | | - } catch (Exception e) { |
240 | | - throw new RecoveryEngineException(shard.shardId(), 2, "phase2 failed", e); |
241 | | - } |
242 | 230 |
|
| 231 | + final Translog.Snapshot phase2Snapshot = shard.getHistoryOperations("peer-recovery", startingSeqNo); |
| 232 | + resources.add(phase2Snapshot); |
| 233 | + // we can release the retention lock here because the snapshot itself will retain the required operations. |
| 234 | + IOUtils.close(retentionLock); |
| 235 | + // we have to capture the max_seen_auto_id_timestamp and the max_seq_no_of_updates to make sure that these values |
| 236 | + // are at least as high as the corresponding values on the primary when any of these operations were executed on it. |
| 237 | + final long maxSeenAutoIdTimestamp = shard.getMaxSeenAutoIdTimestamp(); |
| 238 | + final long maxSeqNoOfUpdatesOrDeletes = shard.getMaxSeqNoOfUpdatesOrDeletes(); |
| 239 | + final StepListener<SendSnapshotResult> sendSnapshotStep = new StepListener<>(); |
| 240 | + phase2(startingSeqNo, requiredSeqNoRangeStart, endingSeqNo, phase2Snapshot, maxSeenAutoIdTimestamp, |
| 241 | + maxSeqNoOfUpdatesOrDeletes, sendSnapshotStep); |
| 242 | + sendSnapshotStep.whenComplete( |
| 243 | + r -> IOUtils.close(phase2Snapshot), |
| 244 | + e -> onFailure.accept(new RecoveryEngineException(shard.shardId(), 2, "phase2 failed", e))); |
243 | 245 | final StepListener<Void> finalizeStep = new StepListener<>(); |
244 | | - finalizeRecovery(sendSnapshotResult.targetLocalCheckpoint, finalizeStep); |
| 246 | + sendSnapshotStep.whenComplete(r -> finalizeRecovery(r.targetLocalCheckpoint, finalizeStep), onFailure); |
| 247 | + |
245 | 248 | finalizeStep.whenComplete(r -> { |
246 | | - assert resources.isEmpty() : "not every resource is released [" + resources + "]"; |
247 | 249 | final long phase1ThrottlingWaitTime = 0L; // TODO: return the actual throttle time |
| 250 | + final SendSnapshotResult sendSnapshotResult = sendSnapshotStep.result(); |
248 | 251 | final RecoveryResponse response = new RecoveryResponse(sendFileResult.phase1FileNames, sendFileResult.phase1FileSizes, |
249 | 252 | sendFileResult.phase1ExistingFileNames, sendFileResult.phase1ExistingFileSizes, sendFileResult.totalSize, |
250 | 253 | sendFileResult.existingTotalSize, sendFileResult.took.millis(), phase1ThrottlingWaitTime, |
@@ -507,10 +510,17 @@ TimeValue prepareTargetForTranslog(final boolean fileBasedRecovery, final int to |
507 | 510 | * @param snapshot a snapshot of the translog |
508 | 511 | * @param maxSeenAutoIdTimestamp the max auto_id_timestamp of append-only requests on the primary |
509 | 512 | * @param maxSeqNoOfUpdatesOrDeletes the max seq_no of updates or deletes on the primary after these operations were executed on it. |
510 | | - * @return the send snapshot result |
| 513 | + * @param listener a listener which will be notified with the send snapshot result, which carries the target's local checkpoint. |
511 | 514 | */ |
512 | | - SendSnapshotResult phase2(long startingSeqNo, long requiredSeqNoRangeStart, long endingSeqNo, Translog.Snapshot snapshot, |
513 | | - long maxSeenAutoIdTimestamp, long maxSeqNoOfUpdatesOrDeletes) throws IOException { |
| 515 | + void phase2(long startingSeqNo, long requiredSeqNoRangeStart, long endingSeqNo, Translog.Snapshot snapshot, long maxSeenAutoIdTimestamp, |
| 516 | + long maxSeqNoOfUpdatesOrDeletes, ActionListener<SendSnapshotResult> listener) throws IOException { |
| 517 | + ActionListener.completeWith(listener, () -> sendSnapshotBlockingly( |
| 518 | + startingSeqNo, requiredSeqNoRangeStart, endingSeqNo, snapshot, maxSeenAutoIdTimestamp, maxSeqNoOfUpdatesOrDeletes)); |
| 519 | + } |
| 520 | + |
| 521 | + private SendSnapshotResult sendSnapshotBlockingly(long startingSeqNo, long requiredSeqNoRangeStart, long endingSeqNo, |
| 522 | + Translog.Snapshot snapshot, long maxSeenAutoIdTimestamp, |
| 523 | + long maxSeqNoOfUpdatesOrDeletes) throws IOException { |
514 | 524 | assert requiredSeqNoRangeStart <= endingSeqNo + 1: |
515 | 525 | "requiredSeqNoRangeStart " + requiredSeqNoRangeStart + " is larger than endingSeqNo " + endingSeqNo; |
516 | 526 | assert startingSeqNo <= requiredSeqNoRangeStart : |
@@ -538,9 +548,11 @@ SendSnapshotResult phase2(long startingSeqNo, long requiredSeqNoRangeStart, long |
538 | 548 | } |
539 | 549 |
|
540 | 550 | final CancellableThreads.IOInterruptible sendBatch = () -> { |
541 | | - final long targetCheckpoint = recoveryTarget.indexTranslogOperations( |
542 | | - operations, expectedTotalOps, maxSeenAutoIdTimestamp, maxSeqNoOfUpdatesOrDeletes); |
543 | | - targetLocalCheckpoint.set(targetCheckpoint); |
| 551 | + // TODO: Make this non-blocking |
| 552 | + final PlainActionFuture<Long> future = new PlainActionFuture<>(); |
| 553 | + recoveryTarget.indexTranslogOperations( |
| 554 | + operations, expectedTotalOps, maxSeenAutoIdTimestamp, maxSeqNoOfUpdatesOrDeletes, future); |
| 555 | + targetLocalCheckpoint.set(future.actionGet()); |
544 | 556 | }; |
545 | 557 |
|
546 | 558 | // send operations in batches |
|
0 commit comments