From 110517974222968fec0eb28b18c0be39744f14ca Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Tue, 14 Aug 2018 09:35:45 -0400 Subject: [PATCH 01/17] Reset replica engine before primary-replica resync When a replica starts following a newly promoted primary, it may have some operations which don't exist on the new primary. We need to reset replicas to the global checkpoint before executing primary-replica resync. These two steps will align replicas to the primary. This change resets an engine of a replica to the safe commit when detecting a new primary term, then reindex operations from the local translog up to the global checkpoint. --- .../TransportResyncReplicationAction.java | 6 +- .../elasticsearch/index/engine/Engine.java | 9 +- .../index/engine/InternalEngine.java | 30 +- .../index/engine/ReadOnlyEngine.java | 314 ++++++++++++++++++ .../elasticsearch/index/shard/IndexShard.java | 180 ++++++++-- .../index/translog/Translog.java | 40 ++- .../index/engine/InternalEngineTests.java | 74 ++++- .../IndexLevelReplicationTests.java | 6 +- .../RecoveryDuringReplicationTests.java | 107 ++++-- .../index/shard/IndexShardTests.java | 140 ++++++-- .../index/shard/RefreshListenersTests.java | 2 +- .../index/translog/TranslogTests.java | 46 ++- .../index/engine/EngineTestCase.java | 27 ++ .../index/shard/IndexShardTestCase.java | 28 +- 14 files changed, 869 insertions(+), 140 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/index/engine/ReadOnlyEngine.java diff --git a/server/src/main/java/org/elasticsearch/action/resync/TransportResyncReplicationAction.java b/server/src/main/java/org/elasticsearch/action/resync/TransportResyncReplicationAction.java index f8ad58b9cac8c..6275115e7d77d 100644 --- a/server/src/main/java/org/elasticsearch/action/resync/TransportResyncReplicationAction.java +++ b/server/src/main/java/org/elasticsearch/action/resync/TransportResyncReplicationAction.java @@ -121,16 +121,14 @@ protected WriteReplicaResult shardOperationOnReplica(ResyncReplicationRequest re public static Translog.Location performOnReplica(ResyncReplicationRequest request, IndexShard replica) throws Exception { Translog.Location location = null; for (Translog.Operation operation : request.getOperations()) { - final Engine.Result operationResult = replica.applyTranslogOperation(operation, Engine.Operation.Origin.REPLICA); + final Engine.Result operationResult = replica.applyResyncOperation(operation); if (operationResult.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) { throw new TransportReplicationAction.RetryOnReplicaException(replica.shardId(), "Mappings are not available on the replica yet, triggered update: " + operationResult.getRequiredMappingUpdate()); } location = syncOperationResultOrThrow(operationResult, location); } - if (request.getTrimAboveSeqNo() != SequenceNumbers.UNASSIGNED_SEQ_NO) { - replica.trimOperationOfPreviousPrimaryTerms(request.getTrimAboveSeqNo()); - } + replica.afterApplyResyncOperationsBulk(request.getTrimAboveSeqNo()); return location; } diff --git a/server/src/main/java/org/elasticsearch/index/engine/Engine.java b/server/src/main/java/org/elasticsearch/index/engine/Engine.java index 31da7afc51a10..1f5dd09cfb3e1 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/Engine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/Engine.java @@ -1623,16 +1623,21 @@ public interface Warmer { public abstract int fillSeqNoGaps(long primaryTerm) throws IOException; /** - * Performs recovery from the transaction log. 
+ * Performs recovery from the transaction log up to {@code recoverUpToSeqNo} . * This operation will close the engine if the recovery fails. */ - public abstract Engine recoverFromTranslog() throws IOException; + public abstract Engine recoverFromTranslog(long recoverUpToSeqNo) throws IOException; /** * Do not replay translog operations, but make the engine be ready. */ public abstract void skipTranslogRecovery(); + /** + * Returns a {@link ReadOnlyEngine} that points to the last commit of the current engine. + */ + public abstract Engine lockDownEngine() throws IOException; + /** * Returns true iff this engine is currently recovering from translog. */ diff --git a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java index a30127a24ae21..4058de23c81f5 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java @@ -364,7 +364,7 @@ private void bootstrapAppendOnlyInfoFromWriter(IndexWriter writer) { } @Override - public InternalEngine recoverFromTranslog() throws IOException { + public InternalEngine recoverFromTranslog(long recoverUpToSeqNo) throws IOException { flushLock.lock(); try (ReleasableLock lock = readLock.acquire()) { ensureOpen(); @@ -372,7 +372,7 @@ public InternalEngine recoverFromTranslog() throws IOException { throw new IllegalStateException("Engine has already been recovered"); } try { - recoverFromTranslogInternal(); + recoverFromTranslogInternal(recoverUpToSeqNo); } catch (Exception e) { try { pendingTranslogRecovery.set(true); // just play safe and never allow commits on this see #ensureCanFlush @@ -388,17 +388,23 @@ public InternalEngine recoverFromTranslog() throws IOException { return this; } + // for testing + final Engine recoverFromTranslog() throws IOException { + return recoverFromTranslog(Long.MAX_VALUE); + } + @Override public void skipTranslogRecovery() { assert pendingTranslogRecovery.get() : "translogRecovery is not pending but should be"; pendingTranslogRecovery.set(false); // we are good - now we can commit } - private void recoverFromTranslogInternal() throws IOException { + private void recoverFromTranslogInternal(long recoverUpToSeqNo) throws IOException { Translog.TranslogGeneration translogGeneration = translog.getGeneration(); final int opsRecovered; final long translogGen = Long.parseLong(lastCommittedSegmentInfos.getUserData().get(Translog.TRANSLOG_GENERATION_KEY)); - try (Translog.Snapshot snapshot = translog.newSnapshotFromGen(translogGen)) { + try (Translog.Snapshot snapshot = translog.newSnapshotFromGen( + new Translog.TranslogGeneration(translog.getTranslogUUID(), translogGen), recoverUpToSeqNo)) { opsRecovered = config().getTranslogRecoveryRunner().run(this, snapshot); } catch (Exception e) { throw new EngineException(shardId, "failed to recover from translog", e); @@ -2234,6 +2240,22 @@ public SeqNoStats getSeqNoStats(long globalCheckpoint) { return localCheckpointTracker.getStats(globalCheckpoint); } + @Override + public Engine lockDownEngine() throws IOException { + try (ReleasableLock ignored = writeLock.acquire()) { + refresh("lockdown", SearcherScope.INTERNAL); + Searcher searcher = acquireSearcher("lockdown", SearcherScope.INTERNAL); + try { + ReadOnlyEngine readOnlyEngine = new ReadOnlyEngine(this.engineConfig, lastCommittedSegmentInfos, searcher, + getSeqNoStats(getLastSyncedGlobalCheckpoint()), getTranslogStats()); + searcher = null; + 
return readOnlyEngine; + } finally { + IOUtils.close(searcher); + } + } + } + /** * Returns the number of times a version was looked up either from the index. * Note this is only available if assertions are enabled diff --git a/server/src/main/java/org/elasticsearch/index/engine/ReadOnlyEngine.java b/server/src/main/java/org/elasticsearch/index/engine/ReadOnlyEngine.java new file mode 100644 index 0000000000000..caf8adcd92816 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/engine/ReadOnlyEngine.java @@ -0,0 +1,314 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.engine; + +import org.apache.lucene.index.SegmentInfos; +import org.apache.lucene.search.SearcherManager; +import org.apache.lucene.store.AlreadyClosedException; +import org.elasticsearch.common.lease.Releasable; +import org.elasticsearch.common.lease.Releasables; +import org.elasticsearch.common.util.concurrent.ReleasableLock; +import org.elasticsearch.core.internal.io.IOUtils; +import org.elasticsearch.index.seqno.SeqNoStats; +import org.elasticsearch.index.translog.Translog; +import org.elasticsearch.index.translog.TranslogStats; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.function.BiFunction; +import java.util.stream.Stream; + +/** + * An engine that does not accept writes, and always points stats, searcher to the last commit. 
+ */ +final class ReadOnlyEngine extends Engine { + private final SegmentInfos lastCommittedSegmentInfos; + private final SeqNoStats seqNoStats; + private final TranslogStats translogStats; + private final SearcherManager searcherManager; + private final Searcher lastCommitSearcher; + + ReadOnlyEngine(EngineConfig engineConfig, SegmentInfos lastCommittedSegmentInfos, + Searcher lastCommitSearcher, SeqNoStats seqNoStats, TranslogStats translogStats) throws IOException { + super(engineConfig); + this.lastCommittedSegmentInfos = lastCommittedSegmentInfos; + this.lastCommitSearcher = lastCommitSearcher; + this.seqNoStats = seqNoStats; + this.translogStats = translogStats; + this.searcherManager = new SearcherManager(lastCommitSearcher.getDirectoryReader(), + new RamAccountingSearcherFactory(engineConfig.getCircuitBreakerService())); + } + + @Override + protected void closeNoLock(String reason, CountDownLatch closedLatch) { + try { + IOUtils.close(lastCommitSearcher, searcherManager); + } catch (Exception ex) { + logger.warn("failed to close searcher", ex); + } finally { + closedLatch.countDown(); + } + } + + @Override + public GetResult get(Get get, BiFunction searcherFactory) throws EngineException { + return getFromSearcher(get, this::acquireSearcher, SearcherScope.INTERNAL); + } + + @Override + public Searcher acquireSearcher(String source, SearcherScope scope) throws EngineException { + store.incRef(); + Releasable releasable = store::decRef; + try (ReleasableLock ignored = readLock.acquire()) { + final EngineSearcher searcher = new EngineSearcher(source, searcherManager, store, logger); + releasable = null; // hand over the reference to the engine searcher + return searcher; + } catch (AlreadyClosedException ex) { + throw ex; + } catch (Exception ex) { + ensureOpen(ex); // throw AlreadyClosedException if it's closed + throw new EngineException(shardId, "failed to acquire searcher, source " + source, ex); + } finally { + Releasables.close(releasable); + } + } + + @Override + protected SegmentInfos getLastCommittedSegmentInfos() { + return lastCommittedSegmentInfos; + } + + @Override + public String getHistoryUUID() { + return lastCommittedSegmentInfos.userData.get(Engine.HISTORY_UUID_KEY); + } + + @Override + public long getWritingBytes() { + return 0; + } + + @Override + public long getIndexThrottleTimeInMillis() { + return 0; + } + + @Override + public boolean isThrottled() { + return false; + } + + @Override + public IndexResult index(Index index) { + throw new UnsupportedOperationException(); + } + + @Override + public DeleteResult delete(Delete delete) { + throw new UnsupportedOperationException(); + } + + @Override + public NoOpResult noOp(NoOp noOp) { + throw new UnsupportedOperationException(); + } + + @Override + public Engine lockDownEngine() { + return this; + } + + @Override + public boolean isTranslogSyncNeeded() { + return false; + } + + @Override + public boolean ensureTranslogSynced(Stream locations) { + return false; + } + + @Override + public void syncTranslog() { + throw new UnsupportedOperationException(); + } + + @Override + public Closeable acquireTranslogRetentionLock() { + throw new UnsupportedOperationException(); + } + + @Override + public Translog.Snapshot newTranslogSnapshotFromMinSeqNo(long minSeqNo) { + throw new UnsupportedOperationException(); + } + + @Override + public int estimateTranslogOperationsFromMinSeq(long minSeqNo) { + throw new UnsupportedOperationException(); + } + + @Override + public TranslogStats getTranslogStats() { + return translogStats; + 
} + + @Override + public Translog.Location getTranslogLastWriteLocation() { + throw new UnsupportedOperationException(); + } + + @Override + public long getLocalCheckpoint() { + return seqNoStats.getLocalCheckpoint(); + } + + @Override + public void waitForOpsToComplete(long seqNo) { + throw new UnsupportedOperationException(); + } + + @Override + public void resetLocalCheckpoint(long newCheckpoint) { + throw new UnsupportedOperationException(); + } + + @Override + public SeqNoStats getSeqNoStats(long globalCheckpoint) { + return new SeqNoStats(seqNoStats.getMaxSeqNo(), seqNoStats.getLocalCheckpoint(), globalCheckpoint); + } + + @Override + public long getLastSyncedGlobalCheckpoint() { + return seqNoStats.getGlobalCheckpoint(); + } + + @Override + public long getIndexBufferRAMBytesUsed() { + return 0; + } + + @Override + public List segments(boolean verbose) { + return Arrays.asList(getSegmentInfo(lastCommittedSegmentInfos, verbose)); + } + + @Override + public void refresh(String source) throws EngineException { + // noop + } + + @Override + public void writeIndexingBuffer() throws EngineException { + + } + + @Override + public boolean shouldPeriodicallyFlush() { + return false; + } + + @Override + public SyncedFlushResult syncFlush(String syncId, CommitId expectedCommitId) throws EngineException { + throw new UnsupportedOperationException(); + } + + @Override + public CommitId flush(boolean force, boolean waitIfOngoing) throws EngineException { + throw new UnsupportedOperationException(); + } + + @Override + public CommitId flush() throws EngineException { + throw new UnsupportedOperationException(); + } + + @Override + public void forceMerge(boolean flush, int maxNumSegments, boolean onlyExpungeDeletes, + boolean upgrade, boolean upgradeOnlyAncientSegments) { + throw new UnsupportedOperationException(); + } + + @Override + public IndexCommitRef acquireLastIndexCommit(boolean flushFirst) { + throw new UnsupportedOperationException(); + } + + @Override + public IndexCommitRef acquireSafeIndexCommit() { + throw new UnsupportedOperationException(); + } + + @Override + public void activateThrottling() { + } + + @Override + public void deactivateThrottling() { + } + + @Override + public void trimUnreferencedTranslogFiles() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean shouldRollTranslogGeneration() { + return false; + } + + @Override + public void rollTranslogGeneration() throws EngineException { + throw new UnsupportedOperationException(); + } + + @Override + public void restoreLocalCheckpointFromTranslog() { + throw new UnsupportedOperationException(); + } + + @Override + public int fillSeqNoGaps(long primaryTerm) { + throw new UnsupportedOperationException(); + } + + @Override + public Engine recoverFromTranslog(long recoverUpToSeqNo) { + throw new UnsupportedOperationException(); + } + + @Override + public void skipTranslogRecovery() { + throw new UnsupportedOperationException(); + } + + @Override + public void trimOperationsFromTranslog(long belowTerm, long aboveSeqNo) throws EngineException { + throw new UnsupportedOperationException(); + } + + @Override + public void maybePruneDeletes() { + + } +} diff --git a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java index 08a0111fb4dc5..a2ab419cb4022 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -196,6 +196,11 @@ 
public class IndexShard extends AbstractIndexShardComponent implements IndicesCl protected volatile long pendingPrimaryTerm; // see JavaDocs for getPendingPrimaryTerm protected volatile long operationPrimaryTerm; protected final AtomicReference currentEngineReference = new AtomicReference<>(); + + private final AtomicReference resettingEngineReference = new AtomicReference<>(); + // avoid losing acknowledged writes, only activate the resetting engine when its local_checkpoint at least this guard. + private final AtomicLong minRequiredCheckpointForResettingEngine = new AtomicLong(-1); + final EngineFactory engineFactory; private final IndexingOperationListener indexingOperationListeners; @@ -433,7 +438,7 @@ public void updateShardState(final ShardRouting newRouting, // active primaries. throw new IndexShardRelocatedException(shardId(), "Shard is marked as relocated, cannot safely move to state " + newRouting.state()); } - assert newRouting.active() == false || state == IndexShardState.STARTED || state == IndexShardState.CLOSED : + assert newRouting.active() == false || state == IndexShardState.STARTED || state == IndexShardState.CLOSED || isResetting() : "routing is active, but local shard state isn't. routing: " + newRouting + ", local state: " + state; persistMetadata(path, indexSettings, newRouting, currentRouting, logger); final CountDownLatch shardStateUpdated = new CountDownLatch(1); @@ -495,6 +500,9 @@ public void updateShardState(final ShardRouting newRouting, * numbers. To ensure that this is not the case, we restore the state of the local checkpoint tracker by * replaying the translog and marking any operations there are completed. */ + if (isResetting()) { + completeResettingEngineWithLocalHistory(); + } final Engine engine = getEngine(); engine.restoreLocalCheckpointFromTranslog(); /* Rolling the translog generation is not strictly needed here (as we will never have collisions between @@ -664,18 +672,17 @@ private IndexShardState changeState(IndexShardState newState, String reason) { public Engine.IndexResult applyIndexOperationOnPrimary(long version, VersionType versionType, SourceToParse sourceToParse, long autoGeneratedTimestamp, boolean isRetry) throws IOException { assert versionType.validateVersionForWrites(version); - return applyIndexOperation(SequenceNumbers.UNASSIGNED_SEQ_NO, operationPrimaryTerm, version, versionType, autoGeneratedTimestamp, - isRetry, Engine.Operation.Origin.PRIMARY, sourceToParse); + return applyIndexOperation(getEngine(), SequenceNumbers.UNASSIGNED_SEQ_NO, operationPrimaryTerm, version, versionType, + autoGeneratedTimestamp, isRetry, Engine.Operation.Origin.PRIMARY, sourceToParse); } public Engine.IndexResult applyIndexOperationOnReplica(long seqNo, long version, long autoGeneratedTimeStamp, - boolean isRetry, SourceToParse sourceToParse) - throws IOException { - return applyIndexOperation(seqNo, operationPrimaryTerm, version, null, autoGeneratedTimeStamp, isRetry, - Engine.Operation.Origin.REPLICA, sourceToParse); + boolean isRetry, SourceToParse sourceToParse) throws IOException { + return applyIndexOperation(getEngine(), seqNo, operationPrimaryTerm, version, null, autoGeneratedTimeStamp, + isRetry, Engine.Operation.Origin.REPLICA, sourceToParse); } - private Engine.IndexResult applyIndexOperation(long seqNo, long opPrimaryTerm, long version, @Nullable VersionType versionType, + private Engine.IndexResult applyIndexOperation(Engine engine, long seqNo, long opPrimaryTerm, long version, @Nullable VersionType versionType, long 
autoGeneratedTimeStamp, boolean isRetry, Engine.Operation.Origin origin, SourceToParse sourceToParse) throws IOException { assert opPrimaryTerm <= this.operationPrimaryTerm: "op term [ " + opPrimaryTerm + " ] > shard term [" + this.operationPrimaryTerm @@ -699,7 +706,7 @@ private Engine.IndexResult applyIndexOperation(long seqNo, long opPrimaryTerm, l return new Engine.IndexResult(e, version, opPrimaryTerm, seqNo); } - return index(getEngine(), operation); + return index(engine, operation); } public static Engine.Index prepareIndex(DocumentMapperForType docMapper, Version indexCreatedVersion, SourceToParse source, long seqNo, @@ -733,17 +740,17 @@ private Engine.IndexResult index(Engine engine, Engine.Index index) throws IOExc } public Engine.NoOpResult markSeqNoAsNoop(long seqNo, String reason) throws IOException { - return markSeqNoAsNoop(seqNo, operationPrimaryTerm, reason, Engine.Operation.Origin.REPLICA); + return markSeqNoAsNoop(getEngine(), seqNo, operationPrimaryTerm, reason, Engine.Operation.Origin.REPLICA); } - private Engine.NoOpResult markSeqNoAsNoop(long seqNo, long opPrimaryTerm, String reason, + private Engine.NoOpResult markSeqNoAsNoop(Engine engine, long seqNo, long opPrimaryTerm, String reason, Engine.Operation.Origin origin) throws IOException { assert opPrimaryTerm <= this.operationPrimaryTerm : "op term [ " + opPrimaryTerm + " ] > shard term [" + this.operationPrimaryTerm + "]"; long startTime = System.nanoTime(); ensureWriteAllowed(origin); final Engine.NoOp noOp = new Engine.NoOp(seqNo, opPrimaryTerm, origin, startTime, reason); - return noOp(getEngine(), noOp); + return noOp(engine, noOp); } private Engine.NoOpResult noOp(Engine engine, Engine.NoOp noOp) { @@ -765,15 +772,15 @@ public Engine.DeleteResult getFailedDeleteResult(Exception e, long version) { public Engine.DeleteResult applyDeleteOperationOnPrimary(long version, String type, String id, VersionType versionType) throws IOException { assert versionType.validateVersionForWrites(version); - return applyDeleteOperation(SequenceNumbers.UNASSIGNED_SEQ_NO, operationPrimaryTerm, version, type, id, versionType, + return applyDeleteOperation(getEngine(), SequenceNumbers.UNASSIGNED_SEQ_NO, operationPrimaryTerm, version, type, id, versionType, Engine.Operation.Origin.PRIMARY); } public Engine.DeleteResult applyDeleteOperationOnReplica(long seqNo, long version, String type, String id) throws IOException { - return applyDeleteOperation(seqNo, operationPrimaryTerm, version, type, id, null, Engine.Operation.Origin.REPLICA); + return applyDeleteOperation(getEngine(), seqNo, operationPrimaryTerm, version, type, id, null, Engine.Operation.Origin.REPLICA); } - private Engine.DeleteResult applyDeleteOperation(long seqNo, long opPrimaryTerm, long version, String type, String id, + private Engine.DeleteResult applyDeleteOperation(Engine engine, long seqNo, long opPrimaryTerm, long version, String type, String id, @Nullable VersionType versionType, Engine.Operation.Origin origin) throws IOException { assert opPrimaryTerm <= this.operationPrimaryTerm : "op term [ " + opPrimaryTerm + " ] > shard term [" + this.operationPrimaryTerm + "]"; @@ -797,7 +804,7 @@ private Engine.DeleteResult applyDeleteOperation(long seqNo, long opPrimaryTerm, final Term uid = extractUidForDelete(type, id); final Engine.Delete delete = prepareDelete(type, id, uid, seqNo, opPrimaryTerm, version, versionType, origin); - return delete(getEngine(), delete); + return delete(engine, delete); } private static Engine.Delete prepareDelete(String type, String id, 
Term uid, long seqNo, long primaryTerm, long version, @@ -1186,13 +1193,13 @@ public void close(String reason, boolean flushEngine) throws IOException { } finally { final Engine engine = this.currentEngineReference.getAndSet(null); try { - if (engine != null && flushEngine) { + if (engine != null && flushEngine && isResetting() == false) { engine.flushAndClose(); } } finally { // playing safe here and close the engine even if the above succeeds - close can be called multiple times // Also closing refreshListeners to prevent us from accumulating any more listeners - IOUtils.close(engine, refreshListeners); + IOUtils.close(engine, resettingEngineReference.getAndSet(null), refreshListeners); indexShardOperationPermits.close(); } } @@ -1228,30 +1235,30 @@ public void prepareForIndexRecovery() { assert currentEngineReference.get() == null; } - public void trimOperationOfPreviousPrimaryTerms(long aboveSeqNo) { - getEngine().trimOperationsFromTranslog(operationPrimaryTerm, aboveSeqNo); + public Engine.Result applyTranslogOperation(Translog.Operation operation, Engine.Operation.Origin origin) throws IOException { + return applyTranslogOperation(getEngine(), operation, origin); } - public Engine.Result applyTranslogOperation(Translog.Operation operation, Engine.Operation.Origin origin) throws IOException { + private Engine.Result applyTranslogOperation(Engine engine, Translog.Operation operation, Engine.Operation.Origin origin) throws IOException { final Engine.Result result; switch (operation.opType()) { case INDEX: final Translog.Index index = (Translog.Index) operation; // we set canHaveDuplicates to true all the time such that we de-optimze the translog case and ensure that all // autoGeneratedID docs that are coming from the primary are updated correctly. 
- result = applyIndexOperation(index.seqNo(), index.primaryTerm(), index.version(), + result = applyIndexOperation(engine, index.seqNo(), index.primaryTerm(), index.version(), null, index.getAutoGeneratedIdTimestamp(), true, origin, source(shardId.getIndexName(), index.type(), index.id(), index.source(), XContentHelper.xContentType(index.source())).routing(index.routing())); break; case DELETE: final Translog.Delete delete = (Translog.Delete) operation; - result = applyDeleteOperation(delete.seqNo(), delete.primaryTerm(), delete.version(), delete.type(), delete.id(), + result = applyDeleteOperation(engine, delete.seqNo(), delete.primaryTerm(), delete.version(), delete.type(), delete.id(), null, origin); break; case NO_OP: final Translog.NoOp noOp = (Translog.NoOp) operation; - result = markSeqNoAsNoop(noOp.seqNo(), noOp.primaryTerm(), noOp.reason(), origin); + result = markSeqNoAsNoop(engine, noOp.seqNo(), noOp.primaryTerm(), noOp.reason(), origin); break; default: throw new IllegalStateException("No operation defined for [" + operation + "]"); @@ -1261,14 +1268,16 @@ public Engine.Result applyTranslogOperation(Translog.Operation operation, Engine // package-private for testing int runTranslogRecovery(Engine engine, Translog.Snapshot snapshot) throws IOException { - recoveryState.getTranslog().totalOperations(snapshot.totalOperations()); - recoveryState.getTranslog().totalOperationsOnStart(snapshot.totalOperations()); + if (isResetting() == false) { + recoveryState.getTranslog().totalOperations(snapshot.totalOperations()); + recoveryState.getTranslog().totalOperationsOnStart(snapshot.totalOperations()); + } int opsRecovered = 0; Translog.Operation operation; while ((operation = snapshot.next()) != null) { try { logger.trace("[translog] recover op {}", operation); - Engine.Result result = applyTranslogOperation(operation, Engine.Operation.Origin.LOCAL_TRANSLOG_RECOVERY); + Engine.Result result = applyTranslogOperation(engine, operation, Engine.Operation.Origin.LOCAL_TRANSLOG_RECOVERY); switch (result.getResultType()) { case FAILURE: throw result.getFailure(); @@ -1281,7 +1290,9 @@ int runTranslogRecovery(Engine engine, Translog.Snapshot snapshot) throws IOExce } opsRecovered++; - recoveryState.getTranslog().incrementRecoveredOperations(); + if (isResetting() == false) { + recoveryState.getTranslog().incrementRecoveredOperations(); + } } catch (Exception e) { if (ExceptionsHelper.status(e) == RestStatus.BAD_REQUEST) { // mainly for MapperParsingException and Failure to detect xcontent @@ -1300,7 +1311,7 @@ int runTranslogRecovery(Engine engine, Translog.Snapshot snapshot) throws IOExce **/ public void openEngineAndRecoverFromTranslog() throws IOException { innerOpenEngineAndTranslog(); - getEngine().recoverFromTranslog(); + getEngine().recoverFromTranslog(Long.MAX_VALUE); } /** @@ -1428,6 +1439,9 @@ public boolean ignoreRecoveryAttempt() { public void readAllowed() throws IllegalIndexShardStateException { IndexShardState state = this.state; // one time volatile read + if (state == IndexShardState.RECOVERING && resettingEngineReference.get() != null) { + return; + } if (readAllowedStates.contains(state) == false) { throw new IllegalIndexShardStateException(shardId, state, "operations only allowed when shard state is one of " + readAllowedStates.toString()); } @@ -1458,6 +1472,15 @@ private void ensureWriteAllowed(Engine.Operation.Origin origin) throws IllegalIn } } + private boolean isResetting() { + return state == IndexShardState.RECOVERING && resettingEngineReference.get() != null; + } + + 
// for testing + final Engine getResettingEngine() { + return resettingEngineReference.get(); + } + private boolean assertPrimaryMode() { assert shardRouting.primary() && replicationTracker.isPrimaryMode() : "shard " + shardRouting + " is not a primary shard in primary mode"; return true; @@ -2278,13 +2301,20 @@ public void acquireReplicaOperationPermit(final long opPrimaryTerm, final long g } else { localCheckpoint = currentGlobalCheckpoint; } - logger.trace( + final Engine resettingEngine = resettingEngineReference.getAndSet(null); + if (resettingEngine != null) { + IOUtils.close(resettingEngine); + } else { + getEngine().resetLocalCheckpoint(globalCheckpoint); + getEngine().rollTranslogGeneration(); + sync(); + } + logger.info( "detected new primary with primary term [{}], resetting local checkpoint from [{}] to [{}]", opPrimaryTerm, getLocalCheckpoint(), localCheckpoint); - getEngine().resetLocalCheckpoint(localCheckpoint); - getEngine().rollTranslogGeneration(); + resetEngineUpToLocalCheckpoint(currentGlobalCheckpoint); }); } } @@ -2630,4 +2660,90 @@ public void afterRefresh(boolean didRefresh) throws IOException { refreshMetric.inc(System.nanoTime() - currentRefreshStartTime); } } + + final boolean canResetEngine() { + // TODO: do not reset the following shard + return indexSettings.getIndexVersionCreated().onOrAfter(Version.V_6_4_0); + } + + private void resetEngineUpToLocalCheckpoint(long recoverUpToSeqNo) throws IOException { + synchronized (mutex) { + assert resettingEngineReference.get() == null : "shard is being reset already"; + final Engine currentEngine = getEngine(); + final long globalCheckpoint = getLastSyncedGlobalCheckpoint(); + final long currentMaxSeqNo = currentEngine.getSeqNoStats(globalCheckpoint).getMaxSeqNo(); + // Do not reset the current engine if it does not have any stale operations + // or the primary is on an old version which does not send the max_seqno from the primary during resync. + // we need max_seqno from the primary to activate the resetting engine when its history aligned with primary's. + if (currentMaxSeqNo == globalCheckpoint || canResetEngine() == false) { + return; + } + final String translogUUID = currentEngine.commitStats().getUserData().get(Translog.TRANSLOG_UUID_KEY); + final Engine lockedEngine = currentEngine.lockDownEngine(); + if (lockedEngine != currentEngine) { + boolean swapped = currentEngineReference.compareAndSet(currentEngine, lockedEngine); + assert swapped : "Failed to swap engine"; + currentEngine.close(); + } + final long minRetainedTranslogGen = Translog.readMinTranslogGeneration(translogConfig.getTranslogPath(), translogUUID); + store.trimUnsafeCommits(globalCheckpoint, minRetainedTranslogGen, indexSettings.getIndexVersionCreated()); + final EngineConfig config = newEngineConfig(); + config.setEnableGcDeletes(true); + final Engine resettingEngine = newEngine(config); + onNewEngine(resettingEngine); + verifyNotClosed(); + // the resetting engine will be activated only if its local_checkpoint at least this guard. + minRequiredCheckpointForResettingEngine.set(currentMaxSeqNo); + resettingEngineReference.set(resettingEngine); + changeState(IndexShardState.RECOVERING, "reset engine from=" + currentMaxSeqNo + " to=" + globalCheckpoint); + // TODO: We can transfer docs from the last commit to the safe commit or use multiple threads here. 
+ resettingEngine.recoverFromTranslog(recoverUpToSeqNo); + active.set(true); + } + } + + private void maybeActivateResettingEngine() throws IOException { + synchronized (mutex) { + final Engine resettingEngine = resettingEngineReference.get(); + if (resettingEngine != null && resettingEngine.getLocalCheckpoint() >= minRequiredCheckpointForResettingEngine.get()) { + final Engine currentEngine = currentEngineReference.getAndSet(resettingEngineReference.getAndSet(null)); + IOUtils.close(currentEngine); + changeState(IndexShardState.STARTED, "engine was reset"); + } + } + } + + public Engine.Result applyResyncOperation(Translog.Operation operation) throws IOException { + final Engine resettingEngine = resettingEngineReference.get(); + final Engine targetEngine = resettingEngine != null ? resettingEngine : getEngine(); + return applyTranslogOperation(targetEngine, operation, Engine.Operation.Origin.PEER_RECOVERY); + } + + /** + * This method is called after each bulk of primary-replica resync operations are applied to this shard + * via {@link #applyResyncOperation(Translog.Operation)}. + * + * @param maxSeqNoFromPrimary the maximum sequence number from the newly promoted primary + */ + public void afterApplyResyncOperationsBulk(long maxSeqNoFromPrimary) throws IOException { + final Engine resettingEngine = resettingEngineReference.get(); + final Engine targetEngine = resettingEngine != null ? resettingEngine : getEngine(); + if (maxSeqNoFromPrimary != SequenceNumbers.UNASSIGNED_SEQ_NO) { + targetEngine.trimOperationsFromTranslog(operationPrimaryTerm, maxSeqNoFromPrimary); + // adjust the requirement for the resetting engine, so that we can activate when its history aligned with the primary. + minRequiredCheckpointForResettingEngine.set(Math.min(minRequiredCheckpointForResettingEngine.get(), maxSeqNoFromPrimary)); + } + maybeActivateResettingEngine(); + } + + private void completeResettingEngineWithLocalHistory() throws IOException { + synchronized (mutex) { + assert isResetting() : "engine is not being reset"; + IOUtils.close(resettingEngineReference.getAndSet(null)); + resetEngineUpToLocalCheckpoint(Long.MAX_VALUE); + minRequiredCheckpointForResettingEngine.set(getGlobalCheckpoint()); + maybeActivateResettingEngine(); + assert isResetting() == false : "engine was not reset with local history"; + } + } } diff --git a/server/src/main/java/org/elasticsearch/index/translog/Translog.java b/server/src/main/java/org/elasticsearch/index/translog/Translog.java index e426b3a7253eb..bc40c6e987b1b 100644 --- a/server/src/main/java/org/elasticsearch/index/translog/Translog.java +++ b/server/src/main/java/org/elasticsearch/index/translog/Translog.java @@ -555,21 +555,49 @@ public long getLastSyncedGlobalCheckpoint() { */ public Snapshot newSnapshot() throws IOException { try (ReleasableLock ignored = readLock.acquire()) { - return newSnapshotFromGen(getMinFileGeneration()); + return newSnapshotFromGen(new TranslogGeneration(translogUUID, getMinFileGeneration()), Long.MAX_VALUE); } } - public Snapshot newSnapshotFromGen(long minGeneration) throws IOException { + public Snapshot newSnapshotFromGen(TranslogGeneration minGeneration, long upToSeqNo) throws IOException { try (ReleasableLock ignored = readLock.acquire()) { ensureOpen(); - if (minGeneration < getMinFileGeneration()) { - throw new IllegalArgumentException("requested snapshot generation [" + minGeneration + "] is not available. 
" + + final long minTranslogFileGen = minGeneration.translogFileGeneration; + if (minTranslogFileGen < getMinFileGeneration()) { + throw new IllegalArgumentException("requested snapshot generation [" + minTranslogFileGen + "] is not available. " + "Min referenced generation is [" + getMinFileGeneration() + "]"); } TranslogSnapshot[] snapshots = Stream.concat(readers.stream(), Stream.of(current)) - .filter(reader -> reader.getGeneration() >= minGeneration) + .filter(reader -> reader.getGeneration() >= minTranslogFileGen && reader.getCheckpoint().minSeqNo <= upToSeqNo) .map(BaseTranslogReader::newSnapshot).toArray(TranslogSnapshot[]::new); - return newMultiSnapshot(snapshots); + Snapshot snapshot = newMultiSnapshot(snapshots); + return new Snapshot() { + int skipped = 0; + @Override + public int totalOperations() { + return snapshot.totalOperations(); + } + @Override + public int skippedOperations() { + return skipped + snapshot.skippedOperations(); + } + @Override + public Operation next() throws IOException { + Translog.Operation op; + while ((op = snapshot.next()) != null) { + if (op.seqNo() <= upToSeqNo) { + return op; + } else { + skipped++; + } + } + return null; + } + @Override + public void close() throws IOException { + snapshot.close(); + } + }; } } diff --git a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java index 9151fa24fc9a4..69dc91a587ed2 100644 --- a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java +++ b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java @@ -143,6 +143,7 @@ import org.elasticsearch.index.mapper.SourceFieldMapper; import org.elasticsearch.index.seqno.LocalCheckpointTracker; import org.elasticsearch.index.seqno.ReplicationTracker; +import org.elasticsearch.index.seqno.SeqNoStats; import org.elasticsearch.index.seqno.SequenceNumbers; import org.elasticsearch.index.shard.IndexSearcherWrapper; import org.elasticsearch.index.shard.ShardId; @@ -694,7 +695,7 @@ public void testTranslogMultipleOperationsSameDocument() throws IOException { IOUtils.close(engine); } trimUnsafeCommits(engine.config()); - try (Engine recoveringEngine = new InternalEngine(engine.config())){ + try (InternalEngine recoveringEngine = new InternalEngine(engine.config())){ recoveringEngine.recoverFromTranslog(); try (Engine.Searcher searcher = recoveringEngine.acquireSearcher("test")) { final TotalHitCountCollector collector = new TotalHitCountCollector(); @@ -718,7 +719,7 @@ public void testTranslogRecoveryDoesNotReplayIntoTranslog() throws IOException { IOUtils.close(initialEngine); } - Engine recoveringEngine = null; + InternalEngine recoveringEngine = null; try { final AtomicBoolean committed = new AtomicBoolean(); trimUnsafeCommits(initialEngine.config()); @@ -742,8 +743,8 @@ public void testTranslogRecoveryWithMultipleGenerations() throws IOException { final int docs = randomIntBetween(1, 4096); final List seqNos = LongStream.range(0, docs).boxed().collect(Collectors.toList()); Randomness.shuffle(seqNos); - Engine initialEngine = null; - Engine recoveringEngine = null; + InternalEngine initialEngine = null; + InternalEngine recoveringEngine = null; Store store = createStore(); final AtomicInteger counter = new AtomicInteger(); try { @@ -3381,7 +3382,7 @@ public void testEngineMaxTimestampIsInitialized() throws IOException { engine.index(appendOnlyPrimary(doc, true, timestamp1)); assertEquals(timestamp1, 
engine.segmentsStats(false).getMaxUnsafeAutoIdTimestamp()); } - try (Store store = createStore(newFSDirectory(storeDir)); Engine engine = new InternalEngine(configSupplier.apply(store))) { + try (Store store = createStore(newFSDirectory(storeDir)); InternalEngine engine = new InternalEngine(configSupplier.apply(store))) { assertEquals(IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP, engine.segmentsStats(false).getMaxUnsafeAutoIdTimestamp()); engine.recoverFromTranslog(); assertEquals(timestamp1, engine.segmentsStats(false).getMaxUnsafeAutoIdTimestamp()); @@ -3665,7 +3666,7 @@ public void testSequenceNumberAdvancesToMaxSeqOnEngineOpenOnPrimary() throws Bro IOUtils.close(initialEngine); } trimUnsafeCommits(initialEngine.config()); - try (Engine recoveringEngine = new InternalEngine(initialEngine.config())) { + try (InternalEngine recoveringEngine = new InternalEngine(initialEngine.config())) { recoveringEngine.recoverFromTranslog(); recoveringEngine.fillSeqNoGaps(2); assertThat(recoveringEngine.getLocalCheckpoint(), greaterThanOrEqualTo((long) (docs - 1))); @@ -3976,7 +3977,7 @@ public void testFillUpSequenceIdGapsOnRecovery() throws IOException { boolean flushed = false; AtomicLong globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED); - Engine recoveringEngine = null; + InternalEngine recoveringEngine = null; try { assertEquals(docs - 1, engine.getSeqNoStats(-1).getMaxSeqNo()); assertEquals(docs - 1, engine.getLocalCheckpoint()); @@ -4203,7 +4204,7 @@ public void testKeepTranslogAfterGlobalCheckpoint() throws Exception { final EngineConfig engineConfig = config(indexSettings, store, translogPath, NoMergePolicy.INSTANCE, null, null, () -> globalCheckpoint.get()); - try (Engine engine = new InternalEngine(engineConfig) { + try (InternalEngine engine = new InternalEngine(engineConfig) { @Override protected void commitIndexWriter(IndexWriter writer, Translog translog, String syncId) throws IOException { // Advance the global checkpoint during the flush to create a lag between a persisted global checkpoint in the translog @@ -4724,6 +4725,63 @@ public void testTrimUnsafeCommits() throws Exception { } } + public void testLockDownEngine() throws Exception { + IOUtils.close(engine, store); + Engine lockedDownEngine = null; + final AtomicLong globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED); + try (Store store = createStore()) { + EngineConfig config = config(defaultSettings, store, createTempDir(), newMergePolicy(), null, null, globalCheckpoint::get); + int numDocs = scaledRandomIntBetween(10, 1000); + final SeqNoStats lastSeqNoStats; + final Set lastDocIds; + try (InternalEngine engine = createEngine(config)) { + for (int i = 0; i < numDocs; i++) { + if (rarely()) { + continue; // gap in sequence number + } + ParsedDocument doc = testParsedDocument(Integer.toString(i), null, testDocument(), new BytesArray("{}"), null); + engine.index(new Engine.Index(newUid(doc), doc, i, primaryTerm.get(), 1, null, REPLICA, + System.nanoTime(), -1, false)); + if (rarely()) { + engine.flush(); + } + globalCheckpoint.set(randomLongBetween(globalCheckpoint.get(), engine.getLocalCheckpoint())); + } + engine.syncTranslog(); + lockedDownEngine = engine.lockDownEngine(); + lastSeqNoStats = engine.getSeqNoStats(globalCheckpoint.get()); + lastDocIds = getDocIds(engine, true); + assertThat(lockedDownEngine.getLocalCheckpoint(), equalTo(lastSeqNoStats.getLocalCheckpoint())); + assertThat(lockedDownEngine.getSeqNoStats(globalCheckpoint.get()).getMaxSeqNo(), equalTo(lastSeqNoStats.getMaxSeqNo())); 
+ assertThat(getDocIds(lockedDownEngine, randomBoolean()), equalTo(lastDocIds)); + for (int i = 0; i < numDocs; i++) { + if (randomBoolean()) { + String delId = Integer.toString(i); + engine.delete(new Engine.Delete("test", delId, newUid(delId), primaryTerm.get())); + } + if (rarely()) { + engine.flush(); + } + } + // the locked down engine should still point to the previous commit + assertThat(lockedDownEngine.getLocalCheckpoint(), equalTo(lastSeqNoStats.getLocalCheckpoint())); + assertThat(lockedDownEngine.getSeqNoStats(globalCheckpoint.get()).getMaxSeqNo(), equalTo(lastSeqNoStats.getMaxSeqNo())); + assertThat(getDocIds(lockedDownEngine, randomBoolean()), equalTo(lastDocIds)); + } + // Close and reopen the main engine + trimUnsafeCommits(config); + try (InternalEngine recoveringEngine = new InternalEngine(config)) { + recoveringEngine.recoverFromTranslog(); + // the locked down engine should still point to the previous commit + assertThat(lockedDownEngine.getLocalCheckpoint(), equalTo(lastSeqNoStats.getLocalCheckpoint())); + assertThat(lockedDownEngine.getSeqNoStats(globalCheckpoint.get()).getMaxSeqNo(), equalTo(lastSeqNoStats.getMaxSeqNo())); + assertThat(getDocIds(lockedDownEngine, randomBoolean()), equalTo(lastDocIds)); + } + } finally { + IOUtils.close(lockedDownEngine); + } + } + private static void trimUnsafeCommits(EngineConfig config) throws IOException { final Store store = config.getStore(); final TranslogConfig translogConfig = config.getTranslogConfig(); diff --git a/server/src/test/java/org/elasticsearch/index/replication/IndexLevelReplicationTests.java b/server/src/test/java/org/elasticsearch/index/replication/IndexLevelReplicationTests.java index f38550d70413b..700e10cc5a8c9 100644 --- a/server/src/test/java/org/elasticsearch/index/replication/IndexLevelReplicationTests.java +++ b/server/src/test/java/org/elasticsearch/index/replication/IndexLevelReplicationTests.java @@ -469,11 +469,7 @@ public void testSeqNoCollision() throws Exception { assertThat("Remaining of snapshot should contain init operations", snapshot, containsOperationsInAnyOrder(initOperations)); assertThat("Peer-recovery should not send overridden operations", snapshot.skippedOperations(), equalTo(0)); } - // TODO: We should assert the content of shards in the ReplicationGroup. 
- // Without rollback replicas(current implementation), we don't have the same content across shards: - // - replica1 has {doc1} - // - replica2 has {doc1, doc2} - // - replica3 can have either {doc2} only if operation-based recovery or {doc1, doc2} if file-based recovery + shards.assertAllEqual(initDocs + 1); } } diff --git a/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java b/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java index 2d198c32ba74f..b87b7f50551fa 100644 --- a/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java +++ b/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java @@ -22,6 +22,7 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.store.AlreadyClosedException; import org.elasticsearch.action.admin.indices.flush.FlushRequest; import org.elasticsearch.action.bulk.BulkShardRequest; import org.elasticsearch.action.index.IndexRequest; @@ -55,7 +56,6 @@ import java.util.ArrayList; import java.util.Collections; import java.util.EnumSet; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -68,8 +68,10 @@ import static org.hamcrest.Matchers.both; import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.everyItem; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.isIn; import static org.hamcrest.Matchers.lessThan; import static org.hamcrest.Matchers.lessThanOrEqualTo; import static org.hamcrest.Matchers.not; @@ -299,14 +301,6 @@ public void testRecoveryAfterPrimaryPromotion() throws Exception { assertThat(newReplica.recoveryState().getIndex().fileDetails(), not(empty())); assertThat(newReplica.recoveryState().getTranslog().recoveredOperations(), equalTo(uncommittedOpsOnPrimary)); } - - // roll back the extra ops in the replica - shards.removeReplica(replica); - replica.close("resync", false); - replica.store().close(); - newReplica = shards.addReplicaWithExistingPath(replica.shardPath(), replica.routingEntry().currentNodeId()); - shards.recoverReplica(newReplica); - shards.assertAllEqual(totalDocs); // Make sure that flushing on a recovering shard is ok. 
shards.flush(); shards.assertAllEqual(totalDocs); @@ -363,7 +357,6 @@ public void testResyncAfterPrimaryPromotion() throws Exception { try (ReplicationGroup shards = new ReplicationGroup(buildIndexMetaData(2, mappings))) { shards.startAll(); int initialDocs = randomInt(10); - for (int i = 0; i < initialDocs; i++) { final IndexRequest indexRequest = new IndexRequest(index.getName(), "type", "initial_doc_" + i) .source("{ \"f\": \"normal\"}", XContentType.JSON); @@ -399,31 +392,15 @@ public void testResyncAfterPrimaryPromotion() throws Exception { indexOnReplica(bulkShardRequest, shards, justReplica); } - logger.info("--> seqNo primary {} replica {}", oldPrimary.seqNoStats(), newPrimary.seqNoStats()); - - logger.info("--> resyncing replicas"); - PrimaryReplicaSyncer.ResyncTask task = shards.promoteReplicaToPrimary(newPrimary).get(); + logger.info("--> resyncing replicas seqno_stats primary {} replica {}", oldPrimary.seqNoStats(), newPrimary.seqNoStats()); + PrimaryReplicaSyncer.ResyncTask resyncTask = shards.promoteReplicaToPrimary(newPrimary).get(); if (syncedGlobalCheckPoint) { - assertEquals(extraDocs, task.getResyncedOperations()); + assertEquals(extraDocs, resyncTask.getResyncedOperations()); } else { - assertThat(task.getResyncedOperations(), greaterThanOrEqualTo(extraDocs)); - } - List replicas = shards.getReplicas(); - - // check all docs on primary are available on replica - Set primaryIds = getShardDocUIDs(newPrimary); - assertThat(primaryIds.size(), equalTo(initialDocs + extraDocs)); - for (IndexShard replica : replicas) { - Set replicaIds = getShardDocUIDs(replica); - Set temp = new HashSet<>(primaryIds); - temp.removeAll(replicaIds); - assertThat(replica.routingEntry() + " is missing docs", temp, empty()); - temp = new HashSet<>(replicaIds); - temp.removeAll(primaryIds); - // yeah, replica has more docs as there is no Lucene roll back on it - assertThat(replica.routingEntry() + " has to have extra docs", temp, - extraDocsToBeTrimmed > 0 ? 
not(empty()) : empty()); + assertThat(resyncTask.getResyncedOperations(), greaterThanOrEqualTo(extraDocs)); } + // documents on replicas should be aligned with primary + shards.assertAllEqual(initialDocs + extraDocs); // check translog on replica is trimmed int translogOperations = 0; @@ -651,6 +628,72 @@ public long indexTranslogOperations(final List operations, f } } + public void testRollbackOnPromotion() throws Exception { + int numberOfReplicas = between(2, 3); + try (ReplicationGroup shards = createGroup(numberOfReplicas)) { + shards.startAll(); + IndexShard newPrimary = randomFrom(shards.getReplicas()); + int initDocs = shards.indexDocs(randomInt(100)); + Set ackedDocIds = getShardDocUIDs(shards.getPrimary(), true); + int inFlightOps = scaledRandomIntBetween(10, 200); + int extraDocsOnNewPrimary = 0; + for (int i = 0; i < inFlightOps; i++) { + String id = "extra-" + i; + IndexRequest primaryRequest = new IndexRequest(index.getName(), "type", id).source("{}", XContentType.JSON); + BulkShardRequest replicationRequest = indexOnPrimary(primaryRequest, shards.getPrimary()); + int indexed = 0; + for (IndexShard replica : shards.getReplicas()) { + if (randomBoolean()) { + indexOnReplica(replicationRequest, shards, replica); + if (replica == newPrimary) { + extraDocsOnNewPrimary++; + } + indexed++; + } + } + if (indexed == numberOfReplicas) { + ackedDocIds.add(id); + } + if (randomBoolean()) { + shards.syncGlobalCheckpoint(); + } + if (rarely()) { + shards.flush(); + } + } + AtomicBoolean isDone = new AtomicBoolean(); + Thread[] threads = new Thread[numberOfReplicas]; + for (int i = 0; i < threads.length; i++) { + IndexShard replica = shards.getReplicas().get(i); + threads[i] = new Thread(() -> { + // should fail at most twice with two transitions: normal engine -> read-only engine -> resetting engine + long hitClosedExceptions = 0; + while (isDone.get() == false) { + try { + Set docIds = getShardDocUIDs(replica, true); + assertThat(ackedDocIds, everyItem(isIn(docIds))); + assertThat(replica.getLocalCheckpoint(), greaterThanOrEqualTo(initDocs - 1L)); + } catch (AlreadyClosedException e) { + hitClosedExceptions++; + } catch (IOException e) { + throw new AssertionError(e); + } + } + assertThat(hitClosedExceptions, lessThanOrEqualTo(2L)); + }); + threads[i].start(); + } + shards.promoteReplicaToPrimary(newPrimary).get(); + shards.assertAllEqual(initDocs + extraDocsOnNewPrimary); + int moreDocs = shards.indexDocs(scaledRandomIntBetween(1, 10)); + shards.assertAllEqual(initDocs + extraDocsOnNewPrimary + moreDocs); + isDone.set(true); + for (Thread thread : threads) { + thread.join(); + } + } + } + public static class BlockingTarget extends RecoveryTarget { private final CountDownLatch recoveryBlocked; diff --git a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java index 67a28c9b33623..2cffbeec861a3 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java @@ -156,6 +156,7 @@ import static org.elasticsearch.test.hamcrest.RegexMatcher.matches; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.either; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.greaterThanOrEqualTo; @@ -164,8 +165,10 @@ import static 
org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.lessThan; import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.nullValue; +import static org.hamcrest.Matchers.sameInstance; /** * Simple unit-test IndexShard related operations. @@ -741,6 +744,11 @@ public void onFailure(Exception e) { ActionListener listener = new ActionListener() { @Override public void onResponse(Releasable releasable) { + try { + indexShard.afterApplyResyncOperationsBulk(newGlobalCheckPoint); + } catch (IOException e) { + throw new AssertionError(e); + } assertThat(indexShard.getPendingPrimaryTerm(), equalTo(newPrimaryTerm)); assertThat(TestTranslog.getCurrentTerm(getTranslog(indexShard)), equalTo(newPrimaryTerm)); assertThat(indexShard.getLocalCheckpoint(), equalTo(expectedLocalCheckpoint)); @@ -807,7 +815,8 @@ private void finish() { } else { assertTrue(onResponse.get()); assertNull(onFailure.get()); - assertThat(getTranslog(indexShard).getGeneration().translogFileGeneration, equalTo(translogGen + 1)); + assertThat(getTranslog(indexShard).getGeneration().translogFileGeneration, + either(equalTo(translogGen + 1)).or(equalTo(translogGen + 2))); assertThat(indexShard.getLocalCheckpoint(), equalTo(expectedLocalCheckpoint)); assertThat(indexShard.getGlobalCheckpoint(), equalTo(newGlobalCheckPoint)); } @@ -879,23 +888,15 @@ public void testGlobalCheckpointSync() throws IOException { closeShards(replicaShard, primaryShard); } - public void testRestoreLocalCheckpointTrackerFromTranslogOnPromotion() throws IOException, InterruptedException { - final IndexShard indexShard = newStartedShard(false); - final int operations = 1024 - scaledRandomIntBetween(0, 1024); + public void testRestoreLocalHistoryOnPromotion() throws IOException, InterruptedException { + final Version indexVersion = randomFrom(Version.V_5_5_0, Version.V_6_3_0, Version.V_6_4_0, Version.CURRENT); + final IndexShard indexShard = newStartedShard(false, indexVersion); + final int operations = scaledRandomIntBetween(1, 1000); indexOnReplicaWithGaps(indexShard, operations, Math.toIntExact(SequenceNumbers.NO_OPS_PERFORMED)); - - final long maxSeqNo = indexShard.seqNoStats().getMaxSeqNo(); - final long globalCheckpointOnReplica = SequenceNumbers.UNASSIGNED_SEQ_NO; - randomIntBetween( - Math.toIntExact(SequenceNumbers.UNASSIGNED_SEQ_NO), - Math.toIntExact(indexShard.getLocalCheckpoint())); + long maxSeqNo = indexShard.seqNoStats().getMaxSeqNo(); + final long globalCheckpoint = randomLongBetween(SequenceNumbers.NO_OPS_PERFORMED, indexShard.getLocalCheckpoint()); + final long globalCheckpointOnReplica = randomLongBetween(SequenceNumbers.NO_OPS_PERFORMED, globalCheckpoint); indexShard.updateGlobalCheckpointOnReplica(globalCheckpointOnReplica, "test"); - - final int globalCheckpoint = - randomIntBetween( - Math.toIntExact(SequenceNumbers.UNASSIGNED_SEQ_NO), - Math.toIntExact(indexShard.getLocalCheckpoint())); - final CountDownLatch latch = new CountDownLatch(1); indexShard.acquireReplicaOperationPermit( indexShard.getPendingPrimaryTerm() + 1, @@ -913,8 +914,14 @@ public void onFailure(Exception e) { } }, ThreadPool.Names.SAME, ""); - latch.await(); + final boolean shouldReset = indexShard.canResetEngine() && globalCheckpoint < maxSeqNo; + assertThat(indexShard.getResettingEngine(), shouldReset ? 
notNullValue() : nullValue()); + if (shouldReset && randomBoolean()) { + // trimmed by the max_seqno on the primary + maxSeqNo = randomLongBetween(globalCheckpoint, maxSeqNo); + indexShard.afterApplyResyncOperationsBulk(maxSeqNo); + } final ShardRouting newRouting = indexShard.routingEntry().moveActiveReplicaToPrimary(); final CountDownLatch resyncLatch = new CountDownLatch(1); @@ -927,9 +934,10 @@ public void onFailure(Exception e) { new IndexShardRoutingTable.Builder(newRouting.shardId()).addShard(newRouting).build(), Collections.emptySet()); resyncLatch.await(); + assertThat(indexShard.getResettingEngine(), nullValue()); + assertThat(indexShard.state(), equalTo(IndexShardState.STARTED)); assertThat(indexShard.getLocalCheckpoint(), equalTo(maxSeqNo)); assertThat(indexShard.seqNoStats().getMaxSeqNo(), equalTo(maxSeqNo)); - closeShards(indexShard); } @@ -958,6 +966,11 @@ public void testThrowBackLocalCheckpointOnReplica() throws IOException, Interrup @Override public void onResponse(final Releasable releasable) { releasable.close(); + try { + indexShard.afterApplyResyncOperationsBulk(Math.max(globalCheckpoint, SequenceNumbers.NO_OPS_PERFORMED)); + } catch (IOException e) { + throw new AssertionError(e); + } latch.countDown(); } @@ -1840,7 +1853,6 @@ public void testRecoverFromStoreRemoveStaleOperations() throws Exception { flushShard(shard); assertThat(getShardDocUIDs(shard), containsInAnyOrder("doc-0", "doc-1")); // Simulate resync (without rollback): Noop #1, index #2 - acquireReplicaOperationPermitBlockingly(shard, shard.pendingPrimaryTerm + 1); shard.markSeqNoAsNoop(1, "test"); shard.applyIndexOperationOnReplica(2, 1, IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP, false, SourceToParse.source(indexName, "_doc", "doc-2", new BytesArray("{}"), XContentType.JSON)); @@ -1929,6 +1941,76 @@ public void restoreShard(IndexShard shard, SnapshotId snapshotId, Version versio closeShards(source, target); } + public void testResetEngineOnAcquireReplicaShardPermits() throws Exception { + IndexShard shard = newStartedShard(false); + AtomicLong globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED); + indexOnReplicaWithGaps(shard, scaledRandomIntBetween(10, 1000), Math.toIntExact(SequenceNumbers.NO_OPS_PERFORMED)); + globalCheckpoint.set(randomLongBetween(globalCheckpoint.get(), shard.getLocalCheckpoint())); + Set lastDocIds = getShardDocUIDs(shard, true); + CountDownLatch acquired = new CountDownLatch(1); + Engine prevEngine = getEngine(shard); + shard.acquireReplicaOperationPermit(shard.getPendingPrimaryTerm() + 1, globalCheckpoint.get(), new ActionListener() { + @Override + public void onResponse(Releasable releasable) { + releasable.close(); + acquired.countDown(); + } + @Override + public void onFailure(Exception e) { + + } + }, ThreadPool.Names.SAME, ""); + acquired.await(); + assertThat(getShardDocUIDs(shard, randomBoolean()), equalTo(lastDocIds)); + final Engine resettingEngine = shard.getResettingEngine(); + if (globalCheckpoint.get() == shard.seqNoStats().getMaxSeqNo()) { + assertThat("Engine without stale operations should not be reset", resettingEngine, nullValue()); + assertThat(getEngine(shard), sameInstance(prevEngine)); + } else { + assertThat(resettingEngine, notNullValue()); + assertThat(getEngine(shard), not(sameInstance(prevEngine))); + SeqNoStats seqNoStats = resettingEngine.getSeqNoStats(globalCheckpoint.get()); + assertThat(seqNoStats.getMaxSeqNo(), equalTo(globalCheckpoint.get())); + assertThat(seqNoStats.getLocalCheckpoint(), equalTo(globalCheckpoint.get())); + // 
should not activate the resetting engine + shard.afterApplyResyncOperationsBulk(randomLongBetween(globalCheckpoint.get() + 1, Long.MAX_VALUE)); + assertThat(getEngine(shard), not(sameInstance(resettingEngine))); + // should activate if the resetting engine + shard.afterApplyResyncOperationsBulk(globalCheckpoint.get()); + assertThat(getEngine(shard), sameInstance(resettingEngine)); + assertThat(shard.getResettingEngine(), nullValue()); + } + closeShards(shard); + } + + public void testDoNoResetEngineOnOldVersions() throws Exception { + Version oldVersion = randomValueOtherThanMany(v -> v.onOrAfter(Version.V_6_4_0), + () -> randomFrom(Version.getDeclaredVersions(Version.class))); + IndexShard shard = newStartedShard(false, oldVersion); + assertThat(shard.canResetEngine(), equalTo(false)); + indexOnReplicaWithGaps(shard, between(5, 50), Math.toIntExact(SequenceNumbers.NO_OPS_PERFORMED)); + long globalCheckpoint = randomLongBetween(SequenceNumbers.NO_OPS_PERFORMED, shard.getLocalCheckpoint()); + Engine engine = shard.getEngine(); + CountDownLatch acquired = new CountDownLatch(1); + shard.acquireReplicaOperationPermit(shard.getPendingPrimaryTerm() + 1, globalCheckpoint, new ActionListener() { + @Override + public void onResponse(Releasable releasable) { + releasable.close(); + acquired.countDown(); + } + @Override + public void onFailure(Exception e) { + } + }, ThreadPool.Names.SAME, ""); + acquired.await(); + assertThat(shard.getEngine(), sameInstance(engine)); + assertThat(shard.getResettingEngine(), nullValue()); + shard.afterApplyResyncOperationsBulk(randomLongBetween(globalCheckpoint, globalCheckpoint + 5)); + assertThat(shard.getEngine(), sameInstance(engine)); + assertThat(shard.getResettingEngine(), nullValue()); + closeShards(shard); + } + public void testSearcherWrapperIsUsed() throws IOException { IndexShard shard = newStartedShard(true); indexDoc(shard, "_doc", "0", "{\"foo\" : \"bar\"}"); @@ -2679,6 +2761,9 @@ private Result indexOnReplicaWithGaps( } else { gap = true; } + if (rarely()) { + indexShard.flush(new FlushRequest()); + } } assert localCheckpoint == indexShard.getLocalCheckpoint(); assert !gap || (localCheckpoint != max); @@ -3183,4 +3268,21 @@ public void testOnCloseStats() throws IOException { } + private IndexShard newStartedShard(boolean primary, Version indexCreatedVersion) throws IOException { + Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, indexCreatedVersion) + .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0).put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1).build(); + ShardRouting shardRouting = TestShardRouting.newShardRouting(new ShardId("index", "_na_", 0), randomAlphaOfLength(10), primary, + ShardRoutingState.INITIALIZING, + primary ? 
RecoverySource.StoreRecoverySource.EMPTY_STORE_INSTANCE : RecoverySource.PeerRecoverySource.INSTANCE); + IndexMetaData.Builder metaData = IndexMetaData.builder(shardRouting.getIndexName()) + .settings(settings).primaryTerm(0, randomLongBetween(1, 1000)).putMapping("_doc", "{ \"properties\": {} }"); + IndexShard shard = newShard(shardRouting, metaData.build()); + if (primary) { + recoverShardFromStore(shard); + } else { + recoveryEmptyReplica(shard, true); + } + return shard; + } + } diff --git a/server/src/test/java/org/elasticsearch/index/shard/RefreshListenersTests.java b/server/src/test/java/org/elasticsearch/index/shard/RefreshListenersTests.java index 2d1c1d4e15af8..774b272121a5c 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/RefreshListenersTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/RefreshListenersTests.java @@ -132,7 +132,7 @@ indexSettings, null, store, newMergePolicy(), iwc.getAnalyzer(), iwc.getSimilari TimeValue.timeValueMinutes(5), Collections.singletonList(listeners), Collections.emptyList(), null, (e, s) -> 0, new NoneCircuitBreakerService(), () -> SequenceNumbers.NO_OPS_PERFORMED, () -> primaryTerm); engine = new InternalEngine(config); - engine.recoverFromTranslog(); + engine.recoverFromTranslog(Long.MAX_VALUE); listeners.setCurrentRefreshLocationSupplier(engine::getTranslogLastWriteLocation); } diff --git a/server/src/test/java/org/elasticsearch/index/translog/TranslogTests.java b/server/src/test/java/org/elasticsearch/index/translog/TranslogTests.java index 1c27a59e0ecbe..58f9ec93a155d 100644 --- a/server/src/test/java/org/elasticsearch/index/translog/TranslogTests.java +++ b/server/src/test/java/org/elasticsearch/index/translog/TranslogTests.java @@ -358,7 +358,8 @@ public void testSimpleOperations() throws IOException { } markCurrentGenAsCommitted(translog); - try (Translog.Snapshot snapshot = translog.newSnapshotFromGen(firstId + 1)) { + try (Translog.Snapshot snapshot = translog.newSnapshotFromGen( + new Translog.TranslogGeneration(translog.getTranslogUUID(), firstId + 1), Long.MAX_VALUE)) { assertThat(snapshot, SnapshotMatchers.size(0)); assertThat(snapshot.totalOperations(), equalTo(0)); } @@ -589,6 +590,43 @@ public void testSnapshot() throws IOException { } } + public void testSnapshotFromMinGen() throws Exception { + Map> operationsPerGen = new HashMap<>(); + try (Translog.Snapshot snapshot = translog.newSnapshotFromGen( + new Translog.TranslogGeneration(translog.getTranslogUUID(), 1), randomNonNegativeLong())) { + assertThat(snapshot, SnapshotMatchers.size(0)); + } + long maxGen = translog.currentFileGeneration() + between(0, 10); + for (long gen = translog.currentFileGeneration(); gen <= maxGen; gen++) { + operationsPerGen.putIfAbsent(gen, new ArrayList<>()); + int numOps = between(0, 20); + for (int i = 0; i < numOps; i++) { + long seqNo = randomLongBetween(0, 1000); + addToTranslogAndList(translog, operationsPerGen.get(gen), new Translog.Index("test", + Long.toString(seqNo), seqNo, primaryTerm.get(), new byte[]{1})); + } + long minGen = randomLongBetween(translog.getMinFileGeneration(), translog.currentFileGeneration()); + try (Translog.Snapshot snapshot = translog.newSnapshotFromGen( + new Translog.TranslogGeneration(translog.getTranslogUUID(), minGen), Long.MAX_VALUE)) { + List expectedOps = operationsPerGen.entrySet().stream() + .filter(e -> e.getKey() >= minGen) + .flatMap(e -> e.getValue().stream()) + .collect(Collectors.toList()); + assertThat(snapshot, 
SnapshotMatchers.containsOperationsInAnyOrder(expectedOps)); + } + long upToSeqNo = randomLongBetween(1, 5000); + try (Translog.Snapshot snapshot = translog.newSnapshotFromGen( + new Translog.TranslogGeneration(translog.getTranslogUUID(), minGen), upToSeqNo)) { + List expectedOps = operationsPerGen.entrySet().stream() + .filter(e -> e.getKey() >= minGen) + .flatMap(e -> e.getValue().stream().filter(op -> op.seqNo() <= upToSeqNo)) + .collect(Collectors.toList()); + assertThat(snapshot, SnapshotMatchers.containsOperationsInAnyOrder(expectedOps)); + } + translog.rollGeneration(); + } + } + public void testReadLocation() throws IOException { ArrayList ops = new ArrayList<>(); ArrayList locs = new ArrayList<>(); @@ -1302,7 +1340,7 @@ public void testBasicRecovery() throws IOException { translog = new Translog(config, translogGeneration.translogUUID, translog.getDeletionPolicy(), () -> SequenceNumbers.NO_OPS_PERFORMED, primaryTerm::get); assertEquals("lastCommitted must be 1 less than current", translogGeneration.translogFileGeneration + 1, translog.currentFileGeneration()); assertFalse(translog.syncNeeded()); - try (Translog.Snapshot snapshot = translog.newSnapshotFromGen(translogGeneration.translogFileGeneration)) { + try (Translog.Snapshot snapshot = translog.newSnapshotFromGen(translogGeneration, Long.MAX_VALUE)) { for (int i = minUncommittedOp; i < translogOperations; i++) { assertEquals("expected operation" + i + " to be in the previous translog but wasn't", translog.currentFileGeneration() - 1, locations.get(i).generation); @@ -1733,7 +1771,7 @@ public void testOpenForeignTranslog() throws IOException { } this.translog = new Translog(config, translogUUID, deletionPolicy, () -> SequenceNumbers.NO_OPS_PERFORMED, primaryTerm::get); - try (Translog.Snapshot snapshot = this.translog.newSnapshotFromGen(translogGeneration.translogFileGeneration)) { + try (Translog.Snapshot snapshot = this.translog.newSnapshotFromGen(translogGeneration, Long.MAX_VALUE)) { for (int i = firstUncommitted; i < translogOperations; i++) { Translog.Operation next = snapshot.next(); assertNotNull("" + i, next); @@ -2553,7 +2591,7 @@ public void testWithRandomException() throws IOException { generationUUID = Translog.createEmptyTranslog(config.getTranslogPath(), SequenceNumbers.NO_OPS_PERFORMED, shardId, primaryTerm.get()); } try (Translog translog = new Translog(config, generationUUID, deletionPolicy, () -> SequenceNumbers.NO_OPS_PERFORMED, primaryTerm::get); - Translog.Snapshot snapshot = translog.newSnapshotFromGen(minGenForRecovery)) { + Translog.Snapshot snapshot = translog.newSnapshotFromGen(new Translog.TranslogGeneration(generationUUID, minGenForRecovery), Long.MAX_VALUE)) { assertEquals(syncedDocs.size(), snapshot.totalOperations()); for (int i = 0; i < syncedDocs.size(); i++) { Translog.Operation next = snapshot.next(); diff --git a/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java index 2a84a8f424616..b838ca03b4c9b 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java @@ -21,12 +21,15 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; +import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.StoredField; import 
org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LiveIndexWriterConfig; import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.Term; @@ -36,6 +39,7 @@ import org.apache.lucene.search.Sort; import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; import org.elasticsearch.action.index.IndexRequest; @@ -82,7 +86,9 @@ import java.nio.file.Path; import java.util.Arrays; import java.util.Collections; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.concurrent.atomic.AtomicLong; import java.util.function.BiFunction; import java.util.function.LongSupplier; @@ -508,4 +514,25 @@ public static Translog getTranslog(Engine engine) { InternalEngine internalEngine = (InternalEngine) engine; return internalEngine.getTranslog(); } + + public static Set getDocIds(Engine engine, boolean refresh) throws IOException { + if (refresh) { + engine.refresh("test"); + } + try (Engine.Searcher searcher = engine.acquireSearcher("test")) { + Set ids = new HashSet<>(); + for (LeafReaderContext leafContext : searcher.reader().leaves()) { + LeafReader reader = leafContext.reader(); + Bits liveDocs = reader.getLiveDocs(); + for (int i = 0; i < reader.maxDoc(); i++) { + if (liveDocs == null || liveDocs.get(i)) { + Document uuid = reader.document(i, Collections.singleton(IdFieldMapper.NAME)); + BytesRef binaryID = uuid.getBinaryValue(IdFieldMapper.NAME); + ids.add(Uid.decodeId(Arrays.copyOfRange(binaryID.bytes, binaryID.offset, binaryID.offset + binaryID.length))); + } + } + } + return ids; + } + } } diff --git a/test/framework/src/main/java/org/elasticsearch/index/shard/IndexShardTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/shard/IndexShardTestCase.java index d2a84589669a6..f84442529c297 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/shard/IndexShardTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/shard/IndexShardTestCase.java @@ -18,13 +18,8 @@ */ package org.elasticsearch.index.shard; -import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexNotFoundException; -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.store.Directory; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; import org.elasticsearch.action.admin.indices.flush.FlushRequest; import org.elasticsearch.action.index.IndexRequest; @@ -57,10 +52,8 @@ import org.elasticsearch.index.engine.EngineFactory; import org.elasticsearch.index.engine.EngineTestCase; import org.elasticsearch.index.engine.InternalEngineFactory; -import org.elasticsearch.index.mapper.IdFieldMapper; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.SourceToParse; -import org.elasticsearch.index.mapper.Uid; import org.elasticsearch.index.seqno.ReplicationTracker; import org.elasticsearch.index.seqno.SequenceNumbers; import org.elasticsearch.index.similarity.SimilarityService; @@ -581,23 +574,12 @@ private Store.MetadataSnapshot getMetadataSnapshotOrEmpty(IndexShard replica) th return result; } + protected Set 
getShardDocUIDs(final IndexShard shard, boolean refresh) throws IOException { + return EngineTestCase.getDocIds(shard.getEngine(), refresh); + } + protected Set getShardDocUIDs(final IndexShard shard) throws IOException { - shard.refresh("get_uids"); - try (Engine.Searcher searcher = shard.acquireSearcher("test")) { - Set ids = new HashSet<>(); - for (LeafReaderContext leafContext : searcher.reader().leaves()) { - LeafReader reader = leafContext.reader(); - Bits liveDocs = reader.getLiveDocs(); - for (int i = 0; i < reader.maxDoc(); i++) { - if (liveDocs == null || liveDocs.get(i)) { - Document uuid = reader.document(i, Collections.singleton(IdFieldMapper.NAME)); - BytesRef binaryID = uuid.getBinaryValue(IdFieldMapper.NAME); - ids.add(Uid.decodeId(Arrays.copyOfRange(binaryID.bytes, binaryID.offset, binaryID.offset + binaryID.length))); - } - } - } - return ids; - } + return getShardDocUIDs(shard, true); } protected void assertDocCount(IndexShard shard, int docDount) throws IOException { From 2e106802729a054d70106978daba896a18fb5a2f Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Wed, 15 Aug 2018 08:06:49 -0400 Subject: [PATCH 02/17] Remove no-arg method --- .../index/engine/InternalEngine.java | 5 --- .../index/engine/InternalEngineTests.java | 44 +++++++++---------- .../index/engine/EngineTestCase.java | 2 +- 3 files changed, 23 insertions(+), 28 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java index 4058de23c81f5..03170bdc615d2 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java @@ -388,11 +388,6 @@ public InternalEngine recoverFromTranslog(long recoverUpToSeqNo) throws IOExcept return this; } - // for testing - final Engine recoverFromTranslog() throws IOException { - return recoverFromTranslog(Long.MAX_VALUE); - } - @Override public void skipTranslogRecovery() { assert pendingTranslogRecovery.get() : "translogRecovery is not pending but should be"; diff --git a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java index 69dc91a587ed2..6941c7b3d0cdf 100644 --- a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java +++ b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java @@ -649,7 +649,7 @@ public IndexSearcher wrap(IndexSearcher searcher) throws EngineException { trimUnsafeCommits(engine.config()); engine = new InternalEngine(engine.config()); assertTrue(engine.isRecovering()); - engine.recoverFromTranslog(); + engine.recoverFromTranslog(Long.MAX_VALUE); Engine.Searcher searcher = wrapper.wrap(engine.acquireSearcher("test")); assertThat(counter.get(), equalTo(2)); searcher.close(); @@ -666,7 +666,7 @@ public void testFlushIsDisabledDuringTranslogRecovery() throws IOException { engine = new InternalEngine(engine.config()); expectThrows(IllegalStateException.class, () -> engine.flush(true, true)); assertTrue(engine.isRecovering()); - engine.recoverFromTranslog(); + engine.recoverFromTranslog(Long.MAX_VALUE); assertFalse(engine.isRecovering()); doc = testParsedDocument("2", null, testDocumentWithTextField(), SOURCE, null); engine.index(indexForDoc(doc)); @@ -696,7 +696,7 @@ public void testTranslogMultipleOperationsSameDocument() throws IOException { } trimUnsafeCommits(engine.config()); try (InternalEngine 
recoveringEngine = new InternalEngine(engine.config())){ - recoveringEngine.recoverFromTranslog(); + recoveringEngine.recoverFromTranslog(Long.MAX_VALUE); try (Engine.Searcher searcher = recoveringEngine.acquireSearcher("test")) { final TotalHitCountCollector collector = new TotalHitCountCollector(); searcher.searcher().search(new MatchAllDocsQuery(), collector); @@ -732,7 +732,7 @@ protected void commitIndexWriter(IndexWriter writer, Translog translog, String s } }; assertThat(getTranslog(recoveringEngine).stats().getUncommittedOperations(), equalTo(docs)); - recoveringEngine.recoverFromTranslog(); + recoveringEngine.recoverFromTranslog(Long.MAX_VALUE); assertTrue(committed.get()); } finally { IOUtils.close(recoveringEngine); @@ -766,7 +766,7 @@ public void testTranslogRecoveryWithMultipleGenerations() throws IOException { initialEngine.close(); trimUnsafeCommits(initialEngine.config()); recoveringEngine = new InternalEngine(initialEngine.config()); - recoveringEngine.recoverFromTranslog(); + recoveringEngine.recoverFromTranslog(Long.MAX_VALUE); try (Engine.Searcher searcher = recoveringEngine.acquireSearcher("test")) { TopDocs topDocs = searcher.searcher().search(new MatchAllDocsQuery(), docs); assertEquals(docs, topDocs.totalHits); @@ -1153,7 +1153,7 @@ public void testSyncedFlushSurvivesEngineRestart() throws IOException { } trimUnsafeCommits(config); engine = new InternalEngine(config); - engine.recoverFromTranslog(); + engine.recoverFromTranslog(Long.MAX_VALUE); assertEquals(engine.getLastCommittedSegmentInfos().getUserData().get(Engine.SYNC_COMMIT_ID), syncId); } @@ -1172,7 +1172,7 @@ public void testSyncedFlushVanishesOnReplay() throws IOException { engine.close(); trimUnsafeCommits(config); engine = new InternalEngine(config); - engine.recoverFromTranslog(); + engine.recoverFromTranslog(Long.MAX_VALUE); assertNull("Sync ID must be gone since we have a document to replay", engine.getLastCommittedSegmentInfos().getUserData().get(Engine.SYNC_COMMIT_ID)); } @@ -2126,7 +2126,7 @@ public void testSeqNoAndCheckpoints() throws IOException { trimUnsafeCommits(initialEngine.engineConfig); try (InternalEngine recoveringEngine = new InternalEngine(initialEngine.config())){ - recoveringEngine.recoverFromTranslog(); + recoveringEngine.recoverFromTranslog(Long.MAX_VALUE); assertEquals(primarySeqNo, recoveringEngine.getSeqNoStats(-1).getMaxSeqNo()); assertThat( @@ -2447,7 +2447,7 @@ public void testCurrentTranslogIDisCommitted() throws IOException { try (InternalEngine engine = createEngine(config)) { engine.index(firstIndexRequest); globalCheckpoint.set(engine.getLocalCheckpoint()); - expectThrows(IllegalStateException.class, () -> engine.recoverFromTranslog()); + expectThrows(IllegalStateException.class, () -> engine.recoverFromTranslog(Long.MAX_VALUE)); Map userData = engine.getLastCommittedSegmentInfos().getUserData(); assertEquals("1", userData.get(Translog.TRANSLOG_GENERATION_KEY)); assertEquals(engine.getTranslog().getTranslogUUID(), userData.get(Translog.TRANSLOG_UUID_KEY)); @@ -2469,7 +2469,7 @@ public void testCurrentTranslogIDisCommitted() throws IOException { assertEquals("3", userData.get(Translog.TRANSLOG_GENERATION_KEY)); } assertEquals(engine.getTranslog().getTranslogUUID(), userData.get(Translog.TRANSLOG_UUID_KEY)); - engine.recoverFromTranslog(); + engine.recoverFromTranslog(Long.MAX_VALUE); userData = engine.getLastCommittedSegmentInfos().getUserData(); assertEquals("3", userData.get(Translog.TRANSLOG_GENERATION_KEY)); assertEquals(engine.getTranslog().getTranslogUUID(), 
userData.get(Translog.TRANSLOG_UUID_KEY)); @@ -2486,7 +2486,7 @@ public void testCurrentTranslogIDisCommitted() throws IOException { Map userData = engine.getLastCommittedSegmentInfos().getUserData(); assertEquals("1", userData.get(Translog.TRANSLOG_GENERATION_KEY)); assertEquals(engine.getTranslog().getTranslogUUID(), userData.get(Translog.TRANSLOG_UUID_KEY)); - engine.recoverFromTranslog(); + engine.recoverFromTranslog(Long.MAX_VALUE); assertEquals(2, engine.getTranslog().currentFileGeneration()); assertEquals(0L, engine.getTranslog().stats().getUncommittedOperations()); } @@ -2500,7 +2500,7 @@ public void testCurrentTranslogIDisCommitted() throws IOException { Map userData = engine.getLastCommittedSegmentInfos().getUserData(); assertEquals("1", userData.get(Translog.TRANSLOG_GENERATION_KEY)); assertEquals(engine.getTranslog().getTranslogUUID(), userData.get(Translog.TRANSLOG_UUID_KEY)); - engine.recoverFromTranslog(); + engine.recoverFromTranslog(Long.MAX_VALUE); userData = engine.getLastCommittedSegmentInfos().getUserData(); assertEquals("no changes - nothing to commit", "1", userData.get(Translog.TRANSLOG_GENERATION_KEY)); assertEquals(engine.getTranslog().getTranslogUUID(), userData.get(Translog.TRANSLOG_UUID_KEY)); @@ -2606,7 +2606,7 @@ protected void commitIndexWriter(IndexWriter writer, Translog translog, String s } } }) { - engine.recoverFromTranslog(); + engine.recoverFromTranslog(Long.MAX_VALUE); final ParsedDocument doc1 = testParsedDocument("1", null, testDocumentWithTextField(), SOURCE, null); engine.index(indexForDoc(doc1)); globalCheckpoint.set(engine.getLocalCheckpoint()); @@ -2617,7 +2617,7 @@ protected void commitIndexWriter(IndexWriter writer, Translog translog, String s try (InternalEngine engine = new InternalEngine(config(indexSettings, store, translogPath, newMergePolicy(), null, null, globalCheckpointSupplier))) { - engine.recoverFromTranslog(); + engine.recoverFromTranslog(Long.MAX_VALUE); assertVisibleCount(engine, 1); final long committedGen = Long.valueOf( engine.getLastCommittedSegmentInfos().getUserData().get(Translog.TRANSLOG_GENERATION_KEY)); @@ -2683,7 +2683,7 @@ public void testTranslogReplay() throws IOException { engine.close(); trimUnsafeCommits(copy(engine.config(), inSyncGlobalCheckpointSupplier)); engine = new InternalEngine(copy(engine.config(), inSyncGlobalCheckpointSupplier)); // we need to reuse the engine config unless the parser.mappingModified won't work - engine.recoverFromTranslog(); + engine.recoverFromTranslog(Long.MAX_VALUE); assertVisibleCount(engine, numDocs, false); parser = (TranslogHandler) engine.config().getTranslogRecoveryRunner(); @@ -3384,7 +3384,7 @@ public void testEngineMaxTimestampIsInitialized() throws IOException { } try (Store store = createStore(newFSDirectory(storeDir)); InternalEngine engine = new InternalEngine(configSupplier.apply(store))) { assertEquals(IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP, engine.segmentsStats(false).getMaxUnsafeAutoIdTimestamp()); - engine.recoverFromTranslog(); + engine.recoverFromTranslog(Long.MAX_VALUE); assertEquals(timestamp1, engine.segmentsStats(false).getMaxUnsafeAutoIdTimestamp()); final ParsedDocument doc = testParsedDocument("1", null, testDocumentWithTextField(), new BytesArray("{}".getBytes(Charset.defaultCharset())), null); @@ -3667,7 +3667,7 @@ public void testSequenceNumberAdvancesToMaxSeqOnEngineOpenOnPrimary() throws Bro } trimUnsafeCommits(initialEngine.config()); try (InternalEngine recoveringEngine = new InternalEngine(initialEngine.config())) { - 
recoveringEngine.recoverFromTranslog(); + recoveringEngine.recoverFromTranslog(Long.MAX_VALUE); recoveringEngine.fillSeqNoGaps(2); assertThat(recoveringEngine.getLocalCheckpoint(), greaterThanOrEqualTo((long) (docs - 1))); } @@ -3774,7 +3774,7 @@ protected long doGenerateSeqNoForOperation(Operation operation) { throw new UnsupportedOperationException(); } }; - noOpEngine.recoverFromTranslog(); + noOpEngine.recoverFromTranslog(Long.MAX_VALUE); final int gapsFilled = noOpEngine.fillSeqNoGaps(primaryTerm.get()); final String reason = randomAlphaOfLength(16); noOpEngine.noOp(new Engine.NoOp(maxSeqNo + 1, primaryTerm.get(), LOCAL_TRANSLOG_RECOVERY, System.nanoTime(), reason)); @@ -3986,7 +3986,7 @@ public void testFillUpSequenceIdGapsOnRecovery() throws IOException { trimUnsafeCommits(copy(replicaEngine.config(), globalCheckpoint::get)); recoveringEngine = new InternalEngine(copy(replicaEngine.config(), globalCheckpoint::get)); assertEquals(numDocsOnReplica, getTranslog(recoveringEngine).stats().getUncommittedOperations()); - recoveringEngine.recoverFromTranslog(); + recoveringEngine.recoverFromTranslog(Long.MAX_VALUE); assertEquals(maxSeqIDOnReplica, recoveringEngine.getSeqNoStats(-1).getMaxSeqNo()); assertEquals(checkpointOnReplica, recoveringEngine.getLocalCheckpoint()); assertEquals((maxSeqIDOnReplica + 1) - numDocsOnReplica, recoveringEngine.fillSeqNoGaps(2)); @@ -4022,7 +4022,7 @@ public void testFillUpSequenceIdGapsOnRecovery() throws IOException { if (flushed) { assertThat(recoveringEngine.getTranslogStats().getUncommittedOperations(), equalTo(0)); } - recoveringEngine.recoverFromTranslog(); + recoveringEngine.recoverFromTranslog(Long.MAX_VALUE); assertEquals(maxSeqIDOnReplica, recoveringEngine.getSeqNoStats(-1).getMaxSeqNo()); assertEquals(maxSeqIDOnReplica, recoveringEngine.getLocalCheckpoint()); assertEquals(0, recoveringEngine.fillSeqNoGaps(3)); @@ -4215,7 +4215,7 @@ protected void commitIndexWriter(IndexWriter writer, Translog translog, String s super.commitIndexWriter(writer, translog, syncId); } }) { - engine.recoverFromTranslog(); + engine.recoverFromTranslog(Long.MAX_VALUE); int numDocs = scaledRandomIntBetween(10, 100); for (int docId = 0; docId < numDocs; docId++) { ParseContext.Document document = testDocumentWithTextField(); @@ -4771,7 +4771,7 @@ public void testLockDownEngine() throws Exception { // Close and reopen the main engine trimUnsafeCommits(config); try (InternalEngine recoveringEngine = new InternalEngine(config)) { - recoveringEngine.recoverFromTranslog(); + recoveringEngine.recoverFromTranslog(Long.MAX_VALUE); // the locked down engine should still point to the previous commit assertThat(lockedDownEngine.getLocalCheckpoint(), equalTo(lastSeqNoStats.getLocalCheckpoint())); assertThat(lockedDownEngine.getSeqNoStats(globalCheckpoint.get()).getMaxSeqNo(), equalTo(lastSeqNoStats.getMaxSeqNo())); diff --git a/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java index b838ca03b4c9b..0f3607d41ba3f 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java @@ -381,7 +381,7 @@ private InternalEngine createEngine(@Nullable IndexWriterFactory indexWriterFact } InternalEngine internalEngine = createInternalEngine(indexWriterFactory, localCheckpointTrackerSupplier, seqNoForOperation, config); - internalEngine.recoverFromTranslog(); + 
internalEngine.recoverFromTranslog(Long.MAX_VALUE); return internalEngine; } From 216b89040a3a5843cd2c57397f93f51d31fe3f73 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Wed, 15 Aug 2018 11:29:53 -0400 Subject: [PATCH 03/17] Remove lockdown method --- .../elasticsearch/index/engine/Engine.java | 5 - .../index/engine/InternalEngine.java | 16 ---- ...dOnlyEngine.java => SearchOnlyEngine.java} | 37 ++++---- .../elasticsearch/index/shard/IndexShard.java | 11 +-- .../index/engine/InternalEngineTests.java | 67 ------------- .../index/engine/SearchOnlyEngineTests.java | 93 +++++++++++++++++++ .../index/engine/EngineTestCase.java | 9 ++ 7 files changed, 125 insertions(+), 113 deletions(-) rename server/src/main/java/org/elasticsearch/index/engine/{ReadOnlyEngine.java => SearchOnlyEngine.java} (88%) create mode 100644 server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java diff --git a/server/src/main/java/org/elasticsearch/index/engine/Engine.java b/server/src/main/java/org/elasticsearch/index/engine/Engine.java index 1f5dd09cfb3e1..bf20c787d4573 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/Engine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/Engine.java @@ -1633,11 +1633,6 @@ public interface Warmer { */ public abstract void skipTranslogRecovery(); - /** - * Returns a {@link ReadOnlyEngine} that points to the last commit of the current engine. - */ - public abstract Engine lockDownEngine() throws IOException; - /** * Returns true iff this engine is currently recovering from translog. */ diff --git a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java index 03170bdc615d2..a1988a8e9ed19 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java @@ -2235,22 +2235,6 @@ public SeqNoStats getSeqNoStats(long globalCheckpoint) { return localCheckpointTracker.getStats(globalCheckpoint); } - @Override - public Engine lockDownEngine() throws IOException { - try (ReleasableLock ignored = writeLock.acquire()) { - refresh("lockdown", SearcherScope.INTERNAL); - Searcher searcher = acquireSearcher("lockdown", SearcherScope.INTERNAL); - try { - ReadOnlyEngine readOnlyEngine = new ReadOnlyEngine(this.engineConfig, lastCommittedSegmentInfos, searcher, - getSeqNoStats(getLastSyncedGlobalCheckpoint()), getTranslogStats()); - searcher = null; - return readOnlyEngine; - } finally { - IOUtils.close(searcher); - } - } - } - /** * Returns the number of times a version was looked up either from the index. * Note this is only available if assertions are enabled diff --git a/server/src/main/java/org/elasticsearch/index/engine/ReadOnlyEngine.java b/server/src/main/java/org/elasticsearch/index/engine/SearchOnlyEngine.java similarity index 88% rename from server/src/main/java/org/elasticsearch/index/engine/ReadOnlyEngine.java rename to server/src/main/java/org/elasticsearch/index/engine/SearchOnlyEngine.java index caf8adcd92816..235b6fe926669 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/ReadOnlyEngine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/SearchOnlyEngine.java @@ -39,30 +39,36 @@ import java.util.stream.Stream; /** - * An engine that does not accept writes, and always points stats, searcher to the last commit. + * A minimal engine that does not accept writes, and always points stats, searcher to the last commit. 
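+ * It allows a shard to keep serving reads from its last commit while the internal engine is being reset
+ * during primary-replica resync.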
*/ -final class ReadOnlyEngine extends Engine { +public final class SearchOnlyEngine extends Engine { private final SegmentInfos lastCommittedSegmentInfos; private final SeqNoStats seqNoStats; private final TranslogStats translogStats; private final SearcherManager searcherManager; private final Searcher lastCommitSearcher; - ReadOnlyEngine(EngineConfig engineConfig, SegmentInfos lastCommittedSegmentInfos, - Searcher lastCommitSearcher, SeqNoStats seqNoStats, TranslogStats translogStats) throws IOException { - super(engineConfig); - this.lastCommittedSegmentInfos = lastCommittedSegmentInfos; - this.lastCommitSearcher = lastCommitSearcher; - this.seqNoStats = seqNoStats; - this.translogStats = translogStats; - this.searcherManager = new SearcherManager(lastCommitSearcher.getDirectoryReader(), - new RamAccountingSearcherFactory(engineConfig.getCircuitBreakerService())); + public SearchOnlyEngine(Engine engine) throws IOException { + super(engine.config()); + engine.refresh("lockdown"); + this.seqNoStats = engine.getSeqNoStats(engine.getLastSyncedGlobalCheckpoint()); + this.translogStats = engine.getTranslogStats(); + this.lastCommittedSegmentInfos = engine.getLastCommittedSegmentInfos(); + Searcher searcher = engine.acquireSearcher("lockdown", SearcherScope.INTERNAL); + try { + this.searcherManager = new SearcherManager(searcher.getDirectoryReader(), + new RamAccountingSearcherFactory(engineConfig.getCircuitBreakerService())); + this.lastCommitSearcher = searcher; + searcher = null; + } finally { + IOUtils.close(searcher); + } } @Override protected void closeNoLock(String reason, CountDownLatch closedLatch) { try { - IOUtils.close(lastCommitSearcher, searcherManager); + IOUtils.close(lastCommitSearcher); } catch (Exception ex) { logger.warn("failed to close searcher", ex); } finally { @@ -133,11 +139,6 @@ public NoOpResult noOp(NoOp noOp) { throw new UnsupportedOperationException(); } - @Override - public Engine lockDownEngine() { - return this; - } - @Override public boolean isTranslogSyncNeeded() { return false; @@ -215,7 +216,7 @@ public List segments(boolean verbose) { @Override public void refresh(String source) throws EngineException { - // noop + throw new UnsupportedOperationException(); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java index a2ab419cb4022..394492e911a22 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -84,6 +84,7 @@ import org.elasticsearch.index.engine.EngineException; import org.elasticsearch.index.engine.EngineFactory; import org.elasticsearch.index.engine.InternalEngine; +import org.elasticsearch.index.engine.SearchOnlyEngine; import org.elasticsearch.index.engine.RefreshFailedEngineException; import org.elasticsearch.index.engine.Segment; import org.elasticsearch.index.engine.SegmentsStats; @@ -2679,12 +2680,7 @@ private void resetEngineUpToLocalCheckpoint(long recoverUpToSeqNo) throws IOExce return; } final String translogUUID = currentEngine.commitStats().getUserData().get(Translog.TRANSLOG_UUID_KEY); - final Engine lockedEngine = currentEngine.lockDownEngine(); - if (lockedEngine != currentEngine) { - boolean swapped = currentEngineReference.compareAndSet(currentEngine, lockedEngine); - assert swapped : "Failed to swap engine"; - currentEngine.close(); - } + IOUtils.close(currentEngineReference.getAndSet(new SearchOnlyEngine(currentEngine))); 
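+            // reads are now served by the search-only snapshot installed above, while the steps below trim
+            // the unsafe commits and open a fresh engine that replays the local translog up to the global checkpoint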
final long minRetainedTranslogGen = Translog.readMinTranslogGeneration(translogConfig.getTranslogPath(), translogUUID); store.trimUnsafeCommits(globalCheckpoint, minRetainedTranslogGen, indexSettings.getIndexVersionCreated()); final EngineConfig config = newEngineConfig(); @@ -2706,7 +2702,8 @@ private void maybeActivateResettingEngine() throws IOException { synchronized (mutex) { final Engine resettingEngine = resettingEngineReference.get(); if (resettingEngine != null && resettingEngine.getLocalCheckpoint() >= minRequiredCheckpointForResettingEngine.get()) { - final Engine currentEngine = currentEngineReference.getAndSet(resettingEngineReference.getAndSet(null)); + final Engine newEngine = resettingEngineReference.getAndSet(null); + final Engine currentEngine = currentEngineReference.getAndSet(newEngine); IOUtils.close(currentEngine); changeState(IndexShardState.STARTED, "engine was reset"); } diff --git a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java index 6941c7b3d0cdf..671ff6747cc47 100644 --- a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java +++ b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java @@ -143,7 +143,6 @@ import org.elasticsearch.index.mapper.SourceFieldMapper; import org.elasticsearch.index.seqno.LocalCheckpointTracker; import org.elasticsearch.index.seqno.ReplicationTracker; -import org.elasticsearch.index.seqno.SeqNoStats; import org.elasticsearch.index.seqno.SequenceNumbers; import org.elasticsearch.index.shard.IndexSearcherWrapper; import org.elasticsearch.index.shard.ShardId; @@ -4725,72 +4724,6 @@ public void testTrimUnsafeCommits() throws Exception { } } - public void testLockDownEngine() throws Exception { - IOUtils.close(engine, store); - Engine lockedDownEngine = null; - final AtomicLong globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED); - try (Store store = createStore()) { - EngineConfig config = config(defaultSettings, store, createTempDir(), newMergePolicy(), null, null, globalCheckpoint::get); - int numDocs = scaledRandomIntBetween(10, 1000); - final SeqNoStats lastSeqNoStats; - final Set lastDocIds; - try (InternalEngine engine = createEngine(config)) { - for (int i = 0; i < numDocs; i++) { - if (rarely()) { - continue; // gap in sequence number - } - ParsedDocument doc = testParsedDocument(Integer.toString(i), null, testDocument(), new BytesArray("{}"), null); - engine.index(new Engine.Index(newUid(doc), doc, i, primaryTerm.get(), 1, null, REPLICA, - System.nanoTime(), -1, false)); - if (rarely()) { - engine.flush(); - } - globalCheckpoint.set(randomLongBetween(globalCheckpoint.get(), engine.getLocalCheckpoint())); - } - engine.syncTranslog(); - lockedDownEngine = engine.lockDownEngine(); - lastSeqNoStats = engine.getSeqNoStats(globalCheckpoint.get()); - lastDocIds = getDocIds(engine, true); - assertThat(lockedDownEngine.getLocalCheckpoint(), equalTo(lastSeqNoStats.getLocalCheckpoint())); - assertThat(lockedDownEngine.getSeqNoStats(globalCheckpoint.get()).getMaxSeqNo(), equalTo(lastSeqNoStats.getMaxSeqNo())); - assertThat(getDocIds(lockedDownEngine, randomBoolean()), equalTo(lastDocIds)); - for (int i = 0; i < numDocs; i++) { - if (randomBoolean()) { - String delId = Integer.toString(i); - engine.delete(new Engine.Delete("test", delId, newUid(delId), primaryTerm.get())); - } - if (rarely()) { - engine.flush(); - } - } - // the locked down engine should still point to the 
previous commit - assertThat(lockedDownEngine.getLocalCheckpoint(), equalTo(lastSeqNoStats.getLocalCheckpoint())); - assertThat(lockedDownEngine.getSeqNoStats(globalCheckpoint.get()).getMaxSeqNo(), equalTo(lastSeqNoStats.getMaxSeqNo())); - assertThat(getDocIds(lockedDownEngine, randomBoolean()), equalTo(lastDocIds)); - } - // Close and reopen the main engine - trimUnsafeCommits(config); - try (InternalEngine recoveringEngine = new InternalEngine(config)) { - recoveringEngine.recoverFromTranslog(Long.MAX_VALUE); - // the locked down engine should still point to the previous commit - assertThat(lockedDownEngine.getLocalCheckpoint(), equalTo(lastSeqNoStats.getLocalCheckpoint())); - assertThat(lockedDownEngine.getSeqNoStats(globalCheckpoint.get()).getMaxSeqNo(), equalTo(lastSeqNoStats.getMaxSeqNo())); - assertThat(getDocIds(lockedDownEngine, randomBoolean()), equalTo(lastDocIds)); - } - } finally { - IOUtils.close(lockedDownEngine); - } - } - - private static void trimUnsafeCommits(EngineConfig config) throws IOException { - final Store store = config.getStore(); - final TranslogConfig translogConfig = config.getTranslogConfig(); - final String translogUUID = store.readLastCommittedSegmentsInfo().getUserData().get(Translog.TRANSLOG_UUID_KEY); - final long globalCheckpoint = Translog.readGlobalCheckpoint(translogConfig.getTranslogPath(), translogUUID); - final long minRetainedTranslogGen = Translog.readMinTranslogGeneration(translogConfig.getTranslogPath(), translogUUID); - store.trimUnsafeCommits(globalCheckpoint, minRetainedTranslogGen, config.getIndexSettings().getIndexVersionCreated()); - } - void assertLuceneOperations(InternalEngine engine, long expectedAppends, long expectedUpdates, long expectedDeletes) { String message = "Lucene operations mismatched;" + " appends [actual:" + engine.getNumDocAppends() + ", expected:" + expectedAppends + "]," + diff --git a/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java b/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java new file mode 100644 index 0000000000000..0a3c95e796c30 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java @@ -0,0 +1,93 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.engine; + +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.core.internal.io.IOUtils; +import org.elasticsearch.index.mapper.ParsedDocument; +import org.elasticsearch.index.seqno.SeqNoStats; +import org.elasticsearch.index.seqno.SequenceNumbers; +import org.elasticsearch.index.store.Store; + +import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; + +import static org.elasticsearch.index.engine.Engine.Operation.Origin.REPLICA; +import static org.hamcrest.Matchers.equalTo; + +public class SearchOnlyEngineTests extends EngineTestCase { + + public void testReadOnlyEngine() throws Exception { + IOUtils.close(engine, store); + Engine lockedDownEngine = null; + final AtomicLong globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED); + try (Store store = createStore()) { + EngineConfig config = config(defaultSettings, store, createTempDir(), newMergePolicy(), null, null, globalCheckpoint::get); + int numDocs = scaledRandomIntBetween(10, 1000); + final SeqNoStats lastSeqNoStats; + final Set lastDocIds; + try (InternalEngine engine = createEngine(config)) { + for (int i = 0; i < numDocs; i++) { + if (rarely()) { + continue; // gap in sequence number + } + ParsedDocument doc = testParsedDocument(Integer.toString(i), null, testDocument(), new BytesArray("{}"), null); + engine.index(new Engine.Index(newUid(doc), doc, i, primaryTerm.get(), 1, null, REPLICA, + System.nanoTime(), -1, false)); + if (rarely()) { + engine.flush(); + } + globalCheckpoint.set(randomLongBetween(globalCheckpoint.get(), engine.getLocalCheckpoint())); + } + engine.syncTranslog(); + lockedDownEngine = new SearchOnlyEngine(engine); + lastSeqNoStats = engine.getSeqNoStats(globalCheckpoint.get()); + lastDocIds = getDocIds(engine, true); + assertThat(lockedDownEngine.getLocalCheckpoint(), equalTo(lastSeqNoStats.getLocalCheckpoint())); + assertThat(lockedDownEngine.getSeqNoStats(globalCheckpoint.get()).getMaxSeqNo(), equalTo(lastSeqNoStats.getMaxSeqNo())); + assertThat(getDocIds(lockedDownEngine, randomBoolean()), equalTo(lastDocIds)); + for (int i = 0; i < numDocs; i++) { + if (randomBoolean()) { + String delId = Integer.toString(i); + engine.delete(new Engine.Delete("test", delId, newUid(delId), primaryTerm.get())); + } + if (rarely()) { + engine.flush(); + } + } + // the locked down engine should still point to the previous commit + assertThat(lockedDownEngine.getLocalCheckpoint(), equalTo(lastSeqNoStats.getLocalCheckpoint())); + assertThat(lockedDownEngine.getSeqNoStats(globalCheckpoint.get()).getMaxSeqNo(), equalTo(lastSeqNoStats.getMaxSeqNo())); + assertThat(getDocIds(lockedDownEngine, randomBoolean()), equalTo(lastDocIds)); + } + // Close and reopen the main engine + trimUnsafeCommits(config); + try (InternalEngine recoveringEngine = new InternalEngine(config)) { + recoveringEngine.recoverFromTranslog(Long.MAX_VALUE); + // the locked down engine should still point to the previous commit + assertThat(lockedDownEngine.getLocalCheckpoint(), equalTo(lastSeqNoStats.getLocalCheckpoint())); + assertThat(lockedDownEngine.getSeqNoStats(globalCheckpoint.get()).getMaxSeqNo(), equalTo(lastSeqNoStats.getMaxSeqNo())); + assertThat(getDocIds(lockedDownEngine, randomBoolean()), equalTo(lastDocIds)); + } + } finally { + IOUtils.close(lockedDownEngine); + } + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java index 
0f3607d41ba3f..257ee0d87cb97 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java @@ -535,4 +535,13 @@ public static Set getDocIds(Engine engine, boolean refresh) throws IOExc return ids; } } + + static void trimUnsafeCommits(EngineConfig config) throws IOException { + final Store store = config.getStore(); + final TranslogConfig translogConfig = config.getTranslogConfig(); + final String translogUUID = store.readLastCommittedSegmentsInfo().getUserData().get(Translog.TRANSLOG_UUID_KEY); + final long globalCheckpoint = Translog.readGlobalCheckpoint(translogConfig.getTranslogPath(), translogUUID); + final long minRetainedTranslogGen = Translog.readMinTranslogGeneration(translogConfig.getTranslogPath(), translogUUID); + store.trimUnsafeCommits(globalCheckpoint, minRetainedTranslogGen, config.getIndexSettings().getIndexVersionCreated()); + } } From 0ebb7e3718aab7065db21ce23b4e81d15f271e64 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Thu, 16 Aug 2018 14:44:25 -0400 Subject: [PATCH 04/17] Use engine holder --- .../elasticsearch/index/shard/IndexShard.java | 293 ++++++++++-------- .../index/engine/SearchOnlyEngineTests.java | 6 +- .../RecoveryDuringReplicationTests.java | 3 +- .../index/shard/IndexShardTests.java | 142 ++++----- 4 files changed, 218 insertions(+), 226 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java index 394492e911a22..bfc27a853b58a 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -196,11 +196,7 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl protected volatile IndexShardState state; protected volatile long pendingPrimaryTerm; // see JavaDocs for getPendingPrimaryTerm protected volatile long operationPrimaryTerm; - protected final AtomicReference currentEngineReference = new AtomicReference<>(); - - private final AtomicReference resettingEngineReference = new AtomicReference<>(); - // avoid losing acknowledged writes, only activate the resetting engine when its local_checkpoint at least this guard. - private final AtomicLong minRequiredCheckpointForResettingEngine = new AtomicLong(-1); + private final EngineHolder engineHolder = new EngineHolder(); final EngineFactory engineFactory; @@ -439,7 +435,7 @@ public void updateShardState(final ShardRouting newRouting, // active primaries. throw new IndexShardRelocatedException(shardId(), "Shard is marked as relocated, cannot safely move to state " + newRouting.state()); } - assert newRouting.active() == false || state == IndexShardState.STARTED || state == IndexShardState.CLOSED || isResetting() : + assert newRouting.active() == false || state == IndexShardState.STARTED || state == IndexShardState.CLOSED : "routing is active, but local shard state isn't. routing: " + newRouting + ", local state: " + state; persistMetadata(path, indexSettings, newRouting, currentRouting, logger); final CountDownLatch shardStateUpdated = new CountDownLatch(1); @@ -501,8 +497,8 @@ public void updateShardState(final ShardRouting newRouting, * numbers. To ensure that this is not the case, we restore the state of the local checkpoint tracker by * replaying the translog and marking any operations there are completed. 
*/ - if (isResetting()) { - completeResettingEngineWithLocalHistory(); + if (isEngineResetting()) { + completePendingEngineWithLocalHistory(); } final Engine engine = getEngine(); engine.restoreLocalCheckpointFromTranslog(); @@ -1192,15 +1188,15 @@ public void close(String reason, boolean flushEngine) throws IOException { try { changeState(IndexShardState.CLOSED, reason); } finally { - final Engine engine = this.currentEngineReference.getAndSet(null); + final Engine engine = getEngineOrNull(); try { - if (engine != null && flushEngine && isResetting() == false) { + if (engine != null && flushEngine && isEngineResetting() == false) { engine.flushAndClose(); } } finally { // playing safe here and close the engine even if the above succeeds - close can be called multiple times // Also closing refreshListeners to prevent us from accumulating any more listeners - IOUtils.close(engine, resettingEngineReference.getAndSet(null), refreshListeners); + IOUtils.close(engine, engineHolder, refreshListeners); indexShardOperationPermits.close(); } } @@ -1233,7 +1229,7 @@ public void prepareForIndexRecovery() { throw new IndexShardNotRecoveringException(shardId, state); } recoveryState.setStage(RecoveryState.Stage.INDEX); - assert currentEngineReference.get() == null; + assert getEngineOrNull() == null : "engine is open already"; } public Engine.Result applyTranslogOperation(Translog.Operation operation, Engine.Operation.Origin origin) throws IOException { @@ -1269,7 +1265,7 @@ private Engine.Result applyTranslogOperation(Engine engine, Translog.Operation o // package-private for testing int runTranslogRecovery(Engine engine, Translog.Snapshot snapshot) throws IOException { - if (isResetting() == false) { + if (isEngineResetting() == false) { recoveryState.getTranslog().totalOperations(snapshot.totalOperations()); recoveryState.getTranslog().totalOperationsOnStart(snapshot.totalOperations()); } @@ -1291,7 +1287,7 @@ int runTranslogRecovery(Engine engine, Translog.Snapshot snapshot) throws IOExce } opsRecovered++; - if (isResetting() == false) { + if (isEngineResetting() == false) { recoveryState.getTranslog().incrementRecoveredOperations(); } } catch (Exception e) { @@ -1350,13 +1346,9 @@ private void innerOpenEngineAndTranslog() throws IOException { final String translogUUID = store.readLastCommittedSegmentsInfo().getUserData().get(Translog.TRANSLOG_UUID_KEY); final long globalCheckpoint = Translog.readGlobalCheckpoint(translogConfig.getTranslogPath(), translogUUID); replicationTracker.updateGlobalCheckpointOnReplica(globalCheckpoint, "read from translog checkpoint"); - - assertMaxUnsafeAutoIdInCommit(); - - final long minRetainedTranslogGen = Translog.readMinTranslogGeneration(translogConfig.getTranslogPath(), translogUUID); - store.trimUnsafeCommits(globalCheckpoint, minRetainedTranslogGen, config.getIndexSettings().getIndexVersionCreated()); - - createNewEngine(config); + synchronized (mutex) { + engineHolder.activateNewEngine(createNewEngine(config)); + } verifyNotClosed(); // We set active because we are now writing operations to the engine; this way, if we go idle after some time and become inactive, // we still give sync'd flush a chance to run: @@ -1396,8 +1388,7 @@ public void performRecoveryRestart() throws IOException { throw new IndexShardNotRecoveringException(shardId, state); } assert refreshListeners.pendingCount() == 0 : "we can't restart with pending listeners"; - final Engine engine = this.currentEngineReference.getAndSet(null); - IOUtils.close(engine); + IOUtils.close(engineHolder); 
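+            // recovery restarts from scratch; a new engine will be created again via innerOpenEngineAndTranslog()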
recoveryState().setStage(RecoveryState.Stage.INIT); } } @@ -1440,9 +1431,6 @@ public boolean ignoreRecoveryAttempt() { public void readAllowed() throws IllegalIndexShardStateException { IndexShardState state = this.state; // one time volatile read - if (state == IndexShardState.RECOVERING && resettingEngineReference.get() != null) { - return; - } if (readAllowedStates.contains(state) == false) { throw new IllegalIndexShardStateException(shardId, state, "operations only allowed when shard state is one of " + readAllowedStates.toString()); } @@ -1457,7 +1445,7 @@ private void ensureWriteAllowed(Engine.Operation.Origin origin) throws IllegalIn IndexShardState state = this.state; // one time volatile read if (origin.isRecovery()) { - if (state != IndexShardState.RECOVERING) { + if (state != IndexShardState.RECOVERING && isEngineResetting() == false) { throw new IllegalIndexShardStateException(shardId, state, "operation only allowed when recovering, origin [" + origin + "]"); } } else { @@ -1473,15 +1461,6 @@ private void ensureWriteAllowed(Engine.Operation.Origin origin) throws IllegalIn } } - private boolean isResetting() { - return state == IndexShardState.RECOVERING && resettingEngineReference.get() != null; - } - - // for testing - final Engine getResettingEngine() { - return resettingEngineReference.get(); - } - private boolean assertPrimaryMode() { assert shardRouting.primary() && replicationTracker.isPrimaryMode() : "shard " + shardRouting + " is not a primary shard in primary mode"; return true; @@ -2007,7 +1986,7 @@ Engine getEngine() { * closed. */ protected Engine getEngineOrNull() { - return this.currentEngineReference.get(); + return engineHolder.getActiveEngineOrNull(); } public void startRecovery(RecoveryState recoveryState, PeerRecoveryTargetService recoveryTargetService, @@ -2146,33 +2125,22 @@ public void onFailedEngine(String reason, @Nullable Exception failure) { } } - private Engine createNewEngine(EngineConfig config) { - synchronized (mutex) { - if (state == IndexShardState.CLOSED) { - throw new AlreadyClosedException(shardId + " can't create engine - shard is closed"); - } - assert this.currentEngineReference.get() == null; - Engine engine = newEngine(config); - onNewEngine(engine); // call this before we pass the memory barrier otherwise actions that happen - // inside the callback are not visible. This one enforces happens-before - this.currentEngineReference.set(engine); - } - - // time elapses after the engine is created above (pulling the config settings) until we set the engine reference, during which - // settings changes could possibly have happened, so here we forcefully push any config changes to the new engine: - Engine engine = getEngineOrNull(); - - // engine could perhaps be null if we were e.g. 
concurrently closed: - if (engine != null) { - engine.onSettingsChanged(); + private Engine createNewEngine(EngineConfig config) throws IOException { + assert Thread.holdsLock(mutex); + if (state == IndexShardState.CLOSED) { + throw new AlreadyClosedException(shardId + " can't create engine - shard is closed"); } + final String translogUUID = store.readLastCommittedSegmentsInfo().getUserData().get(Translog.TRANSLOG_UUID_KEY); + final long globalCheckpoint = Translog.readGlobalCheckpoint(translogConfig.getTranslogPath(), translogUUID); + final long minRetainedTranslogGen = Translog.readMinTranslogGeneration(translogConfig.getTranslogPath(), translogUUID); + store.trimUnsafeCommits(globalCheckpoint, minRetainedTranslogGen, config.getIndexSettings().getIndexVersionCreated()); + assertMaxUnsafeAutoIdInCommit(); + final Engine engine = engineFactory.newReadWriteEngine(config); + onNewEngine(engine); + engine.onSettingsChanged(); return engine; } - protected Engine newEngine(EngineConfig config) { - return engineFactory.newReadWriteEngine(config); - } - private static void persistMetadata( final ShardPath shardPath, final IndexSettings indexSettings, @@ -2302,20 +2270,19 @@ public void acquireReplicaOperationPermit(final long opPrimaryTerm, final long g } else { localCheckpoint = currentGlobalCheckpoint; } - final Engine resettingEngine = resettingEngineReference.getAndSet(null); - if (resettingEngine != null) { - IOUtils.close(resettingEngine); + if (isEngineResetting()) { + engineHolder.closePendingEngine(); } else { - getEngine().resetLocalCheckpoint(globalCheckpoint); + getEngine().resetLocalCheckpoint(localCheckpoint); getEngine().rollTranslogGeneration(); sync(); } - logger.info( - "detected new primary with primary term [{}], resetting local checkpoint from [{}] to [{}]", - opPrimaryTerm, - getLocalCheckpoint(), - localCheckpoint); - resetEngineUpToLocalCheckpoint(currentGlobalCheckpoint); + logger.info("detected new primary with primary term [{}], resetting local checkpoint from [{}] to [{}]", + opPrimaryTerm, getLocalCheckpoint(), localCheckpoint); + final Engine pendingEngine = maybeResetEngineToSafeCommit(); + if (pendingEngine != null) { + pendingEngine.recoverFromTranslog(localCheckpoint); + } }); } } @@ -2662,85 +2629,147 @@ public void afterRefresh(boolean didRefresh) throws IOException { } } + public Engine.Result applyResyncOperation(Translog.Operation operation) throws IOException { + return applyTranslogOperation(engineHolder.getEngineForResync(), operation, Engine.Operation.Origin.PEER_RECOVERY); + } + + /** + * This method is called after each bulk of primary-replica resync operations are applied to this shard + * via {@link #applyResyncOperation(Translog.Operation)}. + * + * @param maxSeqNoFromPrimary the maximum sequence number from the newly promoted primary + */ + public void afterApplyResyncOperationsBulk(long maxSeqNoFromPrimary) throws IOException { + if (maxSeqNoFromPrimary != SequenceNumbers.UNASSIGNED_SEQ_NO) { + engineHolder.getEngineForResync().trimOperationsFromTranslog(operationPrimaryTerm, maxSeqNoFromPrimary); + engineHolder.adjustMinRequiredCheckpoint(maxSeqNoFromPrimary); + } + engineHolder.maybeActivatePendingEngine(); + } + + final boolean isEngineResetting() { + return engineHolder.hasPendingEngine(); + } + final boolean canResetEngine() { // TODO: do not reset the following shard + // other options to enable rollback on older indices: + // 1. send max_seq_no from the primary via replication requests + // 2. 
pass channel version when acquiring an index shard permit return indexSettings.getIndexVersionCreated().onOrAfter(Version.V_6_4_0); } - private void resetEngineUpToLocalCheckpoint(long recoverUpToSeqNo) throws IOException { + private Engine maybeResetEngineToSafeCommit() throws IOException { synchronized (mutex) { - assert resettingEngineReference.get() == null : "shard is being reset already"; - final Engine currentEngine = getEngine(); - final long globalCheckpoint = getLastSyncedGlobalCheckpoint(); - final long currentMaxSeqNo = currentEngine.getSeqNoStats(globalCheckpoint).getMaxSeqNo(); + assert isEngineResetting() == false : "engine is being reset already"; + final long lastSyncedGlobalCheckpoint = getLastSyncedGlobalCheckpoint(); + final long currentMaxSeqNo = getEngine().getSeqNoStats(lastSyncedGlobalCheckpoint).getMaxSeqNo(); // Do not reset the current engine if it does not have any stale operations - // or the primary is on an old version which does not send the max_seqno from the primary during resync. - // we need max_seqno from the primary to activate the resetting engine when its history aligned with primary's. - if (currentMaxSeqNo == globalCheckpoint || canResetEngine() == false) { - return; - } - final String translogUUID = currentEngine.commitStats().getUserData().get(Translog.TRANSLOG_UUID_KEY); - IOUtils.close(currentEngineReference.getAndSet(new SearchOnlyEngine(currentEngine))); - final long minRetainedTranslogGen = Translog.readMinTranslogGeneration(translogConfig.getTranslogPath(), translogUUID); - store.trimUnsafeCommits(globalCheckpoint, minRetainedTranslogGen, indexSettings.getIndexVersionCreated()); - final EngineConfig config = newEngineConfig(); - config.setEnableGcDeletes(true); - final Engine resettingEngine = newEngine(config); - onNewEngine(resettingEngine); - verifyNotClosed(); - // the resetting engine will be activated only if its local_checkpoint at least this guard. - minRequiredCheckpointForResettingEngine.set(currentMaxSeqNo); - resettingEngineReference.set(resettingEngine); - changeState(IndexShardState.RECOVERING, "reset engine from=" + currentMaxSeqNo + " to=" + globalCheckpoint); - // TODO: We can transfer docs from the last commit to the safe commit or use multiple threads here. - resettingEngine.recoverFromTranslog(recoverUpToSeqNo); - active.set(true); - } - } - - private void maybeActivateResettingEngine() throws IOException { - synchronized (mutex) { - final Engine resettingEngine = resettingEngineReference.get(); - if (resettingEngine != null && resettingEngine.getLocalCheckpoint() >= minRequiredCheckpointForResettingEngine.get()) { - final Engine newEngine = resettingEngineReference.getAndSet(null); - final Engine currentEngine = currentEngineReference.getAndSet(newEngine); - IOUtils.close(currentEngine); - changeState(IndexShardState.STARTED, "engine was reset"); + // or the primary is on an old version which does not send the max_seq_no from the primary during resync. + // we need max_seq_no from the primary to activate the resetting engine when its history aligned with primary's. 
+ if (currentMaxSeqNo == lastSyncedGlobalCheckpoint || canResetEngine() == false) { + return null; + } else { + engineHolder.makeActiveEngineSearchOnly(); + final Engine pendingEngine = createNewEngine(newEngineConfig()); + engineHolder.setPendingEngine(pendingEngine, currentMaxSeqNo); + active.set(true); + return pendingEngine; } } } - public Engine.Result applyResyncOperation(Translog.Operation operation) throws IOException { - final Engine resettingEngine = resettingEngineReference.get(); - final Engine targetEngine = resettingEngine != null ? resettingEngine : getEngine(); - return applyTranslogOperation(targetEngine, operation, Engine.Operation.Origin.PEER_RECOVERY); + private void completePendingEngineWithLocalHistory() throws IOException { + final Engine pendingEngine; + synchronized (mutex) { + assert isEngineResetting() : "engine is not being reset"; + engineHolder.closePendingEngine(); + pendingEngine = maybeResetEngineToSafeCommit(); + } + if (pendingEngine != null) { + pendingEngine.recoverFromTranslog(Long.MAX_VALUE); + engineHolder.adjustMinRequiredCheckpoint(-1); + engineHolder.maybeActivatePendingEngine(); + } + assert isEngineResetting() == false : "engine was reset with local history"; } /** - * This method is called after each bulk of primary-replica resync operations are applied to this shard - * via {@link #applyResyncOperation(Translog.Operation)}. - * - * @param maxSeqNoFromPrimary the maximum sequence number from the newly promoted primary + * An {@link EngineHolder} manages one or two engine references and its transition during primary-replica resync. */ - public void afterApplyResyncOperationsBulk(long maxSeqNoFromPrimary) throws IOException { - final Engine resettingEngine = resettingEngineReference.get(); - final Engine targetEngine = resettingEngine != null ? resettingEngine : getEngine(); - if (maxSeqNoFromPrimary != SequenceNumbers.UNASSIGNED_SEQ_NO) { - targetEngine.trimOperationsFromTranslog(operationPrimaryTerm, maxSeqNoFromPrimary); - // adjust the requirement for the resetting engine, so that we can activate when its history aligned with the primary. - minRequiredCheckpointForResettingEngine.set(Math.min(minRequiredCheckpointForResettingEngine.get(), maxSeqNoFromPrimary)); + static final class EngineHolder implements Closeable { + private volatile Engine activeEngine; // allow to read without synchronization + private Engine pendingEngine; + // avoid losing acknowledged writes, only activate the pending engine when its local_checkpoint at least this guard. 
+ private long minRequiredCheckpoint = SequenceNumbers.NO_OPS_PERFORMED; + + synchronized void activateNewEngine(Engine newEngine) { + assert activeEngine == null : "engine is activated already"; + assert pendingEngine == null : "engine is being reset"; + this.activeEngine = newEngine; } - maybeActivateResettingEngine(); - } - private void completeResettingEngineWithLocalHistory() throws IOException { - synchronized (mutex) { - assert isResetting() : "engine is not being reset"; - IOUtils.close(resettingEngineReference.getAndSet(null)); - resetEngineUpToLocalCheckpoint(Long.MAX_VALUE); - minRequiredCheckpointForResettingEngine.set(getGlobalCheckpoint()); - maybeActivateResettingEngine(); - assert isResetting() == false : "engine was not reset with local history"; + Engine getActiveEngineOrNull() { + return activeEngine; + } + + synchronized void makeActiveEngineSearchOnly() throws IOException { + assert activeEngine != null : "engine is not activated yet"; + if ((this.activeEngine instanceof SearchOnlyEngine) == false) { + final Engine current = this.activeEngine; + this.activeEngine = new SearchOnlyEngine(this.activeEngine); + IOUtils.close(current); + } + } + + synchronized Engine getEngineForResync() { + final Engine target = pendingEngine != null ? pendingEngine : activeEngine; + if (target == null) { + throw new AlreadyClosedException("engine is closed"); + } + return target; + } + + synchronized boolean hasPendingEngine() { + return pendingEngine != null; + } + + synchronized void setPendingEngine(Engine pendingEngine, long minRequiredCheckpoint) { + assert this.activeEngine != null : "engine is not activated"; + assert this.pendingEngine == null : "engine is being reset already"; + this.pendingEngine = pendingEngine; + this.minRequiredCheckpoint = minRequiredCheckpoint; + } + + synchronized void adjustMinRequiredCheckpoint(long newMinRequiredCheckpoint) { + this.minRequiredCheckpoint = Math.min(minRequiredCheckpoint, newMinRequiredCheckpoint); + } + + synchronized void closePendingEngine() throws IOException { + final Engine closing = this.pendingEngine; + this.pendingEngine = null; + IOUtils.close(closing); + } + + synchronized void maybeActivatePendingEngine() throws IOException { + if (pendingEngine != null && pendingEngine.getLocalCheckpoint() >= minRequiredCheckpoint) { + // make sure that all acknowledged writes are visible + pendingEngine.refresh("rollback"); + final Engine closing = this.activeEngine; + this.activeEngine = this.pendingEngine; + this.pendingEngine = null; + IOUtils.close(closing); + } + } + + @Override + public void close() throws IOException { + try { + IOUtils.close(activeEngine, pendingEngine); + } finally { + activeEngine = null; + pendingEngine = null; + } } } } diff --git a/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java b/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java index 0a3c95e796c30..8bf09fa8bd654 100644 --- a/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java +++ b/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java @@ -34,7 +34,7 @@ public class SearchOnlyEngineTests extends EngineTestCase { - public void testReadOnlyEngine() throws Exception { + public void testSearchOnlyEngine() throws Exception { IOUtils.close(engine, store); Engine lockedDownEngine = null; final AtomicLong globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED); @@ -75,7 +75,7 @@ public void testReadOnlyEngine() throws Exception { // the locked 
down engine should still point to the previous commit assertThat(lockedDownEngine.getLocalCheckpoint(), equalTo(lastSeqNoStats.getLocalCheckpoint())); assertThat(lockedDownEngine.getSeqNoStats(globalCheckpoint.get()).getMaxSeqNo(), equalTo(lastSeqNoStats.getMaxSeqNo())); - assertThat(getDocIds(lockedDownEngine, randomBoolean()), equalTo(lastDocIds)); + assertThat(getDocIds(lockedDownEngine, false), equalTo(lastDocIds)); } // Close and reopen the main engine trimUnsafeCommits(config); @@ -84,7 +84,7 @@ public void testReadOnlyEngine() throws Exception { // the locked down engine should still point to the previous commit assertThat(lockedDownEngine.getLocalCheckpoint(), equalTo(lastSeqNoStats.getLocalCheckpoint())); assertThat(lockedDownEngine.getSeqNoStats(globalCheckpoint.get()).getMaxSeqNo(), equalTo(lastSeqNoStats.getMaxSeqNo())); - assertThat(getDocIds(lockedDownEngine, randomBoolean()), equalTo(lastDocIds)); + assertThat(getDocIds(lockedDownEngine, false), equalTo(lastDocIds)); } } finally { IOUtils.close(lockedDownEngine); diff --git a/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java b/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java index b87b7f50551fa..1eef2bb969baf 100644 --- a/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java +++ b/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java @@ -661,6 +661,7 @@ public void testRollbackOnPromotion() throws Exception { shards.flush(); } } + shards.refresh("test"); AtomicBoolean isDone = new AtomicBoolean(); Thread[] threads = new Thread[numberOfReplicas]; for (int i = 0; i < threads.length; i++) { @@ -670,7 +671,7 @@ public void testRollbackOnPromotion() throws Exception { long hitClosedExceptions = 0; while (isDone.get() == false) { try { - Set docIds = getShardDocUIDs(replica, true); + Set docIds = getShardDocUIDs(replica, false); assertThat(ackedDocIds, everyItem(isIn(docIds))); assertThat(replica.getLocalCheckpoint(), greaterThanOrEqualTo(initDocs - 1L)); } catch (AlreadyClosedException e) { diff --git a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java index 2cffbeec861a3..36833e3660c8d 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java @@ -158,11 +158,13 @@ import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.either; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.everyItem; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.hasKey; import static org.hamcrest.Matchers.hasToString; import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.isIn; import static org.hamcrest.Matchers.lessThan; import static org.hamcrest.Matchers.lessThanOrEqualTo; import static org.hamcrest.Matchers.not; @@ -916,7 +918,7 @@ public void onFailure(Exception e) { ThreadPool.Names.SAME, ""); latch.await(); final boolean shouldReset = indexShard.canResetEngine() && globalCheckpoint < maxSeqNo; - assertThat(indexShard.getResettingEngine(), shouldReset ? 
notNullValue() : nullValue()); + assertThat(indexShard.isEngineResetting(), equalTo(shouldReset)); if (shouldReset && randomBoolean()) { // trimmed by the max_seqno on the primary maxSeqNo = randomLongBetween(globalCheckpoint, maxSeqNo); @@ -934,30 +936,25 @@ public void onFailure(Exception e) { new IndexShardRoutingTable.Builder(newRouting.shardId()).addShard(newRouting).build(), Collections.emptySet()); resyncLatch.await(); - assertThat(indexShard.getResettingEngine(), nullValue()); - assertThat(indexShard.state(), equalTo(IndexShardState.STARTED)); + assertThat(indexShard.isEngineResetting(), equalTo(false)); assertThat(indexShard.getLocalCheckpoint(), equalTo(maxSeqNo)); assertThat(indexShard.seqNoStats().getMaxSeqNo(), equalTo(maxSeqNo)); closeShards(indexShard); } - public void testThrowBackLocalCheckpointOnReplica() throws IOException, InterruptedException { + public void testResetReplicaEngineOnPromotion() throws IOException, InterruptedException { final IndexShard indexShard = newStartedShard(false); // most of the time this is large enough that most of the time there will be at least one gap final int operations = 1024 - scaledRandomIntBetween(0, 1024); indexOnReplicaWithGaps(indexShard, operations, Math.toIntExact(SequenceNumbers.NO_OPS_PERFORMED)); - final long globalCheckpointOnReplica = - randomIntBetween( - Math.toIntExact(SequenceNumbers.UNASSIGNED_SEQ_NO), - Math.toIntExact(indexShard.getLocalCheckpoint())); + final long globalCheckpointOnReplica = randomLongBetween(SequenceNumbers.UNASSIGNED_SEQ_NO, indexShard.getLocalCheckpoint()); indexShard.updateGlobalCheckpointOnReplica(globalCheckpointOnReplica, "test"); + final long globalCheckpoint = randomLongBetween(SequenceNumbers.UNASSIGNED_SEQ_NO, indexShard.getLocalCheckpoint()); - final int globalCheckpoint = - randomIntBetween( - Math.toIntExact(SequenceNumbers.UNASSIGNED_SEQ_NO), - Math.toIntExact(indexShard.getLocalCheckpoint())); + final Set lastDocIds = getShardDocUIDs(indexShard, true); + final Engine prevEngine = getEngine(indexShard); final CountDownLatch latch = new CountDownLatch(1); indexShard.acquireReplicaOperationPermit( indexShard.pendingPrimaryTerm + 1, @@ -966,11 +963,6 @@ public void testThrowBackLocalCheckpointOnReplica() throws IOException, Interrup @Override public void onResponse(final Releasable releasable) { releasable.close(); - try { - indexShard.afterApplyResyncOperationsBulk(Math.max(globalCheckpoint, SequenceNumbers.NO_OPS_PERFORMED)); - } catch (IOException e) { - throw new AssertionError(e); - } latch.countDown(); } @@ -982,13 +974,25 @@ public void onFailure(final Exception e) { ThreadPool.Names.SAME, ""); latch.await(); - if (globalCheckpointOnReplica == SequenceNumbers.UNASSIGNED_SEQ_NO - && globalCheckpoint == SequenceNumbers.UNASSIGNED_SEQ_NO) { + if (globalCheckpointOnReplica == SequenceNumbers.UNASSIGNED_SEQ_NO && globalCheckpoint == SequenceNumbers.UNASSIGNED_SEQ_NO) { assertThat(indexShard.getLocalCheckpoint(), equalTo(SequenceNumbers.NO_OPS_PERFORMED)); } else { assertThat(indexShard.getLocalCheckpoint(), equalTo(Math.max(globalCheckpoint, globalCheckpointOnReplica))); } - + assertThat(lastDocIds, everyItem(isIn(getShardDocUIDs(indexShard, false)))); + if (indexShard.seqNoStats().getMaxSeqNo() == indexShard.getGlobalCheckpoint()) { + assertThat(indexShard.isEngineResetting(), equalTo(false)); + assertThat(getEngine(indexShard), sameInstance(prevEngine)); + } else { + assertThat(indexShard.isEngineResetting(), equalTo(true)); + assertThat(getEngine(indexShard), 
not(sameInstance(prevEngine))); + indexShard.afterApplyResyncOperationsBulk( + randomLongBetween(indexShard.getLocalCheckpoint() + 1, indexShard.seqNoStats().getMaxSeqNo())); + assertThat(indexShard.isEngineResetting(), equalTo(true)); + indexShard.afterApplyResyncOperationsBulk( + randomLongBetween(SequenceNumbers.NO_OPS_PERFORMED, indexShard.getLocalCheckpoint())); + assertThat(indexShard.isEngineResetting(), equalTo(false)); + } // ensure that after the local checkpoint throw back and indexing again, the local checkpoint advances final Result result = indexOnReplicaWithGaps(indexShard, operations, Math.toIntExact(indexShard.getLocalCheckpoint())); assertThat(indexShard.getLocalCheckpoint(), equalTo((long) result.localCheckpoint)); @@ -996,6 +1000,34 @@ public void onFailure(final Exception e) { closeShards(indexShard); } + public void testDoNoResetEngineOnOldVersions() throws Exception { + Version oldVersion = randomValueOtherThanMany(v -> v.onOrAfter(Version.V_6_4_0), + () -> randomFrom(Version.getDeclaredVersions(Version.class))); + IndexShard shard = newStartedShard(false, oldVersion); + assertThat(shard.canResetEngine(), equalTo(false)); + indexOnReplicaWithGaps(shard, between(5, 50), Math.toIntExact(SequenceNumbers.NO_OPS_PERFORMED)); + long globalCheckpoint = randomLongBetween(SequenceNumbers.NO_OPS_PERFORMED, shard.getLocalCheckpoint()); + Engine engine = shard.getEngine(); + CountDownLatch acquired = new CountDownLatch(1); + shard.acquireReplicaOperationPermit(shard.getPendingPrimaryTerm() + 1, globalCheckpoint, new ActionListener() { + @Override + public void onResponse(Releasable releasable) { + releasable.close(); + acquired.countDown(); + } + @Override + public void onFailure(Exception e) { + } + }, ThreadPool.Names.SAME, ""); + acquired.await(); + assertThat(shard.getEngine(), sameInstance(engine)); + assertThat(shard.isEngineResetting(), equalTo(false)); + shard.afterApplyResyncOperationsBulk(randomLongBetween(globalCheckpoint, globalCheckpoint + 5)); + assertThat(shard.getEngine(), sameInstance(engine)); + assertThat(shard.isEngineResetting(), equalTo(false)); + closeShards(shard); + } + public void testConcurrentTermIncreaseOnReplicaShard() throws BrokenBarrierException, InterruptedException, IOException { final IndexShard indexShard = newStartedShard(false); @@ -1941,76 +1973,6 @@ public void restoreShard(IndexShard shard, SnapshotId snapshotId, Version versio closeShards(source, target); } - public void testResetEngineOnAcquireReplicaShardPermits() throws Exception { - IndexShard shard = newStartedShard(false); - AtomicLong globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED); - indexOnReplicaWithGaps(shard, scaledRandomIntBetween(10, 1000), Math.toIntExact(SequenceNumbers.NO_OPS_PERFORMED)); - globalCheckpoint.set(randomLongBetween(globalCheckpoint.get(), shard.getLocalCheckpoint())); - Set lastDocIds = getShardDocUIDs(shard, true); - CountDownLatch acquired = new CountDownLatch(1); - Engine prevEngine = getEngine(shard); - shard.acquireReplicaOperationPermit(shard.getPendingPrimaryTerm() + 1, globalCheckpoint.get(), new ActionListener() { - @Override - public void onResponse(Releasable releasable) { - releasable.close(); - acquired.countDown(); - } - @Override - public void onFailure(Exception e) { - - } - }, ThreadPool.Names.SAME, ""); - acquired.await(); - assertThat(getShardDocUIDs(shard, randomBoolean()), equalTo(lastDocIds)); - final Engine resettingEngine = shard.getResettingEngine(); - if (globalCheckpoint.get() == 
shard.seqNoStats().getMaxSeqNo()) { - assertThat("Engine without stale operations should not be reset", resettingEngine, nullValue()); - assertThat(getEngine(shard), sameInstance(prevEngine)); - } else { - assertThat(resettingEngine, notNullValue()); - assertThat(getEngine(shard), not(sameInstance(prevEngine))); - SeqNoStats seqNoStats = resettingEngine.getSeqNoStats(globalCheckpoint.get()); - assertThat(seqNoStats.getMaxSeqNo(), equalTo(globalCheckpoint.get())); - assertThat(seqNoStats.getLocalCheckpoint(), equalTo(globalCheckpoint.get())); - // should not activate the resetting engine - shard.afterApplyResyncOperationsBulk(randomLongBetween(globalCheckpoint.get() + 1, Long.MAX_VALUE)); - assertThat(getEngine(shard), not(sameInstance(resettingEngine))); - // should activate if the resetting engine - shard.afterApplyResyncOperationsBulk(globalCheckpoint.get()); - assertThat(getEngine(shard), sameInstance(resettingEngine)); - assertThat(shard.getResettingEngine(), nullValue()); - } - closeShards(shard); - } - - public void testDoNoResetEngineOnOldVersions() throws Exception { - Version oldVersion = randomValueOtherThanMany(v -> v.onOrAfter(Version.V_6_4_0), - () -> randomFrom(Version.getDeclaredVersions(Version.class))); - IndexShard shard = newStartedShard(false, oldVersion); - assertThat(shard.canResetEngine(), equalTo(false)); - indexOnReplicaWithGaps(shard, between(5, 50), Math.toIntExact(SequenceNumbers.NO_OPS_PERFORMED)); - long globalCheckpoint = randomLongBetween(SequenceNumbers.NO_OPS_PERFORMED, shard.getLocalCheckpoint()); - Engine engine = shard.getEngine(); - CountDownLatch acquired = new CountDownLatch(1); - shard.acquireReplicaOperationPermit(shard.getPendingPrimaryTerm() + 1, globalCheckpoint, new ActionListener() { - @Override - public void onResponse(Releasable releasable) { - releasable.close(); - acquired.countDown(); - } - @Override - public void onFailure(Exception e) { - } - }, ThreadPool.Names.SAME, ""); - acquired.await(); - assertThat(shard.getEngine(), sameInstance(engine)); - assertThat(shard.getResettingEngine(), nullValue()); - shard.afterApplyResyncOperationsBulk(randomLongBetween(globalCheckpoint, globalCheckpoint + 5)); - assertThat(shard.getEngine(), sameInstance(engine)); - assertThat(shard.getResettingEngine(), nullValue()); - closeShards(shard); - } - public void testSearcherWrapperIsUsed() throws IOException { IndexShard shard = newStartedShard(true); indexDoc(shard, "_doc", "0", "{\"foo\" : \"bar\"}"); From 92021f70f211ab94b50fedc45f98240beb315a64 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Mon, 20 Aug 2018 21:40:20 -0400 Subject: [PATCH 05/17] use last commit + fix synchronization --- .../index/engine/SearchOnlyEngine.java | 53 ++++++++------ .../elasticsearch/index/shard/IndexShard.java | 71 +++++++++++-------- .../index/translog/Translog.java | 3 + .../indices/recovery/RecoveryState.java | 12 ++++ .../index/engine/SearchOnlyEngineTests.java | 25 +++---- 5 files changed, 99 insertions(+), 65 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/engine/SearchOnlyEngine.java b/server/src/main/java/org/elasticsearch/index/engine/SearchOnlyEngine.java index 235b6fe926669..2f05ba647c547 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/SearchOnlyEngine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/SearchOnlyEngine.java @@ -19,11 +19,13 @@ package org.elasticsearch.index.engine; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.SegmentInfos; import 
org.apache.lucene.search.SearcherManager; import org.apache.lucene.store.AlreadyClosedException; import org.elasticsearch.common.lease.Releasable; import org.elasticsearch.common.lease.Releasables; +import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader; import org.elasticsearch.common.util.concurrent.ReleasableLock; import org.elasticsearch.core.internal.io.IOUtils; import org.elasticsearch.index.seqno.SeqNoStats; @@ -32,6 +34,7 @@ import java.io.Closeable; import java.io.IOException; +import java.io.UncheckedIOException; import java.util.Arrays; import java.util.List; import java.util.concurrent.CountDownLatch; @@ -46,33 +49,40 @@ public final class SearchOnlyEngine extends Engine { private final SeqNoStats seqNoStats; private final TranslogStats translogStats; private final SearcherManager searcherManager; - private final Searcher lastCommitSearcher; - - public SearchOnlyEngine(Engine engine) throws IOException { - super(engine.config()); - engine.refresh("lockdown"); - this.seqNoStats = engine.getSeqNoStats(engine.getLastSyncedGlobalCheckpoint()); - this.translogStats = engine.getTranslogStats(); - this.lastCommittedSegmentInfos = engine.getLastCommittedSegmentInfos(); - Searcher searcher = engine.acquireSearcher("lockdown", SearcherScope.INTERNAL); + + public SearchOnlyEngine(EngineConfig config, SeqNoStats seqNoStats) { + super(config); + this.seqNoStats = seqNoStats; try { - this.searcherManager = new SearcherManager(searcher.getDirectoryReader(), - new RamAccountingSearcherFactory(engineConfig.getCircuitBreakerService())); - this.lastCommitSearcher = searcher; - searcher = null; - } finally { - IOUtils.close(searcher); + store.incRef(); + ElasticsearchDirectoryReader reader = null; + boolean success = false; + try { + this.lastCommittedSegmentInfos = store.readLastCommittedSegmentsInfo(); + this.translogStats = new TranslogStats(0, 0, 0, 0, 0); + reader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(store.directory()), config.getShardId()); + this.searcherManager = new SearcherManager(reader, new RamAccountingSearcherFactory(config.getCircuitBreakerService())); + success = true; + } finally { + if (success == false) { + IOUtils.close(reader, store::decRef); + } + } + } catch (IOException e) { + throw new UncheckedIOException(e); } } @Override protected void closeNoLock(String reason, CountDownLatch closedLatch) { - try { - IOUtils.close(lastCommitSearcher); - } catch (Exception ex) { - logger.warn("failed to close searcher", ex); - } finally { - closedLatch.countDown(); + if (isClosed.compareAndSet(false, true)) { + try { + IOUtils.close(searcherManager, store::decRef); + } catch (Exception ex) { + logger.warn("failed to close engine", ex); + } finally { + closedLatch.countDown(); + } } } @@ -310,6 +320,5 @@ public void trimOperationsFromTranslog(long belowTerm, long aboveSeqNo) throws E @Override public void maybePruneDeletes() { - } } diff --git a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java index a09a88fb269e8..2569068e0b6a5 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -1269,10 +1269,8 @@ private Engine.Result applyTranslogOperation(Engine engine, Translog.Operation o // package-private for testing int runTranslogRecovery(Engine engine, Translog.Snapshot snapshot) throws IOException { - if (isEngineResetting() == false) { - 
recoveryState.getTranslog().totalOperations(snapshot.totalOperations()); - recoveryState.getTranslog().totalOperationsOnStart(snapshot.totalOperations()); - } + recoveryState.getTranslog().setOrIncreaseTotalOperations(snapshot.totalOperations()); + recoveryState.getTranslog().totalOperationsOnStart(snapshot.totalOperations()); int opsRecovered = 0; Translog.Operation operation; while ((operation = snapshot.next()) != null) { @@ -1291,9 +1289,7 @@ int runTranslogRecovery(Engine engine, Translog.Snapshot snapshot) throws IOExce } opsRecovered++; - if (isEngineResetting() == false) { - recoveryState.getTranslog().incrementRecoveredOperations(); - } + recoveryState.getTranslog().incrementRecoveredOperations(); } catch (Exception e) { if (ExceptionsHelper.status(e) == RestStatus.BAD_REQUEST) { // mainly for MapperParsingException and Failure to detect xcontent @@ -2145,7 +2141,7 @@ public void onFailedEngine(String reason, @Nullable Exception failure) { private Engine createNewEngine(EngineConfig config) throws IOException { assert Thread.holdsLock(mutex); if (state == IndexShardState.CLOSED) { - throw new AlreadyClosedException(shardId + " can't create engine - shard is closed"); + throw new IndexShardClosedException(shardId, "can't create engine - shard is closed"); } final String translogUUID = store.readLastCommittedSegmentsInfo().getUserData().get(Translog.TRANSLOG_UUID_KEY); final long globalCheckpoint = Translog.readGlobalCheckpoint(translogConfig.getTranslogPath(), translogUUID); @@ -2290,9 +2286,9 @@ public void acquireReplicaOperationPermit(final long opPrimaryTerm, final long g if (isEngineResetting()) { engineHolder.closePendingEngine(); } else { - getEngine().resetLocalCheckpoint(localCheckpoint); - getEngine().rollTranslogGeneration(); sync(); + getEngine().flush(true, true); // force=true to make sure that we roll a translog generation + getEngine().resetLocalCheckpoint(localCheckpoint); } logger.info("detected new primary with primary term [{}], resetting local checkpoint from [{}] to [{}]", opPrimaryTerm, getLocalCheckpoint(), localCheckpoint); @@ -2730,13 +2726,17 @@ Engine getActiveEngineOrNull() { return activeEngine; } - synchronized void makeActiveEngineSearchOnly() throws IOException { + void makeActiveEngineSearchOnly() throws IOException { + Engine closing = null; assert activeEngine != null : "engine is not activated yet"; - if ((this.activeEngine instanceof SearchOnlyEngine) == false) { - final Engine current = this.activeEngine; - this.activeEngine = new SearchOnlyEngine(this.activeEngine); - IOUtils.close(current); + synchronized (this) { + if ((this.activeEngine instanceof SearchOnlyEngine) == false) { + final SeqNoStats seqNoStats = this.activeEngine.getSeqNoStats(activeEngine.getLastSyncedGlobalCheckpoint()); + closing = this.activeEngine; + this.activeEngine = new SearchOnlyEngine(this.activeEngine.config(), seqNoStats); + } } + IOUtils.close(closing); } synchronized Engine getEngineForResync() { @@ -2762,31 +2762,40 @@ synchronized void adjustMinRequiredCheckpoint(long newMinRequiredCheckpoint) { this.minRequiredCheckpoint = Math.min(minRequiredCheckpoint, newMinRequiredCheckpoint); } - synchronized void closePendingEngine() throws IOException { - final Engine closing = this.pendingEngine; - this.pendingEngine = null; + void closePendingEngine() throws IOException { + final Engine closing; + synchronized (this) { + closing = this.pendingEngine; + this.pendingEngine = null; + } IOUtils.close(closing); } - synchronized void maybeActivatePendingEngine() throws 
IOException { - if (pendingEngine != null && pendingEngine.getLocalCheckpoint() >= minRequiredCheckpoint) { - // make sure that all acknowledged writes are visible - pendingEngine.refresh("rollback"); - final Engine closing = this.activeEngine; - this.activeEngine = this.pendingEngine; - this.pendingEngine = null; - IOUtils.close(closing); + void maybeActivatePendingEngine() throws IOException { + Engine closing = null; + synchronized (this) { + if (pendingEngine != null && pendingEngine.getLocalCheckpoint() >= minRequiredCheckpoint) { + // make sure that all acknowledged writes are visible + pendingEngine.refresh("rollback"); + closing = this.activeEngine; + this.activeEngine = this.pendingEngine; + this.pendingEngine = null; + } } + IOUtils.close(closing); } @Override public void close() throws IOException { - try { - IOUtils.close(activeEngine, pendingEngine); - } finally { - activeEngine = null; - pendingEngine = null; + final Engine closingActive; + final Engine closingPending; + synchronized (this) { + closingActive = this.activeEngine; + this.activeEngine = null; + closingPending = this.pendingEngine; + this.pendingEngine = null; } + IOUtils.close(closingActive, closingPending); } } } diff --git a/server/src/main/java/org/elasticsearch/index/translog/Translog.java b/server/src/main/java/org/elasticsearch/index/translog/Translog.java index e2fc83ab79091..a2a69d93836e5 100644 --- a/server/src/main/java/org/elasticsearch/index/translog/Translog.java +++ b/server/src/main/java/org/elasticsearch/index/translog/Translog.java @@ -593,6 +593,9 @@ public Snapshot newSnapshotFromGen(TranslogGeneration minGeneration, long upToSe .filter(reader -> reader.getGeneration() >= minTranslogFileGen && reader.getCheckpoint().minSeqNo <= upToSeqNo) .map(BaseTranslogReader::newSnapshot).toArray(TranslogSnapshot[]::new); Snapshot snapshot = newMultiSnapshot(snapshots); + if (upToSeqNo == Long.MAX_VALUE) { + return snapshot; + } return new Snapshot() { int skipped = 0; @Override diff --git a/server/src/main/java/org/elasticsearch/indices/recovery/RecoveryState.java b/server/src/main/java/org/elasticsearch/indices/recovery/RecoveryState.java index 0d57d9506628f..aed77f9e34ac4 100644 --- a/server/src/main/java/org/elasticsearch/indices/recovery/RecoveryState.java +++ b/server/src/main/java/org/elasticsearch/indices/recovery/RecoveryState.java @@ -504,6 +504,18 @@ public synchronized void totalOperations(int total) { assert total == UNKNOWN || total >= recovered : "total, if known, should be > recovered. total [" + total + "], recovered [" + recovered + "]"; } + /** + * Sets or increases the total number of translog operations to be recovered by the given value. + */ + public synchronized void setOrIncreaseTotalOperations(int newOperations) { + if (total == UNKNOWN) { + this.total = newOperations; + } else { + this.total += newOperations; + } + assert total >= recovered : "total [" + total + "] < recovered [" + recovered + "]"; + } + /** * returns the total number of translog operations to recovered, on the start of the recovery. Unlike {@link #totalOperations} * this does change during recovery. 
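(A note on the Translog change above: the hunk shows only the head of the seq_no-bounded snapshot that newSnapshotFromGen returns when upToSeqNo is bounded; the rest of the anonymous class falls outside the context lines. The sketch below only illustrates the filtering idea. The helper method, its name, and how the skipped counter is used are assumptions; Translog.Snapshot#totalOperations(), Translog.Snapshot#next(), and Translog.Operation#seqNo() are the interfaces already used elsewhere in this series.)

    // Illustrative sketch, not the committed code: a view over an existing snapshot
    // that hides every operation above upToSeqNo (e.g. above the global checkpoint).
    static Translog.Snapshot boundedSnapshot(Translog.Snapshot delegate, long upToSeqNo) {
        return new Translog.Snapshot() {
            int skipped = 0; // the committed change keeps a similar counter

            @Override
            public int totalOperations() {
                return delegate.totalOperations();
            }

            @Override
            public Translog.Operation next() throws IOException {
                Translog.Operation op;
                while ((op = delegate.next()) != null) {
                    if (op.seqNo() <= upToSeqNo) {
                        return op; // still within the requested range
                    }
                    skipped++; // newer than upToSeqNo: skip instead of replaying
                }
                return null;
            }

            @Override
            public void close() throws IOException {
                delegate.close();
            }
        };
    }

Returning the unwrapped multi-snapshot when upToSeqNo is Long.MAX_VALUE keeps the common full-replay path unchanged; only the bounded replay used by the engine reset pays for the extra filtering.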
diff --git a/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java b/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java index 8c668c7241c69..e81a1e73f6622 100644 --- a/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java +++ b/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java @@ -36,7 +36,7 @@ public class SearchOnlyEngineTests extends EngineTestCase { public void testSearchOnlyEngine() throws Exception { IOUtils.close(engine, store); - Engine lockedDownEngine = null; + Engine searchOnlyEngine = null; final AtomicLong globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED); try (Store store = createStore()) { EngineConfig config = config(defaultSettings, store, createTempDir(), newMergePolicy(), null, null, globalCheckpoint::get); @@ -57,12 +57,13 @@ public void testSearchOnlyEngine() throws Exception { globalCheckpoint.set(randomLongBetween(globalCheckpoint.get(), engine.getLocalCheckpoint())); } engine.syncTranslog(); - lockedDownEngine = new SearchOnlyEngine(engine); + engine.flush(); + searchOnlyEngine = new SearchOnlyEngine(engine.engineConfig, engine.getSeqNoStats(globalCheckpoint.get())); lastSeqNoStats = engine.getSeqNoStats(globalCheckpoint.get()); lastDocIds = getDocIds(engine, true); - assertThat(lockedDownEngine.getLocalCheckpoint(), equalTo(lastSeqNoStats.getLocalCheckpoint())); - assertThat(lockedDownEngine.getSeqNoStats(globalCheckpoint.get()).getMaxSeqNo(), equalTo(lastSeqNoStats.getMaxSeqNo())); - assertThat(getDocIds(lockedDownEngine, false), equalTo(lastDocIds)); + assertThat(searchOnlyEngine.getLocalCheckpoint(), equalTo(lastSeqNoStats.getLocalCheckpoint())); + assertThat(searchOnlyEngine.getSeqNoStats(globalCheckpoint.get()).getMaxSeqNo(), equalTo(lastSeqNoStats.getMaxSeqNo())); + assertThat(getDocIds(searchOnlyEngine, false), equalTo(lastDocIds)); for (int i = 0; i < numDocs; i++) { if (randomBoolean()) { String delId = Integer.toString(i); @@ -73,21 +74,21 @@ public void testSearchOnlyEngine() throws Exception { } } // the locked down engine should still point to the previous commit - assertThat(lockedDownEngine.getLocalCheckpoint(), equalTo(lastSeqNoStats.getLocalCheckpoint())); - assertThat(lockedDownEngine.getSeqNoStats(globalCheckpoint.get()).getMaxSeqNo(), equalTo(lastSeqNoStats.getMaxSeqNo())); - assertThat(getDocIds(lockedDownEngine, false), equalTo(lastDocIds)); + assertThat(searchOnlyEngine.getLocalCheckpoint(), equalTo(lastSeqNoStats.getLocalCheckpoint())); + assertThat(searchOnlyEngine.getSeqNoStats(globalCheckpoint.get()).getMaxSeqNo(), equalTo(lastSeqNoStats.getMaxSeqNo())); + assertThat(getDocIds(searchOnlyEngine, false), equalTo(lastDocIds)); } // Close and reopen the main engine trimUnsafeCommits(config); try (InternalEngine recoveringEngine = new InternalEngine(config)) { recoveringEngine.recoverFromTranslog(Long.MAX_VALUE); // the locked down engine should still point to the previous commit - assertThat(lockedDownEngine.getLocalCheckpoint(), equalTo(lastSeqNoStats.getLocalCheckpoint())); - assertThat(lockedDownEngine.getSeqNoStats(globalCheckpoint.get()).getMaxSeqNo(), equalTo(lastSeqNoStats.getMaxSeqNo())); - assertThat(getDocIds(lockedDownEngine, false), equalTo(lastDocIds)); + assertThat(searchOnlyEngine.getLocalCheckpoint(), equalTo(lastSeqNoStats.getLocalCheckpoint())); + assertThat(searchOnlyEngine.getSeqNoStats(globalCheckpoint.get()).getMaxSeqNo(), equalTo(lastSeqNoStats.getMaxSeqNo())); + 
assertThat(getDocIds(searchOnlyEngine, false), equalTo(lastDocIds)); } } finally { - IOUtils.close(lockedDownEngine); + IOUtils.close(searchOnlyEngine); } } } From b60d0e8d1c083e43e936648dd937866a77cda1ec Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Tue, 21 Aug 2018 21:00:28 -0400 Subject: [PATCH 06/17] assert same doc ids --- .../elasticsearch/index/shard/IndexShard.java | 26 +++++++++--------- .../discovery/AbstractDisruptionTestCase.java | 6 +++++ .../discovery/ClusterDisruptionIT.java | 1 + .../index/shard/IndexShardTestCase.java | 4 +-- .../elasticsearch/test/ESIntegTestCase.java | 27 +++++++++++++++++++ 5 files changed, 48 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java index 2569068e0b6a5..4b90fff398970 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -2285,13 +2285,15 @@ public void acquireReplicaOperationPermit(final long opPrimaryTerm, final long g } if (isEngineResetting()) { engineHolder.closePendingEngine(); + logger.info("detected new primary with primary term [{}], global checkpoint [{}]; " + + "resetting pending engine", opPrimaryTerm, getLocalCheckpoint()); } else { sync(); getEngine().flush(true, true); // force=true to make sure that we roll a translog generation + logger.info("detected new primary with primary term [{}], resetting local checkpoint from [{}] to [{}]", + opPrimaryTerm, getLocalCheckpoint(), localCheckpoint); getEngine().resetLocalCheckpoint(localCheckpoint); } - logger.info("detected new primary with primary term [{}], resetting local checkpoint from [{}] to [{}]", - opPrimaryTerm, getLocalCheckpoint(), localCheckpoint); final Engine pendingEngine = maybeResetEngineToSafeCommit(); if (pendingEngine != null) { pendingEngine.recoverFromTranslog(localCheckpoint); @@ -2693,18 +2695,14 @@ private Engine maybeResetEngineToSafeCommit() throws IOException { } private void completePendingEngineWithLocalHistory() throws IOException { - final Engine pendingEngine; - synchronized (mutex) { - assert isEngineResetting() : "engine is not being reset"; - engineHolder.closePendingEngine(); - pendingEngine = maybeResetEngineToSafeCommit(); - } - if (pendingEngine != null) { - pendingEngine.recoverFromTranslog(Long.MAX_VALUE); - engineHolder.adjustMinRequiredCheckpoint(-1); - engineHolder.maybeActivatePendingEngine(); - } - assert isEngineResetting() == false : "engine was reset with local history"; + assert isEngineResetting() : "engine is not being reset"; + engineHolder.closePendingEngine(); + final Engine pendingEngine = maybeResetEngineToSafeCommit(); + assert pendingEngine != null : "pending engine is not reset"; + pendingEngine.recoverFromTranslog(Long.MAX_VALUE); + engineHolder.adjustMinRequiredCheckpoint(-1); + engineHolder.maybeActivatePendingEngine(); + assert isEngineResetting() == false : "engine was not reset with local history"; } /** diff --git a/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java b/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java index 0f3288b1973e5..0d6de4a150a22 100644 --- a/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java +++ b/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java @@ -45,6 +45,7 @@ import org.elasticsearch.test.disruption.SlowClusterStateProcessing; import 
org.elasticsearch.test.transport.MockTransportService; import org.elasticsearch.transport.TransportService; +import org.junit.After; import org.junit.Before; import java.util.Arrays; @@ -93,6 +94,11 @@ public void setUp() throws Exception { disableBeforeIndexDeletion = false; } + @After + public void assertSameDocIds() throws Exception { + assertSameDocIdsOnShards(); + } + @Override public void setDisruptionScheme(ServiceDisruptionScheme scheme) { if (scheme instanceof NetworkDisruption && diff --git a/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java b/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java index fab38a2b73b4a..984ef963fd755 100644 --- a/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java +++ b/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java @@ -211,6 +211,7 @@ public void testAckedIndexing() throws Exception { throw new AssertionError(e.getMessage() + " (checked via node [" + node + "]", e); } } + assertSameDocIdsOnShards(); }, 30, TimeUnit.SECONDS); logger.info("done validating (iteration [{}])", iter); diff --git a/test/framework/src/main/java/org/elasticsearch/index/shard/IndexShardTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/shard/IndexShardTestCase.java index f84442529c297..09925a687907c 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/shard/IndexShardTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/shard/IndexShardTestCase.java @@ -574,11 +574,11 @@ private Store.MetadataSnapshot getMetadataSnapshotOrEmpty(IndexShard replica) th return result; } - protected Set getShardDocUIDs(final IndexShard shard, boolean refresh) throws IOException { + public static Set getShardDocUIDs(final IndexShard shard, boolean refresh) throws IOException { return EngineTestCase.getDocIds(shard.getEngine(), refresh); } - protected Set getShardDocUIDs(final IndexShard shard) throws IOException { + public static Set getShardDocUIDs(final IndexShard shard) throws IOException { return getShardDocUIDs(shard, true); } diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java index 3a479aad89770..1f3b5b14a5582 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java @@ -26,6 +26,7 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks; import org.apache.http.HttpHost; import org.apache.lucene.search.Sort; +import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.util.LuceneTestCase; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ExceptionsHelper; @@ -133,6 +134,8 @@ import org.elasticsearch.index.seqno.SeqNoStats; import org.elasticsearch.index.seqno.SequenceNumbers; import org.elasticsearch.index.shard.IndexShard; +import org.elasticsearch.index.shard.IndexShardTestCase; +import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.index.translog.Translog; import org.elasticsearch.indices.IndicesQueryCache; import org.elasticsearch.indices.IndicesRequestCache; @@ -2129,6 +2132,30 @@ public Set assertAllShardsOnNodes(String index, String... pattern) { return nodes; } + /** + * Asserts that all shards with the same shardId should have document Ids. 
+     */
+    public void assertSameDocIdsOnShards() throws Exception {
+        final Map<ShardId, Set<String>> docIdsPerShard = new HashMap<>();
+        final String[] nodeNames = internalCluster().getNodeNames();
+        for (String nodeName : nodeNames) {
+            final IndicesService indexServices = internalCluster().getInstance(IndicesService.class, nodeName);
+            for (IndexService indexService : indexServices) {
+                for (IndexShard shard : indexService) {
+                    try {
+                        final Set<String> docIds = IndexShardTestCase.getShardDocUIDs(shard, true);
+                        if (docIdsPerShard.containsKey(shard.shardId())) {
+                            assertThat(docIds, equalTo(docIdsPerShard.get(shard.shardId())));
+                        } else {
+                            docIdsPerShard.put(shard.shardId(), docIds);
+                        }
+                    } catch (AlreadyClosedException e) {
+
+                    }
+                }
+            }
+        }
+    }

     /**
      * Asserts that all segments are sorted with the provided {@link Sort}.

From f0a1496bed7a1160d2d4356be5b3ae01a5df1457 Mon Sep 17 00:00:00 2001
From: Nhat Nguyen
Date: Wed, 22 Aug 2018 15:15:21 -0400
Subject: [PATCH 07/17] move trimUnsafeCommits abstract test case

---
 .../elasticsearch/index/engine/InternalEngineTests.java | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java
index 0580a4ce825be..9bc05fd717c19 100644
--- a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java
+++ b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java
@@ -4768,15 +4768,6 @@ public void testTrimUnsafeCommits() throws Exception {
         }
     }

-    static void trimUnsafeCommits(EngineConfig config) throws IOException {
-        final Store store = config.getStore();
-        final TranslogConfig translogConfig = config.getTranslogConfig();
-        final String translogUUID = store.readLastCommittedSegmentsInfo().getUserData().get(Translog.TRANSLOG_UUID_KEY);
-        final long globalCheckpoint = Translog.readGlobalCheckpoint(translogConfig.getTranslogPath(), translogUUID);
-        final long minRetainedTranslogGen = Translog.readMinTranslogGeneration(translogConfig.getTranslogPath(), translogUUID);
-        store.trimUnsafeCommits(globalCheckpoint, minRetainedTranslogGen, config.getIndexSettings().getIndexVersionCreated());
-    }
-
     void assertLuceneOperations(InternalEngine engine, long expectedAppends, long expectedUpdates, long expectedDeletes) {
         String message = "Lucene operations mismatched;" +
             " appends [actual:" + engine.getNumDocAppends() + ", expected:" + expectedAppends + "]," +

From 8f47769ca9192cf7598960774ca0c7d8749704ac Mon Sep 17 00:00:00 2001
From: Nhat Nguyen
Date: Wed, 22 Aug 2018 17:46:26 -0400
Subject: [PATCH 08/17] index resync operation as replica instead of peer_recovery
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We don’t move the resetting shard back to the “recovering” state, so resync
operations should be processed with the REPLICA origin, as they were before.
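For context, only the operation origin decides which shard states accept these writes; the shard itself stays in a regular write state while its engine is reset in the background. The following is a simplified sketch of the check involved, adapted from the ensureWriteAllowed change earlier in this series; treat it as an illustration rather than the exact method body (the message in the second branch in particular is paraphrased):

    private void ensureWriteAllowed(Engine.Operation.Origin origin) throws IllegalIndexShardStateException {
        final IndexShardState state = this.state; // one time volatile read
        if (origin.isRecovery()) {
            // recovery origins such as PEER_RECOVERY insist on a recovering (or resetting) shard
            if (state != IndexShardState.RECOVERING && isEngineResetting() == false) {
                throw new IllegalIndexShardStateException(shardId, state,
                    "operation only allowed when recovering, origin [" + origin + "]");
            }
        } else {
            // REPLICA goes through the regular write states (e.g. STARTED), which is exactly
            // the state a shard is in while resync operations arrive during an engine reset
            if (writeAllowedStates.contains(state) == false) {
                throw new IllegalIndexShardStateException(shardId, state,
                    "operation only allowed when shard state is one of " + writeAllowedStates);
            }
        }
    }

Applying resync operations with the REPLICA origin therefore matches how they were handled before this series and requires no extra state transition on the shard.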
--- .../org/elasticsearch/index/shard/IndexShard.java | 2 +- .../RecoveryDuringReplicationTests.java | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java index 4b90fff398970..f6a2d6a1b6c41 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -2645,7 +2645,7 @@ public void afterRefresh(boolean didRefresh) throws IOException { } public Engine.Result applyResyncOperation(Translog.Operation operation) throws IOException { - return applyTranslogOperation(engineHolder.getEngineForResync(), operation, Engine.Operation.Origin.PEER_RECOVERY); + return applyTranslogOperation(engineHolder.getEngineForResync(), operation, Engine.Operation.Origin.REPLICA); } /** diff --git a/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java b/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java index 1eef2bb969baf..e810e04614720 100644 --- a/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java +++ b/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java @@ -629,7 +629,9 @@ public long indexTranslogOperations(final List operations, f } public void testRollbackOnPromotion() throws Exception { - int numberOfReplicas = between(2, 3); + final int numberOfReplicas = between(2, 3); + final AtomicBoolean isDone = new AtomicBoolean(); + final List threads = new ArrayList<>(); try (ReplicationGroup shards = createGroup(numberOfReplicas)) { shards.startAll(); IndexShard newPrimary = randomFrom(shards.getReplicas()); @@ -662,11 +664,9 @@ public void testRollbackOnPromotion() throws Exception { } } shards.refresh("test"); - AtomicBoolean isDone = new AtomicBoolean(); - Thread[] threads = new Thread[numberOfReplicas]; - for (int i = 0; i < threads.length; i++) { + for (int i = 0; i < numberOfReplicas; i++) { IndexShard replica = shards.getReplicas().get(i); - threads[i] = new Thread(() -> { + Thread thread = new Thread(() -> { // should fail at most twice with two transitions: normal engine -> read-only engine -> resetting engine long hitClosedExceptions = 0; while (isDone.get() == false) { @@ -682,12 +682,15 @@ public void testRollbackOnPromotion() throws Exception { } assertThat(hitClosedExceptions, lessThanOrEqualTo(2L)); }); - threads[i].start(); + threads.add(thread); + thread.start(); } shards.promoteReplicaToPrimary(newPrimary).get(); shards.assertAllEqual(initDocs + extraDocsOnNewPrimary); int moreDocs = shards.indexDocs(scaledRandomIntBetween(1, 10)); shards.assertAllEqual(initDocs + extraDocsOnNewPrimary + moreDocs); + isDone.set(true); // stop before closing shards to have an accurate "AlreadyClosedException" count + } finally { isDone.set(true); for (Thread thread : threads) { thread.join(); From a224c6bbcafec33c6deb2cbeb16ce6fc0da35387 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Wed, 22 Aug 2018 18:05:53 -0400 Subject: [PATCH 09/17] more feedback --- .../index/engine/SearchOnlyEngine.java | 10 +++- .../elasticsearch/index/shard/IndexShard.java | 9 ++-- .../discovery/AbstractDisruptionTestCase.java | 8 +--- .../index/engine/SearchOnlyEngineTests.java | 3 +- .../index/shard/IndexShardTests.java | 3 +- .../elasticsearch/recovery/RelocationIT.java | 1 + .../elasticsearch/test/ESIntegTestCase.java 
| 46 +++++++++++++------ 7 files changed, 49 insertions(+), 31 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/engine/SearchOnlyEngine.java b/server/src/main/java/org/elasticsearch/index/engine/SearchOnlyEngine.java index 2f05ba647c547..d0b07fae774ac 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/SearchOnlyEngine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/SearchOnlyEngine.java @@ -50,16 +50,16 @@ public final class SearchOnlyEngine extends Engine { private final TranslogStats translogStats; private final SearcherManager searcherManager; - public SearchOnlyEngine(EngineConfig config, SeqNoStats seqNoStats) { + public SearchOnlyEngine(EngineConfig config, SeqNoStats seqNoStats, TranslogStats translogStats) { super(config); this.seqNoStats = seqNoStats; + this.translogStats = translogStats; try { store.incRef(); ElasticsearchDirectoryReader reader = null; boolean success = false; try { this.lastCommittedSegmentInfos = store.readLastCommittedSegmentsInfo(); - this.translogStats = new TranslogStats(0, 0, 0, 0, 0); reader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(store.directory()), config.getShardId()); this.searcherManager = new SearcherManager(reader, new RamAccountingSearcherFactory(config.getCircuitBreakerService())); success = true; @@ -86,6 +86,12 @@ protected void closeNoLock(String reason, CountDownLatch closedLatch) { } } + @Override + public void flushAndClose() throws IOException { + // make a flush as a noop so that callers can close (and flush) this engine without worrying the engine type. + close(); + } + @Override public GetResult get(Get get, BiFunction searcherFactory) throws EngineException { return getFromSearcher(get, this::acquireSearcher, SearcherScope.INTERNAL); diff --git a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java index f6a2d6a1b6c41..14ecd770b29a0 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -1194,7 +1194,7 @@ public void close(String reason, boolean flushEngine) throws IOException { } finally { final Engine engine = getEngineOrNull(); try { - if (engine != null && flushEngine && isEngineResetting() == false) { + if (engine != null && flushEngine) { engine.flushAndClose(); } } finally { @@ -2140,9 +2140,7 @@ public void onFailedEngine(String reason, @Nullable Exception failure) { private Engine createNewEngine(EngineConfig config) throws IOException { assert Thread.holdsLock(mutex); - if (state == IndexShardState.CLOSED) { - throw new IndexShardClosedException(shardId, "can't create engine - shard is closed"); - } + verifyNotClosed(); final String translogUUID = store.readLastCommittedSegmentsInfo().getUserData().get(Translog.TRANSLOG_UUID_KEY); final long globalCheckpoint = Translog.readGlobalCheckpoint(translogConfig.getTranslogPath(), translogUUID); final long minRetainedTranslogGen = Translog.readMinTranslogGeneration(translogConfig.getTranslogPath(), translogUUID); @@ -2730,8 +2728,9 @@ void makeActiveEngineSearchOnly() throws IOException { synchronized (this) { if ((this.activeEngine instanceof SearchOnlyEngine) == false) { final SeqNoStats seqNoStats = this.activeEngine.getSeqNoStats(activeEngine.getLastSyncedGlobalCheckpoint()); + final TranslogStats translogStats = this.activeEngine.getTranslogStats(); closing = this.activeEngine; - this.activeEngine = new 
SearchOnlyEngine(this.activeEngine.config(), seqNoStats); + this.activeEngine = new SearchOnlyEngine(this.activeEngine.config(), seqNoStats, translogStats); } } IOUtils.close(closing); diff --git a/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java b/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java index 0d6de4a150a22..046925cd16abc 100644 --- a/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java +++ b/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java @@ -45,7 +45,6 @@ import org.elasticsearch.test.disruption.SlowClusterStateProcessing; import org.elasticsearch.test.transport.MockTransportService; import org.elasticsearch.transport.TransportService; -import org.junit.After; import org.junit.Before; import java.util.Arrays; @@ -94,11 +93,6 @@ public void setUp() throws Exception { disableBeforeIndexDeletion = false; } - @After - public void assertSameDocIds() throws Exception { - assertSameDocIdsOnShards(); - } - @Override public void setDisruptionScheme(ServiceDisruptionScheme scheme) { if (scheme instanceof NetworkDisruption && @@ -115,6 +109,8 @@ public void setDisruptionScheme(ServiceDisruptionScheme scheme) { protected void beforeIndexDeletion() throws Exception { if (disableBeforeIndexDeletion == false) { super.beforeIndexDeletion(); + // TODO: enable this assertion after fixing NPE in assertSeqNos(); + assertSameDocIdsOnShards(); } } diff --git a/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java b/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java index e81a1e73f6622..ef8ba9b0b41e9 100644 --- a/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java +++ b/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java @@ -58,7 +58,8 @@ public void testSearchOnlyEngine() throws Exception { } engine.syncTranslog(); engine.flush(); - searchOnlyEngine = new SearchOnlyEngine(engine.engineConfig, engine.getSeqNoStats(globalCheckpoint.get())); + searchOnlyEngine = new SearchOnlyEngine(engine.engineConfig, + engine.getSeqNoStats(globalCheckpoint.get()), engine.getTranslogStats()); lastSeqNoStats = engine.getSeqNoStats(globalCheckpoint.get()); lastDocIds = getDocIds(engine, true); assertThat(searchOnlyEngine.getLocalCheckpoint(), equalTo(lastSeqNoStats.getLocalCheckpoint())); diff --git a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java index 4e31f81595100..b042b40863455 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java @@ -1001,8 +1001,7 @@ public void onFailure(final Exception e) { } public void testDoNoResetEngineOnOldVersions() throws Exception { - Version oldVersion = randomValueOtherThanMany(v -> v.onOrAfter(Version.V_6_4_0), - () -> randomFrom(Version.getDeclaredVersions(Version.class))); + Version oldVersion = VersionUtils.randomVersionBetween(random(), null, VersionUtils.getPreviousVersion(Version.V_6_4_0)); IndexShard shard = newStartedShard(false, oldVersion); assertThat(shard.canResetEngine(), equalTo(false)); indexOnReplicaWithGaps(shard, between(5, 50), Math.toIntExact(SequenceNumbers.NO_OPS_PERFORMED)); diff --git a/server/src/test/java/org/elasticsearch/recovery/RelocationIT.java b/server/src/test/java/org/elasticsearch/recovery/RelocationIT.java index 
cb93d803bb7c6..8d0f1845be60d 100644 --- a/server/src/test/java/org/elasticsearch/recovery/RelocationIT.java +++ b/server/src/test/java/org/elasticsearch/recovery/RelocationIT.java @@ -103,6 +103,7 @@ protected Collection> nodePlugins() { protected void beforeIndexDeletion() throws Exception { super.beforeIndexDeletion(); assertSeqNos(); + assertSameDocIdsOnShards(); } public void testSimpleRelocationNoIndexing() { diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java index 1f3b5b14a5582..9711d0810bbd1 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java @@ -20,6 +20,8 @@ package org.elasticsearch.test; import com.carrotsearch.hppc.ObjectLongMap; +import com.carrotsearch.hppc.cursors.IntObjectCursor; +import com.carrotsearch.hppc.cursors.ObjectObjectCursor; import com.carrotsearch.randomizedtesting.RandomizedContext; import com.carrotsearch.randomizedtesting.annotations.TestGroup; import com.carrotsearch.randomizedtesting.generators.RandomNumbers; @@ -135,7 +137,6 @@ import org.elasticsearch.index.seqno.SequenceNumbers; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.index.shard.IndexShardTestCase; -import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.index.translog.Translog; import org.elasticsearch.indices.IndicesQueryCache; import org.elasticsearch.indices.IndicesRequestCache; @@ -2136,25 +2137,40 @@ public Set assertAllShardsOnNodes(String index, String... pattern) { * Asserts that all shards with the same shardId should have document Ids. */ public void assertSameDocIdsOnShards() throws Exception { - final Map> docIdsPerShard = new HashMap<>(); - final String[] nodeNames = internalCluster().getNodeNames(); - for (String nodeName : nodeNames) { - final IndicesService indexServices = internalCluster().getInstance(IndicesService.class, nodeName); - for (IndexService indexService : indexServices) { - for (IndexShard shard : indexService) { + assertBusy(() -> { + ClusterState state = client().admin().cluster().prepareState().get().getState(); + for (ObjectObjectCursor indexRoutingTable : state.routingTable().indicesRouting()) { + for (IntObjectCursor indexShardRoutingTable : indexRoutingTable.value.shards()) { + ShardRouting primaryShardRouting = indexShardRoutingTable.value.primaryShard(); + if (primaryShardRouting == null) { + continue; + } + String primaryNode = state.nodes().get(primaryShardRouting.currentNodeId()).getName(); + IndexShard primaryShard = internalCluster().getInstance(IndicesService.class, primaryNode) + .indexServiceSafe(primaryShardRouting.index()).getShard(primaryShardRouting.id()); + final Set docsOnPrimary; try { - final Set docIds = IndexShardTestCase.getShardDocUIDs(shard, true); - if (docIdsPerShard.containsKey(shard.shardId())) { - assertThat(docIds, equalTo(docIdsPerShard.get(shard.shardId()))); - } else { - docIdsPerShard.put(shard.shardId(), docIds); + docsOnPrimary = IndexShardTestCase.getShardDocUIDs(primaryShard, true); + } catch (AlreadyClosedException ex) { + continue; + } + for (ShardRouting replicaShardRouting : indexShardRoutingTable.value.replicaShards()) { + String replicaNode = state.nodes().get(replicaShardRouting.currentNodeId()).getName(); + IndexShard replicaShard = internalCluster().getInstance(IndicesService.class, replicaNode) + 
.indexServiceSafe(replicaShardRouting.index()).getShard(replicaShardRouting.id()); + final Set docsOnReplica; + try { + docsOnReplica = IndexShardTestCase.getShardDocUIDs(replicaShard, true); + } catch (AlreadyClosedException ex) { + continue; } - } catch (AlreadyClosedException e) { - + assertThat("out of sync shards: primary=[" + primaryShardRouting + "] num_docs_on_primary=[" + docsOnPrimary.size() + + "] vs replica=[" + replicaShardRouting + "] num_docs_on_replica=[" + docsOnReplica.size() + "]", + docsOnReplica, equalTo(docsOnPrimary)); } } } - } + }); } /** From 7e08bd3fc7cb00aed9f57296f9d5b9ad8e670a46 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Thu, 23 Aug 2018 08:54:22 -0400 Subject: [PATCH 10/17] skip unassigned shards when assert docIds --- .../org/elasticsearch/test/ESIntegTestCase.java | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java index 9711d0810bbd1..315327035495d 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java @@ -2142,11 +2142,11 @@ public void assertSameDocIdsOnShards() throws Exception { for (ObjectObjectCursor indexRoutingTable : state.routingTable().indicesRouting()) { for (IntObjectCursor indexShardRoutingTable : indexRoutingTable.value.shards()) { ShardRouting primaryShardRouting = indexShardRoutingTable.value.primaryShard(); - if (primaryShardRouting == null) { + if (primaryShardRouting == null || primaryShardRouting.assignedToNode() == false) { continue; } - String primaryNode = state.nodes().get(primaryShardRouting.currentNodeId()).getName(); - IndexShard primaryShard = internalCluster().getInstance(IndicesService.class, primaryNode) + DiscoveryNode primaryNode = state.nodes().get(primaryShardRouting.currentNodeId()); + IndexShard primaryShard = internalCluster().getInstance(IndicesService.class, primaryNode.getName()) .indexServiceSafe(primaryShardRouting.index()).getShard(primaryShardRouting.id()); final Set docsOnPrimary; try { @@ -2155,8 +2155,11 @@ public void assertSameDocIdsOnShards() throws Exception { continue; } for (ShardRouting replicaShardRouting : indexShardRoutingTable.value.replicaShards()) { - String replicaNode = state.nodes().get(replicaShardRouting.currentNodeId()).getName(); - IndexShard replicaShard = internalCluster().getInstance(IndicesService.class, replicaNode) + if (replicaShardRouting.assignedToNode() == false) { + continue; + } + DiscoveryNode replicaNode = state.nodes().get(replicaShardRouting.currentNodeId()); + IndexShard replicaShard = internalCluster().getInstance(IndicesService.class, replicaNode.getName()) .indexServiceSafe(replicaShardRouting.index()).getShard(replicaShardRouting.id()); final Set docsOnReplica; try { From eacd9b2c5e927e98a354cd873c1fe159456e7755 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Thu, 23 Aug 2018 12:06:15 -0400 Subject: [PATCH 11/17] ensure unsupported methods never called --- .../index/engine/SearchOnlyEngine.java | 53 ++++++++++--------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/engine/SearchOnlyEngine.java b/server/src/main/java/org/elasticsearch/index/engine/SearchOnlyEngine.java index d0b07fae774ac..0bc50210193d3 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/SearchOnlyEngine.java +++ 
b/server/src/main/java/org/elasticsearch/index/engine/SearchOnlyEngine.java @@ -142,17 +142,17 @@ public boolean isThrottled() { @Override public IndexResult index(Index index) { - throw new UnsupportedOperationException(); + return ensureUnsupportedMethodNeverCalled(); } @Override public DeleteResult delete(Delete delete) { - throw new UnsupportedOperationException(); + return ensureUnsupportedMethodNeverCalled(); } @Override public NoOpResult noOp(NoOp noOp) { - throw new UnsupportedOperationException(); + return ensureUnsupportedMethodNeverCalled(); } @Override @@ -167,22 +167,22 @@ public boolean ensureTranslogSynced(Stream locations) { @Override public void syncTranslog() { - throw new UnsupportedOperationException(); + ensureUnsupportedMethodNeverCalled(); } @Override public Closeable acquireTranslogRetentionLock() { - throw new UnsupportedOperationException(); + return ensureUnsupportedMethodNeverCalled(); } @Override public Translog.Snapshot newTranslogSnapshotFromMinSeqNo(long minSeqNo) { - throw new UnsupportedOperationException(); + return ensureUnsupportedMethodNeverCalled(); } @Override public int estimateTranslogOperationsFromMinSeq(long minSeqNo) { - throw new UnsupportedOperationException(); + return ensureUnsupportedMethodNeverCalled(); } @Override @@ -192,7 +192,7 @@ public TranslogStats getTranslogStats() { @Override public Translog.Location getTranslogLastWriteLocation() { - throw new UnsupportedOperationException(); + return ensureUnsupportedMethodNeverCalled(); } @Override @@ -202,12 +202,12 @@ public long getLocalCheckpoint() { @Override public void waitForOpsToComplete(long seqNo) { - throw new UnsupportedOperationException(); + ensureUnsupportedMethodNeverCalled(); } @Override public void resetLocalCheckpoint(long newCheckpoint) { - throw new UnsupportedOperationException(); + ensureUnsupportedMethodNeverCalled(); } @Override @@ -232,7 +232,7 @@ public List segments(boolean verbose) { @Override public void refresh(String source) throws EngineException { - throw new UnsupportedOperationException(); + ensureUnsupportedMethodNeverCalled(); } @Override @@ -247,33 +247,33 @@ public boolean shouldPeriodicallyFlush() { @Override public SyncedFlushResult syncFlush(String syncId, CommitId expectedCommitId) throws EngineException { - throw new UnsupportedOperationException(); + return ensureUnsupportedMethodNeverCalled(); } @Override public CommitId flush(boolean force, boolean waitIfOngoing) throws EngineException { - throw new UnsupportedOperationException(); + return ensureUnsupportedMethodNeverCalled(); } @Override public CommitId flush() throws EngineException { - throw new UnsupportedOperationException(); + return ensureUnsupportedMethodNeverCalled(); } @Override public void forceMerge(boolean flush, int maxNumSegments, boolean onlyExpungeDeletes, boolean upgrade, boolean upgradeOnlyAncientSegments) { - throw new UnsupportedOperationException(); + ensureUnsupportedMethodNeverCalled(); } @Override public IndexCommitRef acquireLastIndexCommit(boolean flushFirst) { - throw new UnsupportedOperationException(); + return ensureUnsupportedMethodNeverCalled(); } @Override public IndexCommitRef acquireSafeIndexCommit() { - throw new UnsupportedOperationException(); + return ensureUnsupportedMethodNeverCalled(); } @Override @@ -286,7 +286,7 @@ public void deactivateThrottling() { @Override public void trimUnreferencedTranslogFiles() { - throw new UnsupportedOperationException(); + ensureUnsupportedMethodNeverCalled(); } @Override @@ -296,35 +296,40 @@ public boolean 
shouldRollTranslogGeneration() { @Override public void rollTranslogGeneration() throws EngineException { - throw new UnsupportedOperationException(); + ensureUnsupportedMethodNeverCalled(); } @Override public void restoreLocalCheckpointFromTranslog() { - throw new UnsupportedOperationException(); + ensureUnsupportedMethodNeverCalled(); } @Override public int fillSeqNoGaps(long primaryTerm) { - throw new UnsupportedOperationException(); + return ensureUnsupportedMethodNeverCalled(); } @Override public Engine recoverFromTranslog(long recoverUpToSeqNo) { - throw new UnsupportedOperationException(); + return ensureUnsupportedMethodNeverCalled(); } @Override public void skipTranslogRecovery() { - throw new UnsupportedOperationException(); + ensureUnsupportedMethodNeverCalled(); } @Override public void trimOperationsFromTranslog(long belowTerm, long aboveSeqNo) throws EngineException { - throw new UnsupportedOperationException(); + ensureUnsupportedMethodNeverCalled(); } @Override public void maybePruneDeletes() { } + + private T ensureUnsupportedMethodNeverCalled() { + assert false : "invoking an unsupported method in a search-only engine"; + throw new UnsupportedOperationException(); + } } From aa01fbd24d3947f1e3a30f7d2cf340044c977bc1 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Thu, 23 Aug 2018 12:27:25 -0400 Subject: [PATCH 12/17] active shard after replaying translog --- .../TransportResyncReplicationAction.java | 6 +- .../elasticsearch/index/shard/IndexShard.java | 264 +++++------------- .../RecoveryDuringReplicationTests.java | 3 +- .../index/shard/IndexShardTests.java | 136 +++------ .../ESIndexLevelReplicationTestCase.java | 13 +- 5 files changed, 123 insertions(+), 299 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/resync/TransportResyncReplicationAction.java b/server/src/main/java/org/elasticsearch/action/resync/TransportResyncReplicationAction.java index 6275115e7d77d..f8ad58b9cac8c 100644 --- a/server/src/main/java/org/elasticsearch/action/resync/TransportResyncReplicationAction.java +++ b/server/src/main/java/org/elasticsearch/action/resync/TransportResyncReplicationAction.java @@ -121,14 +121,16 @@ protected WriteReplicaResult shardOperationOnReplica(ResyncReplicationRequest re public static Translog.Location performOnReplica(ResyncReplicationRequest request, IndexShard replica) throws Exception { Translog.Location location = null; for (Translog.Operation operation : request.getOperations()) { - final Engine.Result operationResult = replica.applyResyncOperation(operation); + final Engine.Result operationResult = replica.applyTranslogOperation(operation, Engine.Operation.Origin.REPLICA); if (operationResult.getResultType() == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) { throw new TransportReplicationAction.RetryOnReplicaException(replica.shardId(), "Mappings are not available on the replica yet, triggered update: " + operationResult.getRequiredMappingUpdate()); } location = syncOperationResultOrThrow(operationResult, location); } - replica.afterApplyResyncOperationsBulk(request.getTrimAboveSeqNo()); + if (request.getTrimAboveSeqNo() != SequenceNumbers.UNASSIGNED_SEQ_NO) { + replica.trimOperationOfPreviousPrimaryTerms(request.getTrimAboveSeqNo()); + } return location; } diff --git a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java index 14ecd770b29a0..d68260ebeb66c 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ 
b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -84,8 +84,8 @@ import org.elasticsearch.index.engine.EngineException; import org.elasticsearch.index.engine.EngineFactory; import org.elasticsearch.index.engine.InternalEngine; -import org.elasticsearch.index.engine.SearchOnlyEngine; import org.elasticsearch.index.engine.RefreshFailedEngineException; +import org.elasticsearch.index.engine.SearchOnlyEngine; import org.elasticsearch.index.engine.Segment; import org.elasticsearch.index.engine.SegmentsStats; import org.elasticsearch.index.fielddata.FieldDataStats; @@ -199,8 +199,9 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl protected volatile IndexShardState state; protected volatile long pendingPrimaryTerm; // see JavaDocs for getPendingPrimaryTerm protected volatile long operationPrimaryTerm; + protected final AtomicReference currentEngineReference = new AtomicReference<>(); + private volatile long maxSeqNoOfResettingEngine = Long.MIN_VALUE; final EngineFactory engineFactory; - private final EngineHolder engineHolder = new EngineHolder(); private final IndexingOperationListener indexingOperationListeners; private final Runnable globalCheckpointSyncer; @@ -501,18 +502,19 @@ public void updateShardState(final ShardRouting newRouting, * numbers. To ensure that this is not the case, we restore the state of the local checkpoint tracker by * replaying the translog and marking any operations there are completed. */ - if (isEngineResetting()) { - completePendingEngineWithLocalHistory(); + if (seqNoStats().getLocalCheckpoint() < maxSeqNoOfResettingEngine) { + getEngine().flush(); + resetEngineUpToSeqNo(Long.MAX_VALUE); + } else { + getEngine().restoreLocalCheckpointFromTranslog(); } - final Engine engine = getEngine(); - engine.restoreLocalCheckpointFromTranslog(); /* Rolling the translog generation is not strictly needed here (as we will never have collisions between * sequence numbers in a translog generation in a new primary as it takes the last known sequence number * as a starting point), but it simplifies reasoning about the relationship between primary terms and * translog generations. 
*/ - engine.rollTranslogGeneration(); - engine.fillSeqNoGaps(newPrimaryTerm); + getEngine().rollTranslogGeneration(); + getEngine().fillSeqNoGaps(newPrimaryTerm); replicationTracker.updateLocalCheckpoint(currentRouting.allocationId().getId(), getLocalCheckpoint()); primaryReplicaSyncer.accept(this, new ActionListener() { @Override @@ -678,14 +680,15 @@ public Engine.IndexResult applyIndexOperationOnPrimary(long version, VersionType } public Engine.IndexResult applyIndexOperationOnReplica(long seqNo, long version, long autoGeneratedTimeStamp, - boolean isRetry, SourceToParse sourceToParse) throws IOException { - return applyIndexOperation(getEngine(), seqNo, operationPrimaryTerm, version, null, autoGeneratedTimeStamp, - isRetry, Engine.Operation.Origin.REPLICA, sourceToParse); + boolean isRetry, SourceToParse sourceToParse) + throws IOException { + return applyIndexOperation(getEngine(), seqNo, operationPrimaryTerm, version, null, autoGeneratedTimeStamp, isRetry, + Engine.Operation.Origin.REPLICA, sourceToParse); } - private Engine.IndexResult applyIndexOperation(Engine engine, long seqNo, long opPrimaryTerm, long version, @Nullable VersionType versionType, - long autoGeneratedTimeStamp, boolean isRetry, Engine.Operation.Origin origin, - SourceToParse sourceToParse) throws IOException { + private Engine.IndexResult applyIndexOperation(Engine engine, long seqNo, long opPrimaryTerm, long version, + @Nullable VersionType versionType, long autoGeneratedTimeStamp, boolean isRetry, + Engine.Operation.Origin origin, SourceToParse sourceToParse) throws IOException { assert opPrimaryTerm <= this.operationPrimaryTerm: "op term [ " + opPrimaryTerm + " ] > shard term [" + this.operationPrimaryTerm + "]"; ensureWriteAllowed(origin); @@ -1192,7 +1195,7 @@ public void close(String reason, boolean flushEngine) throws IOException { try { changeState(IndexShardState.CLOSED, reason); } finally { - final Engine engine = getEngineOrNull(); + final Engine engine = this.currentEngineReference.getAndSet(null); try { if (engine != null && flushEngine) { engine.flushAndClose(); @@ -1200,7 +1203,7 @@ public void close(String reason, boolean flushEngine) throws IOException { } finally { // playing safe here and close the engine even if the above succeeds - close can be called multiple times // Also closing refreshListeners to prevent us from accumulating any more listeners - IOUtils.close(engineHolder, globalCheckpointListeners, refreshListeners); + IOUtils.close(engine, globalCheckpointListeners, refreshListeners); indexShardOperationPermits.close(); } } @@ -1233,7 +1236,11 @@ public void prepareForIndexRecovery() { throw new IndexShardNotRecoveringException(shardId, state); } recoveryState.setStage(RecoveryState.Stage.INDEX); - assert getEngineOrNull() == null : "engine is open already"; + assert currentEngineReference.get() == null; + } + + public void trimOperationOfPreviousPrimaryTerms(long aboveSeqNo) { + getEngine().trimOperationsFromTranslog(operationPrimaryTerm, aboveSeqNo); } public Engine.Result applyTranslogOperation(Translog.Operation operation, Engine.Operation.Origin origin) throws IOException { @@ -1334,26 +1341,22 @@ private void innerOpenEngineAndTranslog() throws IOException { } } recoveryState.setStage(RecoveryState.Stage.TRANSLOG); - - final EngineConfig config = newEngineConfig(); - - // we disable deletes since we allow for operations to be executed against the shard while recovering - // but we need to make sure we don't loose deletes until we are done recovering - 
config.setEnableGcDeletes(false); // we have to set it before we open an engine and recover from the translog because // acquiring a snapshot from the translog causes a sync which causes the global checkpoint to be pulled in, // and an engine can be forced to close in ctor which also causes the global checkpoint to be pulled in. final String translogUUID = store.readLastCommittedSegmentsInfo().getUserData().get(Translog.TRANSLOG_UUID_KEY); final long globalCheckpoint = Translog.readGlobalCheckpoint(translogConfig.getTranslogPath(), translogUUID); replicationTracker.updateGlobalCheckpointOnReplica(globalCheckpoint, "read from translog checkpoint"); + + final EngineConfig config = newEngineConfig(); + // we disable deletes since we allow for operations to be executed against the shard while recovering + // but we need to make sure we don't loose deletes until we are done recovering + config.setEnableGcDeletes(false); synchronized (mutex) { - engineHolder.activateNewEngine(createNewEngine(config)); + assert currentEngineReference.get() == null : "engine is initialized already"; + currentEngineReference.set(createNewEngine(config)); } - verifyNotClosed(); - // We set active because we are now writing operations to the engine; this way, if we go idle after some time and become inactive, - // we still give sync'd flush a chance to run: - active.set(true); - assertSequenceNumbersInCommit(); + assert assertSequenceNumbersInCommit(); assert recoveryState.getStage() == RecoveryState.Stage.TRANSLOG : "TRANSLOG stage expected but was: " + recoveryState.getStage(); } @@ -1388,7 +1391,8 @@ public void performRecoveryRestart() throws IOException { throw new IndexShardNotRecoveringException(shardId, state); } assert refreshListeners.pendingCount() == 0 : "we can't restart with pending listeners"; - IOUtils.close(engineHolder); + final Engine engine = this.currentEngineReference.getAndSet(null); + IOUtils.close(engine); recoveryState().setStage(RecoveryState.Stage.INIT); } } @@ -1445,7 +1449,8 @@ private void ensureWriteAllowed(Engine.Operation.Origin origin) throws IllegalIn IndexShardState state = this.state; // one time volatile read if (origin.isRecovery()) { - if (state != IndexShardState.RECOVERING && isEngineResetting() == false) { + final boolean isResetting = getEngineOrNull() instanceof SearchOnlyEngine; + if (state != IndexShardState.RECOVERING && isResetting == false) { throw new IllegalIndexShardStateException(shardId, state, "operation only allowed when recovering, origin [" + origin + "]"); } } else { @@ -1999,7 +2004,7 @@ Engine getEngine() { * closed. 
*/ protected Engine getEngineOrNull() { - return engineHolder.getActiveEngineOrNull(); + return this.currentEngineReference.get(); } public void startRecovery(RecoveryState recoveryState, PeerRecoveryTargetService recoveryTargetService, @@ -2149,6 +2154,7 @@ private Engine createNewEngine(EngineConfig config) throws IOException { final Engine engine = engineFactory.newReadWriteEngine(config); onNewEngine(engine); engine.onSettingsChanged(); + active.set(true); return engine; } @@ -2281,20 +2287,16 @@ public void acquireReplicaOperationPermit(final long opPrimaryTerm, final long g } else { localCheckpoint = currentGlobalCheckpoint; } - if (isEngineResetting()) { - engineHolder.closePendingEngine(); - logger.info("detected new primary with primary term [{}], global checkpoint [{}]; " + - "resetting pending engine", opPrimaryTerm, getLocalCheckpoint()); - } else { + logger.info("detected new primary with primary term [{}], resetting local checkpoint from [{}] to [{}]", + opPrimaryTerm, getLocalCheckpoint(), localCheckpoint); + if (localCheckpoint < getEngine().getSeqNoStats(localCheckpoint).getMaxSeqNo()) { sync(); - getEngine().flush(true, true); // force=true to make sure that we roll a translog generation - logger.info("detected new primary with primary term [{}], resetting local checkpoint from [{}] to [{}]", - opPrimaryTerm, getLocalCheckpoint(), localCheckpoint); + getEngine().flush(true, true); getEngine().resetLocalCheckpoint(localCheckpoint); - } - final Engine pendingEngine = maybeResetEngineToSafeCommit(); - if (pendingEngine != null) { - pendingEngine.recoverFromTranslog(localCheckpoint); + resetEngineUpToSeqNo(localCheckpoint); + } else { + getEngine().resetLocalCheckpoint(localCheckpoint); + getEngine().rollTranslogGeneration(); } }); } @@ -2642,157 +2644,31 @@ public void afterRefresh(boolean didRefresh) throws IOException { } } - public Engine.Result applyResyncOperation(Translog.Operation operation) throws IOException { - return applyTranslogOperation(engineHolder.getEngineForResync(), operation, Engine.Operation.Origin.REPLICA); - } - - /** - * This method is called after each bulk of primary-replica resync operations are applied to this shard - * via {@link #applyResyncOperation(Translog.Operation)}. - * - * @param maxSeqNoFromPrimary the maximum sequence number from the newly promoted primary - */ - public void afterApplyResyncOperationsBulk(long maxSeqNoFromPrimary) throws IOException { - if (maxSeqNoFromPrimary != UNASSIGNED_SEQ_NO) { - engineHolder.getEngineForResync().trimOperationsFromTranslog(operationPrimaryTerm, maxSeqNoFromPrimary); - engineHolder.adjustMinRequiredCheckpoint(maxSeqNoFromPrimary); - } - engineHolder.maybeActivatePendingEngine(); - } - - final boolean isEngineResetting() { - return engineHolder.hasPendingEngine(); - } - - final boolean canResetEngine() { - // TODO: do not reset the following shard - // other options to enable rollback on older indices: - // 1. send max_seq_no from the primary via replication requests - // 2. 
pass channel version when acquiring an index shard permit - return indexSettings.getIndexVersionCreated().onOrAfter(Version.V_6_4_0); - } - - private Engine maybeResetEngineToSafeCommit() throws IOException { + /** Rollback the current engine to the safe commit, the replay local translog up to the given {@code upToSeqNo} (inclusive) */ + private void resetEngineUpToSeqNo(long upToSeqNo) throws IOException { + assert getActiveOperationsCount() == 0 : "Ongoing writes [" + getActiveOperations() + "]"; + final Engine resettingEngine; synchronized (mutex) { - assert isEngineResetting() == false : "engine is being reset already"; + final Engine currentEngine = getEngine(); final long lastSyncedGlobalCheckpoint = getLastSyncedGlobalCheckpoint(); - final long currentMaxSeqNo = getEngine().getSeqNoStats(lastSyncedGlobalCheckpoint).getMaxSeqNo(); - // Do not reset the current engine if it does not have any stale operations - // or the primary is on an old version which does not send the max_seq_no from the primary during resync. - // we need max_seq_no from the primary to activate the resetting engine when its history aligned with primary's. - if (currentMaxSeqNo == lastSyncedGlobalCheckpoint || canResetEngine() == false) { - return null; - } else { - engineHolder.makeActiveEngineSearchOnly(); - final Engine pendingEngine = createNewEngine(newEngineConfig()); - engineHolder.setPendingEngine(pendingEngine, currentMaxSeqNo); - active.set(true); - return pendingEngine; - } - } - } - - private void completePendingEngineWithLocalHistory() throws IOException { - assert isEngineResetting() : "engine is not being reset"; - engineHolder.closePendingEngine(); - final Engine pendingEngine = maybeResetEngineToSafeCommit(); - assert pendingEngine != null : "pending engine is not reset"; - pendingEngine.recoverFromTranslog(Long.MAX_VALUE); - engineHolder.adjustMinRequiredCheckpoint(-1); - engineHolder.maybeActivatePendingEngine(); - assert isEngineResetting() == false : "engine was not reset with local history"; - } - - /** - * An {@link EngineHolder} holds one or two engine references and manages its transition during primary-replica resync. - */ - static final class EngineHolder implements Closeable { - private volatile Engine activeEngine; // allow to read without synchronization - private Engine pendingEngine; - // avoid losing acknowledged writes, only activate the pending engine when its local_checkpoint at least this guard. - private long minRequiredCheckpoint = NO_OPS_PERFORMED; - - synchronized void activateNewEngine(Engine newEngine) { - assert activeEngine == null : "engine is activated already"; - assert pendingEngine == null : "engine is being reset"; - this.activeEngine = newEngine; - } - - Engine getActiveEngineOrNull() { - return activeEngine; - } - - void makeActiveEngineSearchOnly() throws IOException { - Engine closing = null; - assert activeEngine != null : "engine is not activated yet"; - synchronized (this) { - if ((this.activeEngine instanceof SearchOnlyEngine) == false) { - final SeqNoStats seqNoStats = this.activeEngine.getSeqNoStats(activeEngine.getLastSyncedGlobalCheckpoint()); - final TranslogStats translogStats = this.activeEngine.getTranslogStats(); - closing = this.activeEngine; - this.activeEngine = new SearchOnlyEngine(this.activeEngine.config(), seqNoStats, translogStats); - } - } - IOUtils.close(closing); - } - - synchronized Engine getEngineForResync() { - final Engine target = pendingEngine != null ? 
pendingEngine : activeEngine; - if (target == null) { - throw new AlreadyClosedException("engine is closed"); - } - return target; - } - - synchronized boolean hasPendingEngine() { - return pendingEngine != null; - } - - synchronized void setPendingEngine(Engine pendingEngine, long minRequiredCheckpoint) { - assert this.activeEngine != null : "engine is not activated"; - assert this.pendingEngine == null : "engine is being reset already"; - this.pendingEngine = pendingEngine; - this.minRequiredCheckpoint = minRequiredCheckpoint; - } - - synchronized void adjustMinRequiredCheckpoint(long newMinRequiredCheckpoint) { - this.minRequiredCheckpoint = Math.min(minRequiredCheckpoint, newMinRequiredCheckpoint); - } - - void closePendingEngine() throws IOException { - final Engine closing; - synchronized (this) { - closing = this.pendingEngine; - this.pendingEngine = null; - } - IOUtils.close(closing); - } - - void maybeActivatePendingEngine() throws IOException { - Engine closing = null; - synchronized (this) { - if (pendingEngine != null && pendingEngine.getLocalCheckpoint() >= minRequiredCheckpoint) { - // make sure that all acknowledged writes are visible - pendingEngine.refresh("rollback"); - closing = this.activeEngine; - this.activeEngine = this.pendingEngine; - this.pendingEngine = null; - } - } - IOUtils.close(closing); - } - - @Override - public void close() throws IOException { - final Engine closingActive; - final Engine closingPending; - synchronized (this) { - closingActive = this.activeEngine; - this.activeEngine = null; - closingPending = this.pendingEngine; - this.pendingEngine = null; - } - IOUtils.close(closingActive, closingPending); - } + final SeqNoStats seqNoStats = currentEngine.getSeqNoStats(lastSyncedGlobalCheckpoint); + logger.info("resetting replica engine from max_seq_no [{}] to global checkpoint [{}]", + seqNoStats.getMaxSeqNo(), lastSyncedGlobalCheckpoint); + final TranslogStats translogStats = currentEngine.getTranslogStats(); + final SearchOnlyEngine searchOnlyEngine = new SearchOnlyEngine(currentEngine.config(), seqNoStats, translogStats); + IOUtils.close(currentEngineReference.getAndSet(searchOnlyEngine)); + maxSeqNoOfResettingEngine = seqNoStats.getMaxSeqNo(); + resettingEngine = createNewEngine(newEngineConfig()); + active.set(true); + } + resettingEngine.recoverFromTranslog(upToSeqNo); + // FIXME: The resetting engine might not contain all acknowledged writes in the previous engine. + // In order to not temporarily lose the visibility of any previous acknowledged writes, we should not activate + // the resetting engine until its local checkpoint is at least the max_seq_no of the previous engine. + // However, this delay will prevent the new acknowledged writes from being visible even with an + // acknowledged refresh. Another option is to make search and get unavailable until the resetting + // engine is ready. But this option would redirect all traffic to the primary. + // We should revisit a document-level rollback which does not suffer this limitation.
+ IOUtils.close(currentEngineReference.getAndSet(resettingEngine)); } } diff --git a/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java b/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java index e810e04614720..e82839856bb2f 100644 --- a/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java +++ b/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java @@ -654,7 +654,8 @@ public void testRollbackOnPromotion() throws Exception { } } if (indexed == numberOfReplicas) { - ackedDocIds.add(id); + // TODO: the current rollback impl only guarantees the visibility for acknowledged writes before global checkpoint. + // ackedDocIds.add(id); } if (randomBoolean()) { shards.syncGlobalCheckpoint(); diff --git a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java index b042b40863455..a82659d80a46a 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java @@ -156,21 +156,16 @@ import static org.elasticsearch.test.hamcrest.RegexMatcher.matches; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.containsString; -import static org.hamcrest.Matchers.either; import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.everyItem; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.hasKey; import static org.hamcrest.Matchers.hasToString; import static org.hamcrest.Matchers.instanceOf; -import static org.hamcrest.Matchers.isIn; import static org.hamcrest.Matchers.lessThan; import static org.hamcrest.Matchers.lessThanOrEqualTo; -import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.nullValue; -import static org.hamcrest.Matchers.sameInstance; /** * Simple unit-test IndexShard related operations. 
@@ -746,11 +741,6 @@ public void onFailure(Exception e) { ActionListener listener = new ActionListener() { @Override public void onResponse(Releasable releasable) { - try { - indexShard.afterApplyResyncOperationsBulk(newGlobalCheckPoint); - } catch (IOException e) { - throw new AssertionError(e); - } assertThat(indexShard.getPendingPrimaryTerm(), equalTo(newPrimaryTerm)); assertThat(TestTranslog.getCurrentTerm(getTranslog(indexShard)), equalTo(newPrimaryTerm)); assertThat(indexShard.getLocalCheckpoint(), equalTo(expectedLocalCheckpoint)); @@ -817,8 +807,7 @@ private void finish() { } else { assertTrue(onResponse.get()); assertNull(onFailure.get()); - assertThat(getTranslog(indexShard).getGeneration().translogFileGeneration, - either(equalTo(translogGen + 1)).or(equalTo(translogGen + 2))); + assertThat(getTranslog(indexShard).getGeneration().translogFileGeneration, greaterThanOrEqualTo(translogGen + 1)); assertThat(indexShard.getLocalCheckpoint(), equalTo(expectedLocalCheckpoint)); assertThat(indexShard.getGlobalCheckpoint(), equalTo(newGlobalCheckPoint)); } @@ -890,16 +879,18 @@ public void testGlobalCheckpointSync() throws IOException { closeShards(replicaShard, primaryShard); } - public void testRestoreLocalHistoryOnPromotion() throws IOException, InterruptedException { - final Version indexVersion = randomFrom(Version.V_5_5_0, Version.V_6_3_0, Version.V_6_4_0, Version.CURRENT); - final IndexShard indexShard = newStartedShard(false, indexVersion); - final int operations = scaledRandomIntBetween(1, 1000); + public void testRestoreHistoryFromTranslogOnPromotion() throws IOException, InterruptedException { + final IndexShard indexShard = newStartedShard(false); + final int operations = 1024 - scaledRandomIntBetween(0, 1024); indexOnReplicaWithGaps(indexShard, operations, Math.toIntExact(SequenceNumbers.NO_OPS_PERFORMED)); - long maxSeqNo = indexShard.seqNoStats().getMaxSeqNo(); - final long globalCheckpoint = randomLongBetween(SequenceNumbers.NO_OPS_PERFORMED, indexShard.getLocalCheckpoint()); - final long globalCheckpointOnReplica = randomLongBetween(SequenceNumbers.NO_OPS_PERFORMED, globalCheckpoint); + + final long maxSeqNo = indexShard.seqNoStats().getMaxSeqNo(); + final long globalCheckpointOnReplica = randomLongBetween(SequenceNumbers.UNASSIGNED_SEQ_NO, indexShard.getLocalCheckpoint()); indexShard.updateGlobalCheckpointOnReplica(globalCheckpointOnReplica, "test"); + final long globalCheckpoint = randomLongBetween(SequenceNumbers.UNASSIGNED_SEQ_NO, indexShard.getLocalCheckpoint()); + final CountDownLatch latch = new CountDownLatch(1); + Set docIds = getShardDocUIDs(indexShard, true); indexShard.acquireReplicaOperationPermit( indexShard.getPendingPrimaryTerm() + 1, globalCheckpoint, @@ -916,14 +907,8 @@ public void onFailure(Exception e) { } }, ThreadPool.Names.SAME, ""); + latch.await(); - final boolean shouldReset = indexShard.canResetEngine() && globalCheckpoint < maxSeqNo; - assertThat(indexShard.isEngineResetting(), equalTo(shouldReset)); - if (shouldReset && randomBoolean()) { - // trimmed by the max_seqno on the primary - maxSeqNo = randomLongBetween(globalCheckpoint, maxSeqNo); - indexShard.afterApplyResyncOperationsBulk(maxSeqNo); - } final ShardRouting newRouting = indexShard.routingEntry().moveActiveReplicaToPrimary(); final CountDownLatch resyncLatch = new CountDownLatch(1); @@ -936,25 +921,35 @@ public void onFailure(Exception e) { new IndexShardRoutingTable.Builder(newRouting.shardId()).addShard(newRouting).build(), Collections.emptySet()); resyncLatch.await(); - 
assertThat(indexShard.isEngineResetting(), equalTo(false)); assertThat(indexShard.getLocalCheckpoint(), equalTo(maxSeqNo)); assertThat(indexShard.seqNoStats().getMaxSeqNo(), equalTo(maxSeqNo)); + assertThat(getShardDocUIDs(indexShard, true), equalTo(docIds)); closeShards(indexShard); } - public void testResetReplicaEngineOnPromotion() throws IOException, InterruptedException { + public void testRollbackReplicaEngineOnPromotion() throws IOException, InterruptedException { final IndexShard indexShard = newStartedShard(false); - - // most of the time this is large enough that most of the time there will be at least one gap - final int operations = 1024 - scaledRandomIntBetween(0, 1024); - indexOnReplicaWithGaps(indexShard, operations, Math.toIntExact(SequenceNumbers.NO_OPS_PERFORMED)); - + final int operations = scaledRandomIntBetween(0, 1024); + for (int seqNo = 0; seqNo < operations; seqNo++) { + if (rarely()) { + continue; // gap in sequence number + } + final String id = Integer.toString(seqNo); + SourceToParse sourceToParse = SourceToParse.source(indexShard.shardId().getIndexName(), "_doc", id, + new BytesArray("{}"), XContentType.JSON); + indexShard.applyIndexOperationOnReplica(seqNo, 1, IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP, false, sourceToParse); + if (rarely()) { + indexShard.flush(new FlushRequest()); + } + if (rarely()) { + indexShard.getEngine().rollTranslogGeneration(); + } + } final long globalCheckpointOnReplica = randomLongBetween(SequenceNumbers.UNASSIGNED_SEQ_NO, indexShard.getLocalCheckpoint()); indexShard.updateGlobalCheckpointOnReplica(globalCheckpointOnReplica, "test"); final long globalCheckpoint = randomLongBetween(SequenceNumbers.UNASSIGNED_SEQ_NO, indexShard.getLocalCheckpoint()); - - final Set lastDocIds = getShardDocUIDs(indexShard, true); - final Engine prevEngine = getEngine(indexShard); + Set docsBelowGlobalCheckpoint = getShardDocUIDs(indexShard).stream() + .filter(id -> Long.parseLong(id) <= Math.max(globalCheckpointOnReplica, globalCheckpoint)).collect(Collectors.toSet()); final CountDownLatch latch = new CountDownLatch(1); indexShard.acquireReplicaOperationPermit( indexShard.pendingPrimaryTerm + 1, @@ -974,25 +969,13 @@ public void onFailure(final Exception e) { ThreadPool.Names.SAME, ""); latch.await(); - if (globalCheckpointOnReplica == SequenceNumbers.UNASSIGNED_SEQ_NO && globalCheckpoint == SequenceNumbers.UNASSIGNED_SEQ_NO) { + if (globalCheckpointOnReplica == SequenceNumbers.UNASSIGNED_SEQ_NO + && globalCheckpoint == SequenceNumbers.UNASSIGNED_SEQ_NO) { assertThat(indexShard.getLocalCheckpoint(), equalTo(SequenceNumbers.NO_OPS_PERFORMED)); } else { assertThat(indexShard.getLocalCheckpoint(), equalTo(Math.max(globalCheckpoint, globalCheckpointOnReplica))); } - assertThat(lastDocIds, everyItem(isIn(getShardDocUIDs(indexShard, false)))); - if (indexShard.seqNoStats().getMaxSeqNo() == indexShard.getGlobalCheckpoint()) { - assertThat(indexShard.isEngineResetting(), equalTo(false)); - assertThat(getEngine(indexShard), sameInstance(prevEngine)); - } else { - assertThat(indexShard.isEngineResetting(), equalTo(true)); - assertThat(getEngine(indexShard), not(sameInstance(prevEngine))); - indexShard.afterApplyResyncOperationsBulk( - randomLongBetween(indexShard.getLocalCheckpoint() + 1, indexShard.seqNoStats().getMaxSeqNo())); - assertThat(indexShard.isEngineResetting(), equalTo(true)); - indexShard.afterApplyResyncOperationsBulk( - randomLongBetween(SequenceNumbers.NO_OPS_PERFORMED, indexShard.getLocalCheckpoint())); - 
assertThat(indexShard.isEngineResetting(), equalTo(false)); - } + assertThat(getShardDocUIDs(indexShard), equalTo(docsBelowGlobalCheckpoint)); // ensure that after the local checkpoint throw back and indexing again, the local checkpoint advances final Result result = indexOnReplicaWithGaps(indexShard, operations, Math.toIntExact(indexShard.getLocalCheckpoint())); assertThat(indexShard.getLocalCheckpoint(), equalTo((long) result.localCheckpoint)); @@ -1000,33 +983,6 @@ public void onFailure(final Exception e) { closeShards(indexShard); } - public void testDoNoResetEngineOnOldVersions() throws Exception { - Version oldVersion = VersionUtils.randomVersionBetween(random(), null, VersionUtils.getPreviousVersion(Version.V_6_4_0)); - IndexShard shard = newStartedShard(false, oldVersion); - assertThat(shard.canResetEngine(), equalTo(false)); - indexOnReplicaWithGaps(shard, between(5, 50), Math.toIntExact(SequenceNumbers.NO_OPS_PERFORMED)); - long globalCheckpoint = randomLongBetween(SequenceNumbers.NO_OPS_PERFORMED, shard.getLocalCheckpoint()); - Engine engine = shard.getEngine(); - CountDownLatch acquired = new CountDownLatch(1); - shard.acquireReplicaOperationPermit(shard.getPendingPrimaryTerm() + 1, globalCheckpoint, new ActionListener() { - @Override - public void onResponse(Releasable releasable) { - releasable.close(); - acquired.countDown(); - } - @Override - public void onFailure(Exception e) { - } - }, ThreadPool.Names.SAME, ""); - acquired.await(); - assertThat(shard.getEngine(), sameInstance(engine)); - assertThat(shard.isEngineResetting(), equalTo(false)); - shard.afterApplyResyncOperationsBulk(randomLongBetween(globalCheckpoint, globalCheckpoint + 5)); - assertThat(shard.getEngine(), sameInstance(engine)); - assertThat(shard.isEngineResetting(), equalTo(false)); - closeShards(shard); - } - public void testConcurrentTermIncreaseOnReplicaShard() throws BrokenBarrierException, InterruptedException, IOException { final IndexShard indexShard = newStartedShard(false); @@ -1883,7 +1839,9 @@ public void testRecoverFromStoreRemoveStaleOperations() throws Exception { SourceToParse.source(indexName, "_doc", "doc-1", new BytesArray("{}"), XContentType.JSON)); flushShard(shard); assertThat(getShardDocUIDs(shard), containsInAnyOrder("doc-0", "doc-1")); - // Simulate resync (without rollback): Noop #1, index #2 + // This test tries to assert that a store recovery recovers from the safe commit then replays only valid translog operations. + // Here we try to put stale operations, which is no longer possible since we roll back on a primary fail-over.
+ shard.pendingPrimaryTerm += 1; // primary fail-over without rollback shard.markSeqNoAsNoop(1, "test"); shard.applyIndexOperationOnReplica(2, 1, IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP, false, SourceToParse.source(indexName, "_doc", "doc-2", new BytesArray("{}"), XContentType.JSON)); @@ -2720,9 +2678,6 @@ private Result indexOnReplicaWithGaps( } else { gap = true; } - if (rarely()) { - indexShard.flush(new FlushRequest()); - } } assert localCheckpoint == indexShard.getLocalCheckpoint(); assert !gap || (localCheckpoint != max); @@ -3227,21 +3182,4 @@ public void testOnCloseStats() throws IOException { } - private IndexShard newStartedShard(boolean primary, Version indexCreatedVersion) throws IOException { - Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, indexCreatedVersion) - .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0).put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1).build(); - ShardRouting shardRouting = TestShardRouting.newShardRouting(new ShardId("index", "_na_", 0), randomAlphaOfLength(10), primary, - ShardRoutingState.INITIALIZING, - primary ? RecoverySource.StoreRecoverySource.EMPTY_STORE_INSTANCE : RecoverySource.PeerRecoverySource.INSTANCE); - IndexMetaData.Builder metaData = IndexMetaData.builder(shardRouting.getIndexName()) - .settings(settings).primaryTerm(0, randomLongBetween(1, 1000)).putMapping("_doc", "{ \"properties\": {} }"); - IndexShard shard = newShard(shardRouting, metaData.build()); - if (primary) { - recoverShardFromStore(shard); - } else { - recoveryEmptyReplica(shard, true); - } - return shard; - } - } diff --git a/test/framework/src/main/java/org/elasticsearch/index/replication/ESIndexLevelReplicationTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/replication/ESIndexLevelReplicationTestCase.java index 77bc644909ab8..9b63f0d233e09 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/replication/ESIndexLevelReplicationTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/replication/ESIndexLevelReplicationTestCase.java @@ -728,7 +728,7 @@ protected PrimaryResult performOnPrimary(IndexShard primary, ResyncReplicationRe @Override protected void performOnReplica(ResyncReplicationRequest request, IndexShard replica) throws Exception { - executeResyncOnReplica(replica, request); + executeResyncOnReplica(replica, request, getPrimaryShard().getPendingPrimaryTerm(), getPrimaryShard().getGlobalCheckpoint()); } } @@ -741,8 +741,15 @@ private TransportWriteAction.WritePrimaryResult acquirePermitFuture = new PlainActionFuture<>(); + replica.acquireReplicaOperationPermit( + operationPrimaryTerm, globalCheckpointOnPrimary, acquirePermitFuture, ThreadPool.Names.SAME, request); + try (Releasable ignored = acquirePermitFuture.actionGet()) { + location = TransportResyncReplicationAction.performOnReplica(request, replica); + } TransportWriteActionTestHelper.performPostWriteActions(replica, request, location, logger); } } From ef98712117314303dfa6fd5ca43ce3958601395f Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Thu, 23 Aug 2018 21:12:11 -0400 Subject: [PATCH 13/17] retry on swap --- .../elasticsearch/index/shard/IndexShard.java | 42 +++++++--- .../index/engine/SearchOnlyEngineTests.java | 8 +- .../RecoveryDuringReplicationTests.java | 26 +++--- .../index/shard/IndexShardTests.java | 83 +++++++++++++++---- .../index/engine/EngineTestCase.java | 26 +++--- .../index/shard/IndexShardTestCase.java | 7 +- 6 files changed, 139 insertions(+), 53 deletions(-) diff --git 
a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java index d68260ebeb66c..221e508562463 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -157,6 +157,7 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiConsumer; import java.util.function.Consumer; +import java.util.function.Function; import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.StreamSupport; @@ -503,8 +504,11 @@ public void updateShardState(final ShardRouting newRouting, * replaying the translog and marking any operations there are completed. */ if (seqNoStats().getLocalCheckpoint() < maxSeqNoOfResettingEngine) { + // The engine was reset before but hasn't restored all existing operations yet. + // We need to reset the engine again with all the local history. getEngine().flush(); resetEngineUpToSeqNo(Long.MAX_VALUE); + maxSeqNoOfResettingEngine = SequenceNumbers.NO_OPS_PERFORMED; } else { getEngine().restoreLocalCheckpointFromTranslog(); } @@ -842,8 +846,7 @@ private Engine.DeleteResult delete(Engine engine, Engine.Delete delete) throws I } public Engine.GetResult get(Engine.Get get) { - readAllowed(); - return getEngine().get(get, this::acquireSearcher); + return accessEngineAndRetryOnSwap(engine -> engine.get(get, this::acquireSearcher)); } /** @@ -1172,9 +1175,7 @@ private void markSearcherAccessed() { } private Engine.Searcher acquireSearcher(String source, Engine.SearcherScope scope) { - readAllowed(); - final Engine engine = getEngine(); - final Engine.Searcher searcher = engine.acquireSearcher(source, scope); + final Engine.Searcher searcher = accessEngineAndRetryOnSwap(engine -> engine.acquireSearcher(source, scope)); boolean success = false; try { final Engine.Searcher wrappedSearcher = searcherWrapper == null ? searcher : searcherWrapper.wrap(searcher); @@ -1190,6 +1191,26 @@ private Engine.Searcher acquireSearcher(String source, Engine.SearcherScope scop } } + private T accessEngineAndRetryOnSwap(Function accessor) { + // Retry only occurs when we are swapping the engine during the primary-replica resync. + // We limit the number of retries to 10 as a safe guard - this should happen at most twice. 
+ int remained = 10; + while (true) { + remained--; + readAllowed(); + Engine engine = getEngine(); + try { + return accessor.apply(engine); + } catch (AlreadyClosedException e) { + if (engine != getEngine() && remained > 0) { + logger.debug("retrying as engine was swapped - remained=" + remained); + }else { + throw e; + } + } + } + } + public void close(String reason, boolean flushEngine) throws IOException { synchronized (mutex) { try { @@ -2290,7 +2311,6 @@ public void acquireReplicaOperationPermit(final long opPrimaryTerm, final long g logger.info("detected new primary with primary term [{}], resetting local checkpoint from [{}] to [{}]", opPrimaryTerm, getLocalCheckpoint(), localCheckpoint); if (localCheckpoint < getEngine().getSeqNoStats(localCheckpoint).getMaxSeqNo()) { - sync(); getEngine().flush(true, true); getEngine().resetLocalCheckpoint(localCheckpoint); resetEngineUpToSeqNo(localCheckpoint); @@ -2645,15 +2665,15 @@ public void afterRefresh(boolean didRefresh) throws IOException { } /** Rollback the current engine to the safe commit, the replay local translog up to the given {@code upToSeqNo} (inclusive) */ - private void resetEngineUpToSeqNo(long upToSeqNo) throws IOException { + void resetEngineUpToSeqNo(long upToSeqNo) throws IOException { assert getActiveOperationsCount() == 0 : "Ongoing writes [" + getActiveOperations() + "]"; final Engine resettingEngine; + sync(); // persist the global checkpoint which will be used to trim unsafe commits synchronized (mutex) { final Engine currentEngine = getEngine(); - final long lastSyncedGlobalCheckpoint = getLastSyncedGlobalCheckpoint(); - final SeqNoStats seqNoStats = currentEngine.getSeqNoStats(lastSyncedGlobalCheckpoint); - logger.info("resetting replica engine from max_seq_no [{}] to global checkpoint [{}]", - seqNoStats.getMaxSeqNo(), lastSyncedGlobalCheckpoint); + final SeqNoStats seqNoStats = currentEngine.getSeqNoStats(getGlobalCheckpoint()); + logger.info("resetting replica engine from max_seq_no [{}] to seq_no [{}] global checkpoint [{}]", + seqNoStats.getMaxSeqNo(), upToSeqNo, seqNoStats.getGlobalCheckpoint()); final TranslogStats translogStats = currentEngine.getTranslogStats(); final SearchOnlyEngine searchOnlyEngine = new SearchOnlyEngine(currentEngine.config(), seqNoStats, translogStats); IOUtils.close(currentEngineReference.getAndSet(searchOnlyEngine)); diff --git a/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java b/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java index ef8ba9b0b41e9..a9336c812f3fe 100644 --- a/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java +++ b/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java @@ -64,7 +64,9 @@ public void testSearchOnlyEngine() throws Exception { lastDocIds = getDocIds(engine, true); assertThat(searchOnlyEngine.getLocalCheckpoint(), equalTo(lastSeqNoStats.getLocalCheckpoint())); assertThat(searchOnlyEngine.getSeqNoStats(globalCheckpoint.get()).getMaxSeqNo(), equalTo(lastSeqNoStats.getMaxSeqNo())); - assertThat(getDocIds(searchOnlyEngine, false), equalTo(lastDocIds)); + try (Engine.Searcher searcher = searchOnlyEngine.acquireSearcher("test")) { + assertThat(getDocIds(searcher), equalTo(lastDocIds)); + } for (int i = 0; i < numDocs; i++) { if (randomBoolean()) { String delId = Integer.toString(i); @@ -77,7 +79,9 @@ public void testSearchOnlyEngine() throws Exception { // the locked down engine should still point to the previous commit 
assertThat(searchOnlyEngine.getLocalCheckpoint(), equalTo(lastSeqNoStats.getLocalCheckpoint())); assertThat(searchOnlyEngine.getSeqNoStats(globalCheckpoint.get()).getMaxSeqNo(), equalTo(lastSeqNoStats.getMaxSeqNo())); - assertThat(getDocIds(searchOnlyEngine, false), equalTo(lastDocIds)); + try (Engine.Searcher searcher = searchOnlyEngine.acquireSearcher("test")) { + assertThat(getDocIds(searcher), equalTo(lastDocIds)); + } } // Close and reopen the main engine trimUnsafeCommits(config); diff --git a/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java b/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java index e82839856bb2f..f84e7e9419cac 100644 --- a/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java +++ b/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java @@ -22,7 +22,7 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexableField; -import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.index.Term; import org.elasticsearch.action.admin.indices.flush.FlushRequest; import org.elasticsearch.action.bulk.BulkShardRequest; import org.elasticsearch.action.index.IndexRequest; @@ -39,9 +39,11 @@ import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.engine.EngineConfig; import org.elasticsearch.index.engine.EngineFactory; +import org.elasticsearch.index.engine.EngineTestCase; import org.elasticsearch.index.engine.InternalEngineFactory; import org.elasticsearch.index.engine.InternalEngineTests; import org.elasticsearch.index.mapper.SourceToParse; +import org.elasticsearch.index.mapper.Uid; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.index.shard.IndexShardTestCase; import org.elasticsearch.index.shard.PrimaryReplicaSyncer; @@ -665,34 +667,36 @@ public void testRollbackOnPromotion() throws Exception { } } shards.refresh("test"); + CountDownLatch latch = new CountDownLatch(numberOfReplicas); for (int i = 0; i < numberOfReplicas; i++) { IndexShard replica = shards.getReplicas().get(i); Thread thread = new Thread(() -> { - // should fail at most twice with two transitions: normal engine -> read-only engine -> resetting engine - long hitClosedExceptions = 0; + latch.countDown(); while (isDone.get() == false) { - try { - Set docIds = getShardDocUIDs(replica, false); + try (Engine.Searcher searcher = replica.acquireSearcher("test")) { + Set docIds = EngineTestCase.getDocIds(searcher); assertThat(ackedDocIds, everyItem(isIn(docIds))); - assertThat(replica.getLocalCheckpoint(), greaterThanOrEqualTo(initDocs - 1L)); - } catch (AlreadyClosedException e) { - hitClosedExceptions++; } catch (IOException e) { throw new AssertionError(e); } + for (String id : randomSubsetOf(ackedDocIds)) { + try (Engine.GetResult getResult = replica.get( + new Engine.Get(randomBoolean(), randomBoolean(), "type", id, new Term("_id", Uid.encodeId(id))))) { + assertThat("doc [" + id + "] not found", getResult.exists(), equalTo(true)); + } + } + assertThat(replica.getLocalCheckpoint(), greaterThanOrEqualTo(initDocs - 1L)); } - assertThat(hitClosedExceptions, lessThanOrEqualTo(2L)); }); threads.add(thread); thread.start(); } + latch.await(); shards.promoteReplicaToPrimary(newPrimary).get(); shards.assertAllEqual(initDocs + extraDocsOnNewPrimary); int moreDocs = shards.indexDocs(scaledRandomIntBetween(1, 10)); 
shards.assertAllEqual(initDocs + extraDocsOnNewPrimary + moreDocs); isDone.set(true); // stop before closing shards to have an accurate "AlreadyClosedException" count - } finally { - isDone.set(true); for (Thread thread : threads) { thread.join(); } diff --git a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java index a82659d80a46a..8763ee2dcfbbf 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java @@ -157,11 +157,13 @@ import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.everyItem; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.hasKey; import static org.hamcrest.Matchers.hasToString; import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.isIn; import static org.hamcrest.Matchers.lessThan; import static org.hamcrest.Matchers.lessThanOrEqualTo; import static org.hamcrest.Matchers.notNullValue; @@ -929,22 +931,7 @@ public void onFailure(Exception e) { public void testRollbackReplicaEngineOnPromotion() throws IOException, InterruptedException { final IndexShard indexShard = newStartedShard(false); - final int operations = scaledRandomIntBetween(0, 1024); - for (int seqNo = 0; seqNo < operations; seqNo++) { - if (rarely()) { - continue; // gap in sequence number - } - final String id = Integer.toString(seqNo); - SourceToParse sourceToParse = SourceToParse.source(indexShard.shardId().getIndexName(), "_doc", id, - new BytesArray("{}"), XContentType.JSON); - indexShard.applyIndexOperationOnReplica(seqNo, 1, IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP, false, sourceToParse); - if (rarely()) { - indexShard.flush(new FlushRequest()); - } - if (rarely()) { - indexShard.getEngine().rollTranslogGeneration(); - } - } + indexOnReplicaWithGaps(indexShard, scaledRandomIntBetween(0, 1024), Math.toIntExact(indexShard.getLocalCheckpoint())); final long globalCheckpointOnReplica = randomLongBetween(SequenceNumbers.UNASSIGNED_SEQ_NO, indexShard.getLocalCheckpoint()); indexShard.updateGlobalCheckpointOnReplica(globalCheckpointOnReplica, "test"); final long globalCheckpoint = randomLongBetween(SequenceNumbers.UNASSIGNED_SEQ_NO, indexShard.getLocalCheckpoint()); @@ -977,7 +964,7 @@ public void onFailure(final Exception e) { } assertThat(getShardDocUIDs(indexShard), equalTo(docsBelowGlobalCheckpoint)); // ensure that after the local checkpoint throw back and indexing again, the local checkpoint advances - final Result result = indexOnReplicaWithGaps(indexShard, operations, Math.toIntExact(indexShard.getLocalCheckpoint())); + final Result result = indexOnReplicaWithGaps(indexShard, between(0, 100), Math.toIntExact(indexShard.getLocalCheckpoint())); assertThat(indexShard.getLocalCheckpoint(), equalTo((long) result.localCheckpoint)); closeShards(indexShard); @@ -2678,6 +2665,9 @@ private Result indexOnReplicaWithGaps( } else { gap = true; } + if (rarely()) { + indexShard.flush(new FlushRequest()); + } } assert localCheckpoint == indexShard.getLocalCheckpoint(); assert !gap || (localCheckpoint != max); @@ -3182,4 +3172,63 @@ public void testOnCloseStats() throws IOException { } + public void testResetEngine() throws Exception { + IndexShard replica = 
newStartedShard(false); + indexOnReplicaWithGaps(replica, between(0, 1000), Math.toIntExact(replica.getLocalCheckpoint())); + long globalCheckpoint = randomLongBetween(SequenceNumbers.NO_OPS_PERFORMED, replica.getLocalCheckpoint()); + replica.updateGlobalCheckpointOnReplica(globalCheckpoint, "test"); + Set allDocs = getShardDocUIDs(replica); + Thread[] searchers = new Thread[between(1, 4)]; + AtomicBoolean done = new AtomicBoolean(); + AtomicLong ackedSeqNo = new AtomicLong(globalCheckpoint); + CountDownLatch latch = new CountDownLatch(searchers.length); + for (int i = 0; i < searchers.length; i++) { + searchers[i] = new Thread(() -> { + latch.countDown(); + while (done.get() == false) { + Set ackedDocs = allDocs.stream() + .filter(id -> Long.parseLong(id) <= ackedSeqNo.get()).collect(Collectors.toSet()); + try (Engine.Searcher searcher = replica.acquireSearcher("test")) { + Set docIds = EngineTestCase.getDocIds(searcher); + assertThat(ackedDocs, everyItem(isIn(docIds))); + } catch (IOException e) { + throw new AssertionError(e); + } + for (String id : randomSubsetOf(ackedDocs)) { + Engine.Get get = new Engine.Get(randomBoolean(), randomBoolean(), "_doc", id, new Term("_id", Uid.encodeId(id))); + try (Engine.GetResult getResult = replica.get(get)) { + assertThat("doc [" + id + "] not found" + ackedSeqNo, getResult.exists(), equalTo(true)); + } + } + } + }); + searchers[i].start(); + } + latch.await(); + int iterations = between(1, 10); + for (int i = 0; i < iterations; i++) { + globalCheckpoint = randomLongBetween(replica.getGlobalCheckpoint(), replica.getLocalCheckpoint()); + replica.updateGlobalCheckpointOnReplica(globalCheckpoint, "test"); + long upToSeqNo = randomLongBetween(globalCheckpoint, replica.seqNoStats().getMaxSeqNo()); + logger.debug("resetting from {} to {}", replica.seqNoStats().getMaxSeqNo(), upToSeqNo); + replica.flush(new FlushRequest()); // we flush before reset in the production. 
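The flush right before the reset mirrors what the production code does: the rollback drops the engine back to its last (safe) commit, so every indexed operation has to be reflected in that commit's max_seq_no before the engine can be discarded (the later "add flush assertion" commit in this series enforces exactly that). Conceptually the reset then boils down to the steps below; this is a rough sketch only, with locking, permits and failure handling omitted. `sync()` and `recoverFromTranslog(long)` are methods this patch uses, while `openNewEngine()` is a purely hypothetical placeholder:

    // Rough sketch of resetEngineUpToSeqNo (not the actual IndexShard code):
    void resetEngineUpToSeqNo(long upToSeqNo) throws IOException {
        sync();                                   // persist the global checkpoint used to trim unsafe commits
        // fall back to the safe commit and reopen the engine
        final Engine newEngine = openNewEngine(); // hypothetical helper: trims unsafe commits, builds a fresh InternalEngine
        newEngine.recoverFromTranslog(upToSeqNo); // replays only operations with seqNo <= upToSeqNo
        // the fresh engine is then swapped in under the shard's mutex (omitted here)
    }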
+ replica.resetEngineUpToSeqNo(upToSeqNo); + ackedSeqNo.set(globalCheckpoint); // expose to the background threads + Set expectedDocs = getShardDocUIDs(replica).stream() + .filter(id -> Long.parseLong(id) <= upToSeqNo).collect(Collectors.toSet()); + assertThat(getShardDocUIDs(replica), equalTo(expectedDocs)); + if (randomBoolean()) { + replica.resetEngineUpToSeqNo(Long.MAX_VALUE); + replica.getEngine().fillSeqNoGaps(replica.pendingPrimaryTerm); + if (randomBoolean()) { + indexOnReplicaWithGaps(replica, between(0, 100), Math.toIntExact(replica.getLocalCheckpoint())); + } + } + } + done.set(true); + for (Thread searcher : searchers) { + searcher.join(); + } + closeShards(replica); + } } diff --git a/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java index 257ee0d87cb97..49623da15dd58 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java @@ -520,20 +520,24 @@ public static Set getDocIds(Engine engine, boolean refresh) throws IOExc engine.refresh("test"); } try (Engine.Searcher searcher = engine.acquireSearcher("test")) { - Set ids = new HashSet<>(); - for (LeafReaderContext leafContext : searcher.reader().leaves()) { - LeafReader reader = leafContext.reader(); - Bits liveDocs = reader.getLiveDocs(); - for (int i = 0; i < reader.maxDoc(); i++) { - if (liveDocs == null || liveDocs.get(i)) { - Document uuid = reader.document(i, Collections.singleton(IdFieldMapper.NAME)); - BytesRef binaryID = uuid.getBinaryValue(IdFieldMapper.NAME); - ids.add(Uid.decodeId(Arrays.copyOfRange(binaryID.bytes, binaryID.offset, binaryID.offset + binaryID.length))); - } + return getDocIds(searcher); + } + } + + public static Set getDocIds(Engine.Searcher searcher) throws IOException { + Set ids = new HashSet<>(); + for (LeafReaderContext leafContext : searcher.reader().leaves()) { + LeafReader reader = leafContext.reader(); + Bits liveDocs = reader.getLiveDocs(); + for (int i = 0; i < reader.maxDoc(); i++) { + if (liveDocs == null || liveDocs.get(i)) { + Document uuid = reader.document(i, Collections.singleton(IdFieldMapper.NAME)); + BytesRef binaryID = uuid.getBinaryValue(IdFieldMapper.NAME); + ids.add(Uid.decodeId(Arrays.copyOfRange(binaryID.bytes, binaryID.offset, binaryID.offset + binaryID.length))); } } - return ids; } + return ids; } static void trimUnsafeCommits(EngineConfig config) throws IOException { diff --git a/test/framework/src/main/java/org/elasticsearch/index/shard/IndexShardTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/shard/IndexShardTestCase.java index 09925a687907c..b5a68b58fc25f 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/shard/IndexShardTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/shard/IndexShardTestCase.java @@ -575,7 +575,12 @@ private Store.MetadataSnapshot getMetadataSnapshotOrEmpty(IndexShard replica) th } public static Set getShardDocUIDs(final IndexShard shard, boolean refresh) throws IOException { - return EngineTestCase.getDocIds(shard.getEngine(), refresh); + if (refresh) { + shard.refresh("test"); + } + try (Engine.Searcher searcher = shard.acquireSearcher("test")) { + return EngineTestCase.getDocIds(searcher); + } } public static Set getShardDocUIDs(final IndexShard shard) throws IOException { From 021f94b163546c9d78a75693fd41f1d300dedad3 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen 
Date: Fri, 24 Aug 2018 01:09:01 -0400 Subject: [PATCH 14/17] minimize changes --- .../java/org/elasticsearch/index/shard/IndexShard.java | 1 - .../elasticsearch/index/engine/InternalEngineTests.java | 9 +++++++++ .../index/engine/SearchOnlyEngineTests.java | 2 +- .../org/elasticsearch/index/shard/IndexShardTests.java | 1 + .../org/elasticsearch/index/engine/EngineTestCase.java | 9 --------- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java index 221e508562463..e3eb5a2ff6feb 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -508,7 +508,6 @@ public void updateShardState(final ShardRouting newRouting, // We need to reset the engine again with all the local history. getEngine().flush(); resetEngineUpToSeqNo(Long.MAX_VALUE); - maxSeqNoOfResettingEngine = SequenceNumbers.NO_OPS_PERFORMED; } else { getEngine().restoreLocalCheckpointFromTranslog(); } diff --git a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java index 9bc05fd717c19..0580a4ce825be 100644 --- a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java +++ b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java @@ -4768,6 +4768,15 @@ public void testTrimUnsafeCommits() throws Exception { } } + static void trimUnsafeCommits(EngineConfig config) throws IOException { + final Store store = config.getStore(); + final TranslogConfig translogConfig = config.getTranslogConfig(); + final String translogUUID = store.readLastCommittedSegmentsInfo().getUserData().get(Translog.TRANSLOG_UUID_KEY); + final long globalCheckpoint = Translog.readGlobalCheckpoint(translogConfig.getTranslogPath(), translogUUID); + final long minRetainedTranslogGen = Translog.readMinTranslogGeneration(translogConfig.getTranslogPath(), translogUUID); + store.trimUnsafeCommits(globalCheckpoint, minRetainedTranslogGen, config.getIndexSettings().getIndexVersionCreated()); + } + void assertLuceneOperations(InternalEngine engine, long expectedAppends, long expectedUpdates, long expectedDeletes) { String message = "Lucene operations mismatched;" + " appends [actual:" + engine.getNumDocAppends() + ", expected:" + expectedAppends + "]," + diff --git a/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java b/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java index a9336c812f3fe..5fe1c9edc3167 100644 --- a/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java +++ b/server/src/test/java/org/elasticsearch/index/engine/SearchOnlyEngineTests.java @@ -84,7 +84,7 @@ public void testSearchOnlyEngine() throws Exception { } } // Close and reopen the main engine - trimUnsafeCommits(config); + InternalEngineTests.trimUnsafeCommits(config); try (InternalEngine recoveringEngine = new InternalEngine(config)) { recoveringEngine.recoverFromTranslog(Long.MAX_VALUE); // the locked down engine should still point to the previous commit diff --git a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java index 8763ee2dcfbbf..8a93d90f6af5a 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java +++ 
b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java @@ -3218,6 +3218,7 @@ public void testResetEngine() throws Exception { .filter(id -> Long.parseLong(id) <= upToSeqNo).collect(Collectors.toSet()); assertThat(getShardDocUIDs(replica), equalTo(expectedDocs)); if (randomBoolean()) { + // simulate a primary promotion replica.resetEngineUpToSeqNo(Long.MAX_VALUE); replica.getEngine().fillSeqNoGaps(replica.pendingPrimaryTerm); if (randomBoolean()) { diff --git a/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java index 49623da15dd58..8dff5bda97d46 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java @@ -539,13 +539,4 @@ public static Set getDocIds(Engine.Searcher searcher) throws IOException } return ids; } - - static void trimUnsafeCommits(EngineConfig config) throws IOException { - final Store store = config.getStore(); - final TranslogConfig translogConfig = config.getTranslogConfig(); - final String translogUUID = store.readLastCommittedSegmentsInfo().getUserData().get(Translog.TRANSLOG_UUID_KEY); - final long globalCheckpoint = Translog.readGlobalCheckpoint(translogConfig.getTranslogPath(), translogUUID); - final long minRetainedTranslogGen = Translog.readMinTranslogGeneration(translogConfig.getTranslogPath(), translogUUID); - store.trimUnsafeCommits(globalCheckpoint, minRetainedTranslogGen, config.getIndexSettings().getIndexVersionCreated()); - } } From 607535df70bc2585dd08339e5bc02375b797f0aa Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Fri, 24 Aug 2018 01:26:04 -0400 Subject: [PATCH 15/17] add flush assertion --- .../src/main/java/org/elasticsearch/index/shard/IndexShard.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java index e3eb5a2ff6feb..b0445d122d87c 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -2666,6 +2666,8 @@ public void afterRefresh(boolean didRefresh) throws IOException { /** Rollback the current engine to the safe commit, the replay local translog up to the given {@code upToSeqNo} (inclusive) */ void resetEngineUpToSeqNo(long upToSeqNo) throws IOException { assert getActiveOperationsCount() == 0 : "Ongoing writes [" + getActiveOperations() + "]"; + assert SequenceNumbers.loadSeqNoInfoFromLuceneCommit(commitStats().getUserData().entrySet()).maxSeqNo == seqNoStats().getMaxSeqNo() + : "engine must be flushed before reset; max_seq_no[" + seqNoStats() + "] commit[" + commitStats().getUserData() + "]"; final Engine resettingEngine; sync(); // persist the global checkpoint which will be used to trim unsafe commits synchronized (mutex) { From b0ac93ad2ebd896afbc65890c4429dd9e488f1ec Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Thu, 30 Aug 2018 10:13:14 -0400 Subject: [PATCH 16/17] backout retry-on-swap change --- .../elasticsearch/index/shard/IndexShard.java | 28 ++++-------------- .../RecoveryDuringReplicationTests.java | 29 ++++++++++++------- 2 files changed, 23 insertions(+), 34 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java index 
b0445d122d87c..309d124245a7d 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -157,7 +157,6 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiConsumer; import java.util.function.Consumer; -import java.util.function.Function; import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.StreamSupport; @@ -845,7 +844,8 @@ private Engine.DeleteResult delete(Engine engine, Engine.Delete delete) throws I } public Engine.GetResult get(Engine.Get get) { - return accessEngineAndRetryOnSwap(engine -> engine.get(get, this::acquireSearcher)); + readAllowed(); + return getEngine().get(get, this::acquireSearcher); } /** @@ -1174,7 +1174,9 @@ private void markSearcherAccessed() { } private Engine.Searcher acquireSearcher(String source, Engine.SearcherScope scope) { - final Engine.Searcher searcher = accessEngineAndRetryOnSwap(engine -> engine.acquireSearcher(source, scope)); + readAllowed(); + final Engine engine = getEngine(); + final Engine.Searcher searcher = engine.acquireSearcher(source, scope); boolean success = false; try { final Engine.Searcher wrappedSearcher = searcherWrapper == null ? searcher : searcherWrapper.wrap(searcher); @@ -1190,26 +1192,6 @@ private Engine.Searcher acquireSearcher(String source, Engine.SearcherScope scop } } - private T accessEngineAndRetryOnSwap(Function accessor) { - // Retry only occurs when we are swapping the engine during the primary-replica resync. - // We limit the number of retries to 10 as a safe guard - this should happen at most twice. - int remained = 10; - while (true) { - remained--; - readAllowed(); - Engine engine = getEngine(); - try { - return accessor.apply(engine); - } catch (AlreadyClosedException e) { - if (engine != getEngine() && remained > 0) { - logger.debug("retrying as engine was swapped - remained=" + remained); - }else { - throw e; - } - } - } - } - public void close(String reason, boolean flushEngine) throws IOException { synchronized (mutex) { try { diff --git a/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java b/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java index f84e7e9419cac..8cc374a725032 100644 --- a/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java +++ b/server/src/test/java/org/elasticsearch/index/replication/RecoveryDuringReplicationTests.java @@ -23,6 +23,7 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; +import org.apache.lucene.store.AlreadyClosedException; import org.elasticsearch.action.admin.indices.flush.FlushRequest; import org.elasticsearch.action.bulk.BulkShardRequest; import org.elasticsearch.action.index.IndexRequest; @@ -672,21 +673,27 @@ public void testRollbackOnPromotion() throws Exception { IndexShard replica = shards.getReplicas().get(i); Thread thread = new Thread(() -> { latch.countDown(); + int hitClosedException = 0; while (isDone.get() == false) { - try (Engine.Searcher searcher = replica.acquireSearcher("test")) { - Set docIds = EngineTestCase.getDocIds(searcher); - assertThat(ackedDocIds, everyItem(isIn(docIds))); - } catch (IOException e) { - throw new AssertionError(e); - } - for (String id : randomSubsetOf(ackedDocIds)) { - try (Engine.GetResult getResult = replica.get( - new Engine.Get(randomBoolean(), 
randomBoolean(), "type", id, new Term("_id", Uid.encodeId(id))))) { - assertThat("doc [" + id + "] not found", getResult.exists(), equalTo(true)); + try { + try (Engine.Searcher searcher = replica.acquireSearcher("test")) { + Set docIds = EngineTestCase.getDocIds(searcher); + assertThat(ackedDocIds, everyItem(isIn(docIds))); } + for (String id : randomSubsetOf(ackedDocIds)) { + try (Engine.GetResult getResult = replica.get( + new Engine.Get(randomBoolean(), randomBoolean(), "type", id, new Term("_id", Uid.encodeId(id))))) { + assertThat("doc [" + id + "] not found", getResult.exists(), equalTo(true)); + } + } + assertThat(replica.getLocalCheckpoint(), greaterThanOrEqualTo(initDocs - 1L)); + } catch (AlreadyClosedException e) { + hitClosedException++; + } catch (Exception e) { + throw new AssertionError(e); } - assertThat(replica.getLocalCheckpoint(), greaterThanOrEqualTo(initDocs - 1L)); } + assertThat(hitClosedException, lessThanOrEqualTo(2)); }); threads.add(thread); thread.start(); From 0f19a2887f5a357e67b56a8f75e761ec26eaee06 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Sun, 2 Sep 2018 17:26:33 -0400 Subject: [PATCH 17/17] feedback --- .../index/engine/SearchOnlyEngine.java | 31 +++++++++------ .../elasticsearch/index/shard/IndexShard.java | 39 ++++++++++++------- .../indices/recovery/RecoveryState.java | 12 ------ .../index/shard/IndexShardTests.java | 1 + 4 files changed, 45 insertions(+), 38 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/engine/SearchOnlyEngine.java b/server/src/main/java/org/elasticsearch/index/engine/SearchOnlyEngine.java index dfa695f6fdfac..959f2b52b19a4 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/SearchOnlyEngine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/SearchOnlyEngine.java @@ -21,11 +21,13 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.SegmentInfos; +import org.apache.lucene.index.SoftDeletesDirectoryReaderWrapper; +import org.apache.lucene.search.SearcherFactory; import org.apache.lucene.search.SearcherManager; import org.apache.lucene.store.AlreadyClosedException; import org.elasticsearch.common.lease.Releasable; import org.elasticsearch.common.lease.Releasables; -import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader; +import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.util.concurrent.ReleasableLock; import org.elasticsearch.core.internal.io.IOUtils; import org.elasticsearch.index.mapper.MapperService; @@ -57,12 +59,15 @@ public SearchOnlyEngine(EngineConfig config, SeqNoStats seqNoStats, TranslogStat this.translogStats = translogStats; try { store.incRef(); - ElasticsearchDirectoryReader reader = null; + DirectoryReader reader = null; boolean success = false; try { this.lastCommittedSegmentInfos = store.readLastCommittedSegmentsInfo(); - reader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(store.directory()), config.getShardId()); - this.searcherManager = new SearcherManager(reader, new RamAccountingSearcherFactory(config.getCircuitBreakerService())); + reader = DirectoryReader.open(store.directory()); + if (config.getIndexSettings().isSoftDeleteEnabled()) { + reader = new SoftDeletesDirectoryReaderWrapper(reader, Lucene.SOFT_DELETES_FIELD); + } + this.searcherManager = new SearcherManager(reader, new SearcherFactory()); success = true; } finally { if (success == false) { @@ -100,6 +105,7 @@ public GetResult get(Get get, BiFunction search @Override public Searcher acquireSearcher(String source, 
SearcherScope scope) throws EngineException { + ensureOpen(); Releasable releasable = null; try (ReleasableLock ignored = readLock.acquire()) { store.incRef(); @@ -169,12 +175,12 @@ public boolean ensureTranslogSynced(Stream locations) { @Override public void syncTranslog() { - ensureUnsupportedMethodNeverCalled(); + // noop } @Override public Closeable acquireRetentionLockForPeerRecovery() { - return null; + return ensureUnsupportedMethodNeverCalled(); } @Override @@ -205,7 +211,8 @@ public TranslogStats getTranslogStats() { @Override public Translog.Location getTranslogLastWriteLocation() { - return ensureUnsupportedMethodNeverCalled(); + // noop - returns null as the caller treats null as noop. + return null; } @Override @@ -245,7 +252,7 @@ public List segments(boolean verbose) { @Override public void refresh(String source) throws EngineException { - ensureUnsupportedMethodNeverCalled(); + // noop } @Override @@ -260,17 +267,17 @@ public boolean shouldPeriodicallyFlush() { @Override public SyncedFlushResult syncFlush(String syncId, CommitId expectedCommitId) throws EngineException { - return ensureUnsupportedMethodNeverCalled(); + return SyncedFlushResult.PENDING_OPERATIONS; } @Override public CommitId flush(boolean force, boolean waitIfOngoing) throws EngineException { - return ensureUnsupportedMethodNeverCalled(); + return new CommitId(lastCommittedSegmentInfos.getId()); } @Override public CommitId flush() throws EngineException { - return ensureUnsupportedMethodNeverCalled(); + return flush(false, false); } @Override @@ -309,7 +316,7 @@ public boolean shouldRollTranslogGeneration() { @Override public void rollTranslogGeneration() throws EngineException { - ensureUnsupportedMethodNeverCalled(); + // noop } @Override diff --git a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java index 86502347336df..243267214b4a3 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -509,21 +509,23 @@ public void updateShardState(final ShardRouting newRouting, * numbers. To ensure that this is not the case, we restore the state of the local checkpoint tracker by * replaying the translog and marking any operations there are completed. */ + Engine engine = getEngine(); if (seqNoStats().getLocalCheckpoint() < maxSeqNoOfResettingEngine) { // The engine was reset before but hasn't restored all existing operations yet. // We need to reset the engine again with all the local history. - getEngine().flush(); + engine.flush(); resetEngineUpToSeqNo(Long.MAX_VALUE); + engine = getEngine(); // engine was swapped } else { - getEngine().restoreLocalCheckpointFromTranslog(); + engine.restoreLocalCheckpointFromTranslog(); } /* Rolling the translog generation is not strictly needed here (as we will never have collisions between * sequence numbers in a translog generation in a new primary as it takes the last known sequence number * as a starting point), but it simplifies reasoning about the relationship between primary terms and * translog generations. 
*/ - getEngine().rollTranslogGeneration(); - getEngine().fillSeqNoGaps(newPrimaryTerm); + engine.rollTranslogGeneration(); + engine.fillSeqNoGaps(newPrimaryTerm); replicationTracker.updateLocalCheckpoint(currentRouting.allocationId().getId(), getLocalCheckpoint()); primaryReplicaSyncer.accept(this, new ActionListener() { @Override @@ -1287,8 +1289,11 @@ private Engine.Result applyTranslogOperation(Engine engine, Translog.Operation o // package-private for testing int runTranslogRecovery(Engine engine, Translog.Snapshot snapshot) throws IOException { - recoveryState.getTranslog().setOrIncreaseTotalOperations(snapshot.totalOperations()); - recoveryState.getTranslog().totalOperationsOnStart(snapshot.totalOperations()); + final boolean isResetting = isEngineResetting(); + if (isResetting == false) { + recoveryState.getTranslog().totalOperations(snapshot.totalOperations()); + recoveryState.getTranslog().totalOperationsOnStart(snapshot.totalOperations()); + } int opsRecovered = 0; Translog.Operation operation; while ((operation = snapshot.next()) != null) { @@ -1307,7 +1312,9 @@ int runTranslogRecovery(Engine engine, Translog.Snapshot snapshot) throws IOExce } opsRecovered++; - recoveryState.getTranslog().incrementRecoveredOperations(); + if (isResetting == false) { + recoveryState.getTranslog().incrementRecoveredOperations(); + } } catch (Exception e) { if (ExceptionsHelper.status(e) == RestStatus.BAD_REQUEST) { // mainly for MapperParsingException and Failure to detect xcontent @@ -1460,8 +1467,7 @@ private void ensureWriteAllowed(Engine.Operation.Origin origin) throws IllegalIn IndexShardState state = this.state; // one time volatile read if (origin.isRecovery()) { - final boolean isResetting = getEngineOrNull() instanceof SearchOnlyEngine; - if (state != IndexShardState.RECOVERING && isResetting == false) { + if (state != IndexShardState.RECOVERING && isEngineResetting() == false) { throw new IllegalIndexShardStateException(shardId, state, "operation only allowed when recovering, origin [" + origin + "]"); } } else { @@ -1477,6 +1483,10 @@ private void ensureWriteAllowed(Engine.Operation.Origin origin) throws IllegalIn } } + private boolean isEngineResetting() { + return getEngineOrNull() instanceof SearchOnlyEngine; + } + private boolean assertPrimaryMode() { assert shardRouting.primary() && replicationTracker.isPrimaryMode() : "shard " + shardRouting + " is not a primary shard in primary mode"; return true; @@ -2315,13 +2325,14 @@ public void acquireReplicaOperationPermit(final long opPrimaryTerm, final long g } logger.info("detected new primary with primary term [{}], resetting local checkpoint from [{}] to [{}]", opPrimaryTerm, getLocalCheckpoint(), localCheckpoint); - if (localCheckpoint < getEngine().getSeqNoStats(localCheckpoint).getMaxSeqNo()) { - getEngine().flush(true, true); - getEngine().resetLocalCheckpoint(localCheckpoint); + Engine engine = getEngine(); + if (localCheckpoint < engine.getSeqNoStats(localCheckpoint).getMaxSeqNo()) { + engine.flush(true, true); + engine.resetLocalCheckpoint(localCheckpoint); resetEngineUpToSeqNo(localCheckpoint); } else { - getEngine().resetLocalCheckpoint(localCheckpoint); - getEngine().rollTranslogGeneration(); + engine.resetLocalCheckpoint(localCheckpoint); + engine.rollTranslogGeneration(); } }); } diff --git a/server/src/main/java/org/elasticsearch/indices/recovery/RecoveryState.java b/server/src/main/java/org/elasticsearch/indices/recovery/RecoveryState.java index aed77f9e34ac4..0d57d9506628f 100644 --- 
a/server/src/main/java/org/elasticsearch/indices/recovery/RecoveryState.java +++ b/server/src/main/java/org/elasticsearch/indices/recovery/RecoveryState.java @@ -504,18 +504,6 @@ public synchronized void totalOperations(int total) { assert total == UNKNOWN || total >= recovered : "total, if known, should be > recovered. total [" + total + "], recovered [" + recovered + "]"; } - /** - * Sets or increases the total number of translog operations to be recovered by the given value. - */ - public synchronized void setOrIncreaseTotalOperations(int newOperations) { - if (total == UNKNOWN) { - this.total = newOperations; - } else { - this.total += newOperations; - } - assert total >= recovered : "total [" + total + "] < recovered [" + recovered + "]"; - } - /** * returns the total number of translog operations to recovered, on the start of the recovery. Unlike {@link #totalOperations} * this does change during recovery. diff --git a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java index b84031a47a2d4..e8da87f69a23e 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java @@ -1851,6 +1851,7 @@ public void testRecoverFromStoreRemoveStaleOperations() throws Exception { assertThat(getShardDocUIDs(shard), containsInAnyOrder("doc-0", "doc-1", "doc-2")); // Recovering from store should discard doc #1 final ShardRouting replicaRouting = shard.routingEntry(); + shard.close("test", false); IndexShard newShard = reinitShard(shard, newShardRouting(replicaRouting.shardId(), replicaRouting.currentNodeId(), true, ShardRoutingState.INITIALIZING, RecoverySource.StoreRecoverySource.EXISTING_STORE_INSTANCE));
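Taken together, this last commit reduces the replica's handling of a newly detected primary term to a single decision: if the replica holds operations above the checkpoint handed over by the new primary, it rolls the engine back and replays the local translog up to that point (during which the shard temporarily exposes a SearchOnlyEngine, which is why ensureWriteAllowed treats the resetting state like recovery); otherwise only the local checkpoint tracker needs resetting. A condensed sketch of the branch from the acquireReplicaOperationPermit hunk above, with permits, logging and error handling omitted; `localCheckpoint` stands for the value derived from the global checkpoint supplied by the new primary:

    // Condensed sketch of the new-primary-term handling on a replica.
    final Engine engine = getEngine();
    if (localCheckpoint < engine.getSeqNoStats(localCheckpoint).getMaxSeqNo()) {
        // local operations exist above the checkpoint: roll back and replay up to it
        engine.flush(true, true);
        engine.resetLocalCheckpoint(localCheckpoint);
        resetEngineUpToSeqNo(localCheckpoint);
    } else {
        // nothing above the checkpoint: just reset the tracker and roll the translog generation
        engine.resetLocalCheckpoint(localCheckpoint);
        engine.rollTranslogGeneration();
    }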