
Commit f5d4550

Fold EngineDiskUtils into Store, for better lock semantics (#29156)
#28245 introduced the utility class `EngineDiskUtils` with a set of methods to prepare and change the translog and Lucene commit points. That util class bundled everything needed to create an empty shard, bootstrap a shard from a freshly restored Lucene index, etc. To perform these manipulations safely, the util methods acquired the IndexWriter's lock. That would sometimes fail due to concurrent shard store fetching or other short-lived activities that require the files not to change while they are being read. Since there is no way to wait on the IndexWriter lock, the `Store` class uses other locks to make sure that once we try to acquire the IW lock, the acquisition will succeed. To sidestep this waiting problem, this PR folds `EngineDiskUtils` into `Store`. Sadly, this comes at a price: the `Store` class doesn't, and shouldn't, know about the translog. As such, the logic is slightly less tight, and callers have to do the translog manipulations on their own (see the sketch below).
1 parent a9392f6 · commit f5d4550
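Because `Store` now only prepares the Lucene commit, callers wire up the translog themselves. A minimal sketch of that caller-side pattern, as applied in the `StoreRecovery` hunks below (it assumes a `Store store`, an `IndexShard indexShard`, and a `ShardId shardId` in scope):

    // Store prepares the commit; the caller creates the translog and binds the two.
    store.createEmpty();                                    // fresh, empty Lucene index
    final String translogUUID = Translog.createEmptyTranslog(
            indexShard.shardPath().resolveTranslog(),       // translog location
            SequenceNumbers.NO_OPS_PERFORMED,               // initial global checkpoint
            shardId);
    store.associateIndexWithNewTranslog(translogUUID);      // bake the translog UUID into the commit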

14 files changed · +293 −388 lines changed


docs/reference/indices/flush.asciidoc

Lines changed: 1 addition & 1 deletion
@@ -93,7 +93,7 @@ which returns something similar to:
   {
     "commit" : {
       "id" : "3M3zkw2GHMo2Y4h4/KFKCg==",
-      "generation" : 3,
+      "generation" : 4,
       "user_data" : {
         "translog_uuid" : "hnOG3xFcTDeoI_kvvvOdNA",
         "history_uuid" : "XP7KDJGiS1a2fHYiFL5TXQ",

server/src/main/java/org/elasticsearch/index/IndexService.java

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,6 @@
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.util.Accountable;
-import org.elasticsearch.core.internal.io.IOUtils;
 import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.client.Client;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
@@ -40,6 +39,7 @@
 import org.elasticsearch.common.util.concurrent.AbstractRunnable;
 import org.elasticsearch.common.util.concurrent.FutureUtils;
 import org.elasticsearch.common.xcontent.NamedXContentRegistry;
+import org.elasticsearch.core.internal.io.IOUtils;
 import org.elasticsearch.env.NodeEnvironment;
 import org.elasticsearch.env.ShardLock;
 import org.elasticsearch.env.ShardLockObtainFailedException;

server/src/main/java/org/elasticsearch/index/engine/EngineDiskUtils.java

Lines changed: 0 additions & 144 deletions
This file was deleted.

server/src/main/java/org/elasticsearch/index/shard/StoreRecovery.java

Lines changed: 16 additions & 5 deletions
@@ -40,13 +40,13 @@
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.index.Index;
-import org.elasticsearch.index.engine.EngineDiskUtils;
 import org.elasticsearch.index.engine.EngineException;
 import org.elasticsearch.index.engine.InternalEngine;
 import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.index.seqno.SequenceNumbers;
 import org.elasticsearch.index.snapshots.IndexShardRestoreFailedException;
 import org.elasticsearch.index.store.Store;
+import org.elasticsearch.index.translog.Translog;
 import org.elasticsearch.indices.recovery.RecoveryState;
 import org.elasticsearch.repositories.IndexId;
 import org.elasticsearch.repositories.Repository;
@@ -390,7 +390,11 @@ private void internalRecoverFromStore(IndexShard indexShard) throws IndexShardRe
         recoveryState.getIndex().updateVersion(version);
         if (recoveryState.getRecoverySource().getType() == RecoverySource.Type.LOCAL_SHARDS) {
             assert indexShouldExists;
-            EngineDiskUtils.bootstrapNewHistoryFromLuceneIndex(store.directory(), indexShard.shardPath().resolveTranslog(), shardId);
+            store.bootstrapNewHistory();
+            final SegmentInfos segmentInfos = store.readLastCommittedSegmentsInfo();
+            final long maxSeqNo = Long.parseLong(segmentInfos.userData.get(SequenceNumbers.MAX_SEQ_NO));
+            final String translogUUID = Translog.createEmptyTranslog(indexShard.shardPath().resolveTranslog(), maxSeqNo, shardId);
+            store.associateIndexWithNewTranslog(translogUUID);
         } else if (indexShouldExists) {
             // since we recover from local, just fill the files and size
             try {
@@ -402,7 +406,10 @@ private void internalRecoverFromStore(IndexShard indexShard) throws IndexShardRe
                 logger.debug("failed to list file details", e);
             }
         } else {
-            EngineDiskUtils.createEmpty(store.directory(), indexShard.shardPath().resolveTranslog(), shardId);
+            store.createEmpty();
+            final String translogUUID = Translog.createEmptyTranslog(indexShard.shardPath().resolveTranslog(),
+                SequenceNumbers.NO_OPS_PERFORMED, shardId);
+            store.associateIndexWithNewTranslog(translogUUID);
         }
         indexShard.openEngineAndRecoverFromTranslog();
         indexShard.getEngine().fillSeqNoGaps(indexShard.getPrimaryTerm());
@@ -445,8 +452,12 @@ private void restore(final IndexShard indexShard, final Repository repository, f
         }
         final IndexId indexId = repository.getRepositoryData().resolveIndexId(indexName);
         repository.restoreShard(indexShard, restoreSource.snapshot().getSnapshotId(), restoreSource.version(), indexId, snapshotShardId, indexShard.recoveryState());
-        EngineDiskUtils.bootstrapNewHistoryFromLuceneIndex(indexShard.store().directory(), indexShard.shardPath().resolveTranslog(),
-            shardId);
+        final Store store = indexShard.store();
+        store.bootstrapNewHistory();
+        final SegmentInfos segmentInfos = store.readLastCommittedSegmentsInfo();
+        final long maxSeqNo = Long.parseLong(segmentInfos.userData.get(SequenceNumbers.MAX_SEQ_NO));
+        final String translogUUID = Translog.createEmptyTranslog(indexShard.shardPath().resolveTranslog(), maxSeqNo, shardId);
+        store.associateIndexWithNewTranslog(translogUUID);
         assert indexShard.shardRouting.primary() : "only primary shards can recover from store";
         indexShard.openEngineAndRecoverFromTranslog();
         indexShard.getEngine().fillSeqNoGaps(indexShard.getPrimaryTerm());
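Both the local-shards and snapshot-restore paths above now repeat the same three steps, since `Store` no longer knows about the translog: bootstrap a new history UUID, create an empty translog seeded with the commit's max sequence number (used as the initial global checkpoint), and bake the new translog UUID into the Lucene commit. A condensed view, using the same names as in the hunks above:

    store.bootstrapNewHistory();                            // new history UUID; local checkpoint = max seq no
    final long maxSeqNo = Long.parseLong(
            store.readLastCommittedSegmentsInfo().userData.get(SequenceNumbers.MAX_SEQ_NO));
    final String translogUUID = Translog.createEmptyTranslog(
            indexShard.shardPath().resolveTranslog(), maxSeqNo, shardId);
    store.associateIndexWithNewTranslog(translogUUID);      // the commit now points at the fresh translog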

server/src/main/java/org/elasticsearch/index/store/Store.java

Lines changed: 102 additions & 2 deletions
@@ -30,6 +30,8 @@
 import org.apache.lucene.index.IndexFormatTooOldException;
 import org.apache.lucene.index.IndexNotFoundException;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.index.SegmentCommitInfo;
 import org.apache.lucene.index.SegmentInfos;
 import org.apache.lucene.store.AlreadyClosedException;
@@ -46,7 +48,6 @@
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
-import org.elasticsearch.core.internal.io.IOUtils;
 import org.apache.lucene.util.Version;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.ExceptionsHelper;
@@ -69,11 +70,13 @@
 import org.elasticsearch.common.util.concurrent.AbstractRefCounted;
 import org.elasticsearch.common.util.concurrent.RefCounted;
 import org.elasticsearch.common.util.iterable.Iterables;
+import org.elasticsearch.core.internal.io.IOUtils;
 import org.elasticsearch.env.NodeEnvironment;
 import org.elasticsearch.env.ShardLock;
 import org.elasticsearch.env.ShardLockObtainFailedException;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.engine.Engine;
+import org.elasticsearch.index.engine.InternalEngine;
 import org.elasticsearch.index.seqno.SequenceNumbers;
 import org.elasticsearch.index.shard.AbstractIndexShardComponent;
 import org.elasticsearch.index.shard.IndexShard;
@@ -155,7 +158,8 @@ public Store(ShardId shardId, IndexSettings indexSettings, DirectoryService dire
         this(shardId, indexSettings, directoryService, shardLock, OnClose.EMPTY);
     }
 
-    public Store(ShardId shardId, IndexSettings indexSettings, DirectoryService directoryService, ShardLock shardLock, OnClose onClose) throws IOException {
+    public Store(ShardId shardId, IndexSettings indexSettings, DirectoryService directoryService, ShardLock shardLock,
+                 OnClose onClose) throws IOException {
         super(shardId, indexSettings);
         final Settings settings = indexSettings.getSettings();
         this.directory = new StoreDirectory(directoryService.newDirectory(), Loggers.getLogger("index.store.deletes", settings, shardId));
@@ -1454,4 +1458,100 @@ private static long estimateSize(Directory directory) throws IOException {
         }
     }
 
+    /**
+     * creates an empty lucene index. Any existing data will be deleted.
+     */
+    public void createEmpty() throws IOException {
+        metadataLock.writeLock().lock();
+        try (IndexWriter writer = newIndexWriter(IndexWriterConfig.OpenMode.CREATE, directory)) {
+            final Map<String, String> map = new HashMap<>();
+            map.put(Engine.HISTORY_UUID_KEY, UUIDs.randomBase64UUID());
+            map.put(SequenceNumbers.LOCAL_CHECKPOINT_KEY, Long.toString(SequenceNumbers.NO_OPS_PERFORMED));
+            map.put(SequenceNumbers.MAX_SEQ_NO, Long.toString(SequenceNumbers.NO_OPS_PERFORMED));
+            map.put(InternalEngine.MAX_UNSAFE_AUTO_ID_TIMESTAMP_COMMIT_ID, "-1");
+            updateCommitData(writer, map);
+        } finally {
+            metadataLock.writeLock().unlock();
+        }
+    }
+
+
+    /**
+     * Marks an existing lucene index with a new history uuid.
+     * This is used to make sure no existing shard will recover from this index using ops-based recovery.
+     */
+    public void bootstrapNewHistory() throws IOException {
+        metadataLock.writeLock().lock();
+        try (IndexWriter writer = newIndexWriter(IndexWriterConfig.OpenMode.APPEND, directory)) {
+            final Map<String, String> userData = getUserData(writer);
+            final long maxSeqNo = Long.parseLong(userData.get(SequenceNumbers.MAX_SEQ_NO));
+            final Map<String, String> map = new HashMap<>();
+            map.put(Engine.HISTORY_UUID_KEY, UUIDs.randomBase64UUID());
+            map.put(SequenceNumbers.LOCAL_CHECKPOINT_KEY, Long.toString(maxSeqNo));
+            updateCommitData(writer, map);
+        } finally {
+            metadataLock.writeLock().unlock();
+        }
+    }
+
+    /**
+     * Force bakes the given translog generation as recovery information in the lucene index. This is
+     * used when recovering from a snapshot or peer file-based recovery where a new empty translog is
+     * created and the existing lucene index should be changed to use it.
+     */
+    public void associateIndexWithNewTranslog(final String translogUUID) throws IOException {
+        metadataLock.writeLock().lock();
+        try (IndexWriter writer = newIndexWriter(IndexWriterConfig.OpenMode.APPEND, directory)) {
+            if (translogUUID.equals(getUserData(writer).get(Translog.TRANSLOG_UUID_KEY))) {
+                throw new IllegalArgumentException("a new translog uuid can't be equal to existing one. got [" + translogUUID + "]");
+            }
+            final Map<String, String> map = new HashMap<>();
+            map.put(Translog.TRANSLOG_GENERATION_KEY, "1");
+            map.put(Translog.TRANSLOG_UUID_KEY, translogUUID);
+            updateCommitData(writer, map);
+        } finally {
+            metadataLock.writeLock().unlock();
+        }
+    }
+
+
+    /**
+     * Checks that the Lucene index contains a history uuid marker. If not, a new one is generated and committed.
+     */
+    public void ensureIndexHasHistoryUUID() throws IOException {
+        metadataLock.writeLock().lock();
+        try (IndexWriter writer = newIndexWriter(IndexWriterConfig.OpenMode.APPEND, directory)) {
+            final Map<String, String> userData = getUserData(writer);
+            if (userData.containsKey(Engine.HISTORY_UUID_KEY) == false) {
+                updateCommitData(writer, Collections.singletonMap(Engine.HISTORY_UUID_KEY, UUIDs.randomBase64UUID()));
+            }
+        } finally {
+            metadataLock.writeLock().unlock();
+        }
+    }
+
+    private void updateCommitData(IndexWriter writer, Map<String, String> keysToUpdate) throws IOException {
+        final Map<String, String> userData = getUserData(writer);
+        userData.putAll(keysToUpdate);
+        writer.setLiveCommitData(userData.entrySet());
+        writer.commit();
+    }
+
+    private Map<String, String> getUserData(IndexWriter writer) {
+        final Map<String, String> userData = new HashMap<>();
+        writer.getLiveCommitData().forEach(e -> userData.put(e.getKey(), e.getValue()));
+        return userData;
+    }
+
+    private IndexWriter newIndexWriter(IndexWriterConfig.OpenMode openMode, final Directory dir) throws IOException {
+        IndexWriterConfig iwc = new IndexWriterConfig(null)
+            .setCommitOnClose(false)
+            // we don't want merges to happen here - we call maybe merge on the engine
+            // later once we started it up otherwise we would need to wait for it here
+            // we also don't specify a codec here and merges should use the engine's for this index
+            .setMergePolicy(NoMergePolicy.INSTANCE)
+            .setOpenMode(openMode);
+        return new IndexWriter(dir, iwc);
+    }
+
 }
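The reason folding these helpers into `Store` fixes the waiting problem: each method above takes `metadataLock.writeLock()` before opening its short-lived `IndexWriter`, while `Store`'s metadata readers (used, for example, by shard store fetching) hold the corresponding read lock while they read files. By the time the writer is opened, nothing is reading the commit files, so acquiring the IW lock cannot race with them. A simplified illustration of that discipline (the reader method here is an illustrative sketch, not the exact `Store` internals):

    // assumes: java.util.concurrent.locks.*, java.util.Map, java.io.IOException,
    // org.apache.lucene.index.SegmentInfos, org.apache.lucene.store.Directory
    final ReadWriteLock metadataLock = new ReentrantReadWriteLock();

    // readers hold the read lock while the commit files are in use,
    // blocking commit mutations (and thus IndexWriter creation) meanwhile
    Map<String, String> readCommitUserData(Directory dir) throws IOException {
        metadataLock.readLock().lock();
        try {
            return SegmentInfos.readLatestCommit(dir).getUserData();
        } finally {
            metadataLock.readLock().unlock();
        }
    }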
