Skip to content

Commit e48e1d0

Browse files
committed
Obey lock order if working with store to get metadata snapshots (#24787)
Today when we get a metadata snapshot from the index shard we ensure that if there is no engine started on the shard that we lock the index writer before we go and fetch the store metadata. Yet, if we concurrently recover that shard, recovery finalization might fail since it can't acquire the IW lock on the directory. This is mainly due to the wrong order of aquiring the IW lock and the metadata lock. Fetching store metadata without a started engine should block on the metadata lock in Store.java but since IndexShard locks the writer first we get into a failed recovery dance especially in test. In production this is less of an issue since we rarely get into this siutation if at all. Closes #24481
1 parent 37fa377 commit e48e1d0

File tree

3 files changed

+85
-8
lines changed

3 files changed

+85
-8
lines changed

core/src/main/java/org/elasticsearch/index/shard/IndexShard.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,14 @@
2727
import org.apache.lucene.index.IndexFormatTooNewException;
2828
import org.apache.lucene.index.IndexFormatTooOldException;
2929
import org.apache.lucene.index.IndexWriter;
30+
import org.apache.lucene.index.IndexOptions;
3031
import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy;
3132
import org.apache.lucene.index.SnapshotDeletionPolicy;
3233
import org.apache.lucene.index.Term;
3334
import org.apache.lucene.search.Query;
3435
import org.apache.lucene.search.QueryCachingPolicy;
3536
import org.apache.lucene.search.UsageTrackingQueryCachingPolicy;
3637
import org.apache.lucene.store.AlreadyClosedException;
37-
import org.apache.lucene.store.Lock;
3838
import org.apache.lucene.util.IOUtils;
3939
import org.apache.lucene.util.ThreadInterruptedException;
4040
import org.elasticsearch.ElasticsearchException;
@@ -870,9 +870,7 @@ public Store.MetadataSnapshot snapshotStoreMetadata() throws IOException {
870870
// That can be done out of mutex, since the engine can be closed half way.
871871
Engine engine = getEngineOrNull();
872872
if (engine == null) {
873-
try (Lock ignored = store.directory().obtainLock(IndexWriter.WRITE_LOCK_NAME)) {
874-
return store.getMetadata(null);
875-
}
873+
return store.getMetadata(null, true);
876874
}
877875
}
878876
indexCommit = deletionPolicy.snapshot();

core/src/main/java/org/elasticsearch/index/store/Store.java

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@
9696
import java.util.Map;
9797
import java.util.concurrent.TimeUnit;
9898
import java.util.concurrent.atomic.AtomicBoolean;
99+
import java.util.concurrent.locks.ReentrantLock;
99100
import java.util.concurrent.locks.ReentrantReadWriteLock;
100101
import java.util.zip.CRC32;
101102
import java.util.zip.Checksum;
@@ -223,7 +224,8 @@ final void ensureOpen() {
223224
* {@link #readMetadataSnapshot(Path, ShardId, NodeEnvironment.ShardLocker, Logger)} to read a meta data while locking
224225
* {@link IndexShard#snapshotStoreMetadata()} to safely read from an existing shard
225226
* {@link IndexShard#acquireIndexCommit(boolean)} to get an {@link IndexCommit} which is safe to use but has to be freed
226-
*
227+
* @param commit the index commit to read the snapshot from or <code>null</code> if the latest snapshot should be read from the
228+
* directory
227229
* @throws CorruptIndexException if the lucene index is corrupted. This can be caused by a checksum mismatch or an
228230
* unexpected exception when opening the index reading the segments file.
229231
* @throws IndexFormatTooOldException if the lucene index is too old to be opened.
@@ -233,16 +235,47 @@ final void ensureOpen() {
233235
* @throws IndexNotFoundException if the commit point can't be found in this store
234236
*/
235237
public MetadataSnapshot getMetadata(IndexCommit commit) throws IOException {
238+
return getMetadata(commit, false);
239+
}
240+
241+
/**
242+
* Returns a new MetadataSnapshot for the given commit. If the given commit is <code>null</code>
243+
* the latest commit point is used.
244+
*
245+
* Note that this method requires the caller verify it has the right to access the store and
246+
* no concurrent file changes are happening. If in doubt, you probably want to use one of the following:
247+
*
248+
* {@link #readMetadataSnapshot(Path, ShardId, NodeEnvironment.ShardLocker, Logger)} to read a meta data while locking
249+
* {@link IndexShard#snapshotStoreMetadata()} to safely read from an existing shard
250+
* {@link IndexShard#acquireIndexCommit(boolean)} to get an {@link IndexCommit} which is safe to use but has to be freed
251+
*
252+
* @param commit the index commit to read the snapshot from or <code>null</code> if the latest snapshot should be read from the
253+
* directory
254+
* @param lockDirectory if <code>true</code> the index writer lock will be obtained before reading the snapshot. This should
255+
* only be used if there is no started shard using this store.
256+
* @throws CorruptIndexException if the lucene index is corrupted. This can be caused by a checksum mismatch or an
257+
* unexpected exception when opening the index reading the segments file.
258+
* @throws IndexFormatTooOldException if the lucene index is too old to be opened.
259+
* @throws IndexFormatTooNewException if the lucene index is too new to be opened.
260+
* @throws FileNotFoundException if one or more files referenced by a commit are not present.
261+
* @throws NoSuchFileException if one or more files referenced by a commit are not present.
262+
* @throws IndexNotFoundException if the commit point can't be found in this store
263+
*/
264+
public MetadataSnapshot getMetadata(IndexCommit commit, boolean lockDirectory) throws IOException {
236265
ensureOpen();
237266
failIfCorrupted();
238-
metadataLock.readLock().lock();
239-
try {
267+
assert lockDirectory ? commit == null : true : "IW lock should not be obtained if there is a commit point available";
268+
// if we lock the directory we also acquire the write lock since that makes sure that nobody else tries to lock the IW
269+
// on this store at the same time.
270+
java.util.concurrent.locks.Lock lock = lockDirectory ? metadataLock.writeLock() : metadataLock.readLock();
271+
lock.lock();
272+
try (Closeable ignored = lockDirectory ? directory.obtainLock(IndexWriter.WRITE_LOCK_NAME) : () -> {} ) {
240273
return new MetadataSnapshot(commit, directory, logger);
241274
} catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
242275
markStoreCorrupted(ex);
243276
throw ex;
244277
} finally {
245-
metadataLock.readLock().unlock();
278+
lock.unlock();
246279
}
247280
}
248281

core/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1447,6 +1447,52 @@ public void testDocStats() throws IOException {
14471447
}
14481448
}
14491449

1450+
/**
1451+
* here we are simulating the scenario that happens when we do async shard fetching from GatewaySerivce while we are finishing
1452+
* a recovery and concurrently clean files. This should always be possible without any exception. Yet there was a bug where IndexShard
1453+
* acquired the index writer lock before it called into the store that has it's own locking for metadata reads
1454+
*/
1455+
public void testReadSnapshotConcurrently() throws IOException, InterruptedException {
1456+
IndexShard indexShard = newStartedShard();
1457+
indexDoc(indexShard, "doc", "0", "{\"foo\" : \"bar\"}");
1458+
if (randomBoolean()) {
1459+
indexShard.refresh("test");
1460+
}
1461+
indexDoc(indexShard, "doc", "1", "{\"foo\" : \"bar\"}");
1462+
indexShard.flush(new FlushRequest());
1463+
closeShards(indexShard);
1464+
1465+
final IndexShard newShard = reinitShard(indexShard);
1466+
Store.MetadataSnapshot storeFileMetaDatas = newShard.snapshotStoreMetadata();
1467+
assertTrue("at least 2 files, commit and data: " +storeFileMetaDatas.toString(), storeFileMetaDatas.size() > 1);
1468+
AtomicBoolean stop = new AtomicBoolean(false);
1469+
CountDownLatch latch = new CountDownLatch(1);
1470+
expectThrows(AlreadyClosedException.class, () -> newShard.getEngine()); // no engine
1471+
Thread thread = new Thread(() -> {
1472+
latch.countDown();
1473+
while(stop.get() == false){
1474+
try {
1475+
Store.MetadataSnapshot readMeta = newShard.snapshotStoreMetadata();
1476+
assertEquals(0, storeFileMetaDatas.recoveryDiff(readMeta).different.size());
1477+
assertEquals(0, storeFileMetaDatas.recoveryDiff(readMeta).missing.size());
1478+
assertEquals(storeFileMetaDatas.size(), storeFileMetaDatas.recoveryDiff(readMeta).identical.size());
1479+
} catch (IOException e) {
1480+
throw new AssertionError(e);
1481+
}
1482+
}
1483+
});
1484+
thread.start();
1485+
latch.await();
1486+
1487+
int iters = iterations(10, 100);
1488+
for (int i = 0; i < iters; i++) {
1489+
newShard.store().cleanupAndVerify("test", storeFileMetaDatas);
1490+
}
1491+
assertTrue(stop.compareAndSet(false, true));
1492+
thread.join();
1493+
closeShards(newShard);
1494+
}
1495+
14501496
/** A dummy repository for testing which just needs restore overridden */
14511497
private abstract static class RestoreOnlyRepository extends AbstractLifecycleComponent implements Repository {
14521498
private final String indexName;

0 commit comments

Comments
 (0)