
Commit e58fc03

Restore from Individual Shard Snapshot Files in Parallel (#48110)
Make restoring shard snapshots run in parallel on the `SNAPSHOT` thread-pool.
1 parent 678492d commit e58fc03
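
The core pattern this commit introduces (visible in the `BlobStoreRepository` diff below) is a bounded fan-out: up to `SNAPSHOT`-pool-size workers drain a shared queue of file snapshots, and the restore completes once every worker has finished. The following is a minimal, self-contained sketch of that idea using plain `java.util.concurrent` types as stand-ins for Elasticsearch's `ActionListener`/`GroupedActionListener` machinery; the method and file names are illustrative only, not the actual API.

```java
import java.util.List;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;

public class ParallelRestoreSketch {

    // Restore every file using at most maxWorkers concurrent tasks on the given pool.
    static CompletableFuture<Void> restoreFiles(List<String> files, ExecutorService snapshotPool, int maxWorkers) {
        final BlockingQueue<String> queue = new LinkedBlockingQueue<>(files);
        // Start as many workers as fit into the pool at once at the most
        final int workers = Math.min(maxWorkers, files.size());
        final CompletableFuture<?>[] done = new CompletableFuture<?>[workers];
        for (int i = 0; i < workers; i++) {
            done[i] = CompletableFuture.runAsync(() -> {
                String file;
                while ((file = queue.poll()) != null) {   // drain the shared queue until it is empty
                    restoreOneFile(file);
                }
            }, snapshotPool).whenComplete((v, e) -> {
                if (e != null) {
                    queue.clear();                         // first failure: stop handing out the remaining files
                }
            });
        }
        // Rough equivalent of the GroupedActionListener: complete once every worker has finished.
        return CompletableFuture.allOf(done);
    }

    private static void restoreOneFile(String name) {      // hypothetical per-file copy from the repository
        System.out.println("restoring " + name);
    }

    public static void main(String[] args) {
        ExecutorService pool = Executors.newFixedThreadPool(4);
        restoreFiles(List.of("_0.cfs", "_0.cfe", "_0.si", "segments_2"), pool, 4).join();
        pool.shutdown();
    }
}
```

The real implementation additionally ref-counts the target `Store` around each worker and wraps any failure in an `IndexShardRestoreFailedException`, as shown in the `BlobStoreRepository` diff.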

File tree: 15 files changed, +383 −285 lines

server/src/main/java/org/elasticsearch/index/shard/IndexShard.java

Lines changed: 20 additions & 17 deletions
@@ -42,6 +42,7 @@
 import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.Version;
 import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.action.ActionRunnable;
 import org.elasticsearch.action.admin.indices.flush.FlushRequest;
 import org.elasticsearch.action.admin.indices.forcemerge.ForceMergeRequest;
 import org.elasticsearch.action.admin.indices.upgrade.post.UpgradeRequest;
@@ -1816,12 +1817,16 @@ public boolean recoverFromStore() {
         return storeRecovery.recoverFromStore(this);
     }
 
-    public boolean restoreFromRepository(Repository repository) {
-        assert shardRouting.primary() : "recover from store only makes sense if the shard is a primary shard";
-        assert recoveryState.getRecoverySource().getType() == RecoverySource.Type.SNAPSHOT : "invalid recovery type: " +
-            recoveryState.getRecoverySource();
-        StoreRecovery storeRecovery = new StoreRecovery(shardId, logger);
-        return storeRecovery.recoverFromRepository(this, repository);
+    public void restoreFromRepository(Repository repository, ActionListener<Boolean> listener) {
+        try {
+            assert shardRouting.primary() : "recover from store only makes sense if the shard is a primary shard";
+            assert recoveryState.getRecoverySource().getType() == RecoverySource.Type.SNAPSHOT : "invalid recovery type: " +
+                recoveryState.getRecoverySource();
+            StoreRecovery storeRecovery = new StoreRecovery(shardId, logger);
+            storeRecovery.recoverFromRepository(this, repository, listener);
+        } catch (Exception e) {
+            listener.onFailure(e);
+        }
     }
 
     /**
@@ -2504,17 +2509,15 @@ public void startRecovery(RecoveryState recoveryState, PeerRecoveryTargetService
             case SNAPSHOT:
                 markAsRecovering("from snapshot", recoveryState); // mark the shard as recovering on the cluster state thread
                 SnapshotRecoverySource recoverySource = (SnapshotRecoverySource) recoveryState.getRecoverySource();
-                threadPool.generic().execute(() -> {
-                    try {
-                        final Repository repository = repositoriesService.repository(recoverySource.snapshot().getRepository());
-                        if (restoreFromRepository(repository)) {
-                            recoveryListener.onRecoveryDone(recoveryState);
-                        }
-                    } catch (Exception e) {
-                        recoveryListener.onRecoveryFailure(recoveryState,
-                            new RecoveryFailedException(recoveryState, null, e), true);
-                    }
-                });
+                threadPool.generic().execute(
+                    ActionRunnable.<Boolean>wrap(ActionListener.wrap(r -> {
+                            if (r) {
+                                recoveryListener.onRecoveryDone(recoveryState);
+                            }
+                        },
+                        e -> recoveryListener.onRecoveryFailure(recoveryState, new RecoveryFailedException(recoveryState, null, e), true)),
+                        restoreListener -> restoreFromRepository(
+                            repositoriesService.repository(recoverySource.snapshot().getRepository()), restoreListener)));
                 break;
             case LOCAL_SHARDS:
                 final IndexMetaData indexMetaData = indexSettings().getIndexMetaData();
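
Note on the wiring above: `restoreFromRepository` no longer returns a `boolean`; the result arrives through an `ActionListener<Boolean>`, and `onRecoveryDone` is only invoked when that result is `true`. A simplified stand-in (not the Elasticsearch API) of that callback shape:

```java
public class CallbackRecoverySketch {

    // Simplified stand-in for org.elasticsearch.action.ActionListener<T>
    interface Listener<T> {
        void onResponse(T result);
        void onFailure(Exception e);
    }

    // Old shape: `boolean restoreFromRepository(Repository)` returned on the calling thread.
    // New shape: the outcome is reported through the listener, possibly from another thread.
    static void restoreFromRepository(Listener<Boolean> listener) {
        final boolean restored;
        try {
            restored = doRestore();             // hypothetical body; the real method delegates to StoreRecovery
        } catch (Exception e) {
            listener.onFailure(e);              // mirrors the try/catch added to IndexShard#restoreFromRepository
            return;
        }
        listener.onResponse(restored);
    }

    private static boolean doRestore() {
        return true;
    }

    public static void main(String[] args) {
        restoreFromRepository(new Listener<>() {
            @Override
            public void onResponse(Boolean restored) {
                if (restored) {                 // only signal recovery-done when the restore actually completed
                    System.out.println("onRecoveryDone");
                }
            }

            @Override
            public void onFailure(Exception e) {
                System.out.println("onRecoveryFailure: " + e);
            }
        });
    }
}
```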

server/src/main/java/org/elasticsearch/index/shard/StoreRecovery.java

Lines changed: 99 additions & 76 deletions
Large diffs are not rendered by default.

server/src/main/java/org/elasticsearch/repositories/FilterRepository.java

Lines changed: 3 additions & 2 deletions
@@ -123,8 +123,9 @@ public void snapshotShard(Store store, MapperService mapperService, SnapshotId s
         in.snapshotShard(store, mapperService, snapshotId, indexId, snapshotIndexCommit, snapshotStatus, writeShardGens, listener);
     }
     @Override
-    public void restoreShard(Store store, SnapshotId snapshotId, IndexId indexId, ShardId snapshotShardId, RecoveryState recoveryState) {
-        in.restoreShard(store, snapshotId, indexId, snapshotShardId, recoveryState);
+    public void restoreShard(Store store, SnapshotId snapshotId, IndexId indexId, ShardId snapshotShardId, RecoveryState recoveryState,
+                             ActionListener<Void> listener) {
+        in.restoreShard(store, snapshotId, indexId, snapshotShardId, recoveryState, listener);
     }
 
     @Override

server/src/main/java/org/elasticsearch/repositories/Repository.java

Lines changed: 3 additions & 2 deletions
@@ -211,9 +211,10 @@ void snapshotShard(Store store, MapperService mapperService, SnapshotId snapshot
      * @param indexId          id of the index in the repository from which the restore is occurring
      * @param snapshotShardId  shard id (in the snapshot)
      * @param recoveryState    recovery state
+     * @param listener         listener to invoke once done
      */
-    void restoreShard(Store store, SnapshotId snapshotId, IndexId indexId, ShardId snapshotShardId, RecoveryState recoveryState);
-
+    void restoreShard(Store store, SnapshotId snapshotId, IndexId indexId, ShardId snapshotShardId, RecoveryState recoveryState,
+                      ActionListener<Void> listener);
     /**
      * Retrieve shard snapshot status for the stored snapshot
      *
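
With the widened interface, an implementation must report completion through the listener instead of returning or throwing. A simplified, self-contained sketch of that contract, mirroring the `ActionListener.completeWith` idiom used in the test changes further down; the `Listener` type and helper here are stand-ins, not the Elasticsearch classes:

```java
import java.util.concurrent.Callable;

public class RestoreShardContractSketch {

    // Simplified stand-in for org.elasticsearch.action.ActionListener<T>
    interface Listener<T> {
        void onResponse(T result);
        void onFailure(Exception e);
    }

    // Rough equivalent of ActionListener.completeWith(listener, () -> { ...; return null; }):
    // run the body, then complete the listener exactly once with either the result or the failure.
    static <T> void completeWith(Listener<T> listener, Callable<T> body) {
        final T result;
        try {
            result = body.call();
        } catch (Exception e) {
            listener.onFailure(e);
            return;
        }
        listener.onResponse(result);
    }

    // Shape of a restoreShard implementation after this change: nothing is returned or thrown,
    // everything is reported through the listener.
    static void restoreShard(Listener<Void> listener) {
        completeWith(listener, () -> {
            copySnapshotFilesIntoStore();   // hypothetical restore body
            return null;
        });
    }

    private static void copySnapshotFilesIntoStore() {
        System.out.println("copying files");
    }

    public static void main(String[] args) {
        restoreShard(new Listener<>() {
            @Override
            public void onResponse(Void ignored) {
                System.out.println("restore finished");
            }

            @Override
            public void onFailure(Exception e) {
                System.out.println("restore failed: " + e);
            }
        });
    }
}
```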

server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java

Lines changed: 45 additions & 20 deletions
@@ -1195,11 +1195,7 @@ public void snapshotShard(Store store, MapperService mapperService, SnapshotId s
         final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
         // Start as many workers as fit into the snapshot pool at once at the most
         final int workers = Math.min(threadPool.info(ThreadPool.Names.SNAPSHOT).getMax(), indexIncrementalFileCount);
-        final ActionListener<Void> filesListener = ActionListener.delegateResponse(
-            new GroupedActionListener<>(allFilesUploadedListener, workers), (l, e) -> {
-                filesToSnapshot.clear(); // Stop uploading the remaining files if we run into any exception
-                l.onFailure(e);
-            });
+        final ActionListener<Void> filesListener = fileQueueListener(filesToSnapshot, workers, allFilesUploadedListener);
         for (int i = 0; i < workers; ++i) {
             executor.execute(ActionRunnable.run(filesListener, () -> {
                 BlobStoreIndexShardSnapshot.FileInfo snapshotFileInfo = filesToSnapshot.poll(0L, TimeUnit.MILLISECONDS);
@@ -1223,19 +1219,42 @@ public void snapshotShard(Store store, MapperService mapperService, SnapshotId s
 
     @Override
     public void restoreShard(Store store, SnapshotId snapshotId, IndexId indexId, ShardId snapshotShardId,
-                             RecoveryState recoveryState) {
-        ShardId shardId = store.shardId();
-        try {
-            final BlobContainer container = shardContainer(indexId, snapshotShardId);
-            BlobStoreIndexShardSnapshot snapshot = loadShardSnapshot(container, snapshotId);
-            SnapshotFiles snapshotFiles = new SnapshotFiles(snapshot.snapshot(), snapshot.indexFiles());
+                             RecoveryState recoveryState, ActionListener<Void> listener) {
+        final ShardId shardId = store.shardId();
+        final ActionListener<Void> restoreListener = ActionListener.delegateResponse(listener,
+            (l, e) -> l.onFailure(new IndexShardRestoreFailedException(shardId, "failed to restore snapshot [" + snapshotId + "]", e)));
+        final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
+        final BlobContainer container = shardContainer(indexId, snapshotShardId);
+        executor.execute(ActionRunnable.wrap(restoreListener, l -> {
+            final BlobStoreIndexShardSnapshot snapshot = loadShardSnapshot(container, snapshotId);
+            final SnapshotFiles snapshotFiles = new SnapshotFiles(snapshot.snapshot(), snapshot.indexFiles());
             new FileRestoreContext(metadata.name(), shardId, snapshotId, recoveryState) {
                 @Override
-                protected void restoreFiles(List<BlobStoreIndexShardSnapshot.FileInfo> filesToRecover, Store store) throws IOException {
-                    // restore the files from the snapshot to the Lucene store
-                    for (final BlobStoreIndexShardSnapshot.FileInfo fileToRecover : filesToRecover) {
-                        logger.trace("[{}] [{}] restoring file [{}]", shardId, snapshotId, fileToRecover.name());
-                        restoreFile(fileToRecover, store);
+                protected void restoreFiles(List<BlobStoreIndexShardSnapshot.FileInfo> filesToRecover, Store store,
+                                            ActionListener<Void> listener) {
+                    if (filesToRecover.isEmpty()) {
+                        listener.onResponse(null);
+                    } else {
+                        // Start as many workers as fit into the snapshot pool at once at the most
+                        final int workers =
+                            Math.min(threadPool.info(ThreadPool.Names.SNAPSHOT).getMax(), snapshotFiles.indexFiles().size());
+                        final BlockingQueue<BlobStoreIndexShardSnapshot.FileInfo> files = new LinkedBlockingQueue<>(filesToRecover);
+                        final ActionListener<Void> allFilesListener =
+                            fileQueueListener(files, workers, ActionListener.map(listener, v -> null));
+                        // restore the files from the snapshot to the Lucene store
+                        for (int i = 0; i < workers; ++i) {
+                            executor.execute(ActionRunnable.run(allFilesListener, () -> {
+                                store.incRef();
+                                try {
+                                    BlobStoreIndexShardSnapshot.FileInfo fileToRecover;
+                                    while ((fileToRecover = files.poll(0L, TimeUnit.MILLISECONDS)) != null) {
+                                        restoreFile(fileToRecover, store);
+                                    }
+                                } finally {
+                                    store.decRef();
+                                }
+                            }));
+                        }
                     }
                 }
 
@@ -1275,10 +1294,16 @@ protected InputStream openSlice(long slice) throws IOException {
                         }
                     }
                 }
-            }.restore(snapshotFiles, store);
-        } catch (Exception e) {
-            throw new IndexShardRestoreFailedException(shardId, "failed to restore snapshot [" + snapshotId + "]", e);
-        }
+            }.restore(snapshotFiles, store, l);
+        }));
+    }
+
+    private static ActionListener<Void> fileQueueListener(BlockingQueue<BlobStoreIndexShardSnapshot.FileInfo> files, int workers,
+                                                          ActionListener<Collection<Void>> listener) {
+        return ActionListener.delegateResponse(new GroupedActionListener<>(listener, workers), (l, e) -> {
+            files.clear(); // Stop uploading the remaining files if we run into any exception
+            l.onFailure(e);
+        });
     }
 
     private static InputStream maybeRateLimit(InputStream stream, @Nullable RateLimiter rateLimiter, CounterMetric metric) {
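
The extracted `fileQueueListener` above is now shared between snapshotting and restoring: it groups the completions of all workers and, via `ActionListener.delegateResponse`, clears the queue on the first failure so the remaining workers stop picking up files. A minimal stand-in sketch of the `delegateResponse` idea in plain Java (not the Elasticsearch API; the snapshot name and exception type are illustrative):

```java
import java.io.IOException;
import java.util.function.BiConsumer;

public class DelegateFailureSketch {

    // Simplified stand-in for org.elasticsearch.action.ActionListener<T>
    interface Listener<T> {
        void onResponse(T result);
        void onFailure(Exception e);
    }

    // Rough equivalent of ActionListener.delegateResponse(delegate, (l, e) -> ...):
    // responses pass straight through, failures are handed to the supplied handler.
    static <T> Listener<T> delegateFailure(Listener<T> delegate, BiConsumer<Listener<T>, Exception> onFailure) {
        return new Listener<T>() {
            @Override
            public void onResponse(T result) {
                delegate.onResponse(result);
            }

            @Override
            public void onFailure(Exception e) {
                onFailure.accept(delegate, e);
            }
        };
    }

    public static void main(String[] args) {
        Listener<Void> caller = new Listener<Void>() {
            @Override
            public void onResponse(Void ignored) {
                System.out.println("restored");
            }

            @Override
            public void onFailure(Exception e) {
                System.out.println("failed: " + e.getMessage() + " (caused by " + e.getCause() + ")");
            }
        };
        // Wrap the caller so that any failure surfaces as a single "failed to restore snapshot" error,
        // the same shape restoreListener gives to IndexShardRestoreFailedException above.
        Listener<Void> restoreListener = delegateFailure(caller,
            (l, e) -> l.onFailure(new RuntimeException("failed to restore snapshot [snap-1]", e)));
        restoreListener.onFailure(new IOException("blob not found"));
    }
}
```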

server/src/main/java/org/elasticsearch/repositories/blobstore/FileRestoreContext.java

Lines changed: 40 additions & 25 deletions
@@ -21,6 +21,7 @@
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.apache.logging.log4j.message.ParameterizedMessage;
+import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.util.iterable.Iterables;
 import org.elasticsearch.index.shard.ShardId;
@@ -74,7 +75,7 @@ protected FileRestoreContext(String repositoryName, ShardId shardId, SnapshotId
     /**
      * Performs restore operation
      */
-    public void restore(SnapshotFiles snapshotFiles, Store store) {
+    public void restore(SnapshotFiles snapshotFiles, Store store, ActionListener<Void> listener) {
         store.incRef();
         try {
             logger.debug("[{}] [{}] restoring to [{}] ...", snapshotId, repositoryName, shardId);
@@ -150,36 +151,49 @@ public void restore(SnapshotFiles snapshotFiles, Store store) {
                 }
             }
 
-            restoreFiles(filesToRecover, store);
+            restoreFiles(filesToRecover, store, ActionListener.wrap(
+                v -> {
+                    store.incRef();
+                    try {
+                        afterRestore(snapshotFiles, store, restoredSegmentsFile);
+                        listener.onResponse(null);
+                    } finally {
+                        store.decRef();
+                    }
+                }, listener::onFailure));
             } catch (IOException ex) {
                 throw new IndexShardRestoreFailedException(shardId, "Failed to recover index", ex);
             }
+        } catch (Exception e) {
+            listener.onFailure(e);
+        } finally {
+            store.decRef();
+        }
+    }
 
-            // read the snapshot data persisted
-            try {
-                Lucene.pruneUnreferencedFiles(restoredSegmentsFile.name(), store.directory());
-            } catch (IOException e) {
-                throw new IndexShardRestoreFailedException(shardId, "Failed to fetch index version after copying it over", e);
-            }
+    private void afterRestore(SnapshotFiles snapshotFiles, Store store, StoreFileMetaData restoredSegmentsFile) {
+        // read the snapshot data persisted
+        try {
+            Lucene.pruneUnreferencedFiles(restoredSegmentsFile.name(), store.directory());
+        } catch (IOException e) {
+            throw new IndexShardRestoreFailedException(shardId, "Failed to fetch index version after copying it over", e);
+        }
 
-            /// now, go over and clean files that are in the store, but were not in the snapshot
-            try {
-                for (String storeFile : store.directory().listAll()) {
-                    if (Store.isAutogenerated(storeFile) || snapshotFiles.containPhysicalIndexFile(storeFile)) {
-                        continue; //skip write.lock, checksum files and files that exist in the snapshot
-                    }
-                    try {
-                        store.deleteQuiet("restore", storeFile);
-                        store.directory().deleteFile(storeFile);
-                    } catch (IOException e) {
-                        logger.warn("[{}] [{}] failed to delete file [{}] during snapshot cleanup", shardId, snapshotId, storeFile);
-                    }
+        /// now, go over and clean files that are in the store, but were not in the snapshot
+        try {
+            for (String storeFile : store.directory().listAll()) {
+                if (Store.isAutogenerated(storeFile) || snapshotFiles.containPhysicalIndexFile(storeFile)) {
+                    continue; //skip write.lock, checksum files and files that exist in the snapshot
+                }
+                try {
+                    store.deleteQuiet("restore", storeFile);
+                    store.directory().deleteFile(storeFile);
+                } catch (IOException e) {
+                    logger.warn("[{}] [{}] failed to delete file [{}] during snapshot cleanup", shardId, snapshotId, storeFile);
                 }
-            } catch (IOException e) {
-                logger.warn("[{}] [{}] failed to list directory - some of files might not be deleted", shardId, snapshotId);
             }
-        } finally {
-            store.decRef();
+        } catch (IOException e) {
+            logger.warn("[{}] [{}] failed to list directory - some of files might not be deleted", shardId, snapshotId);
         }
     }
 
@@ -189,7 +203,8 @@ public void restore(SnapshotFiles snapshotFiles, Store store) {
      * @param filesToRecover List of files to restore
      * @param store          Store to restore into
      */
-    protected abstract void restoreFiles(List<BlobStoreIndexShardSnapshot.FileInfo> filesToRecover, Store store) throws IOException;
+    protected abstract void restoreFiles(List<BlobStoreIndexShardSnapshot.FileInfo> filesToRecover, Store store,
+                                         ActionListener<Void> listener);
 
     @SuppressWarnings("unchecked")
     private static Iterable<StoreFileMetaData> concat(Store.RecoveryDiff diff) {
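
`FileRestoreContext#restore` is now callback-driven: the post-restore steps that used to run inline (pruning unreferenced Lucene files and deleting store files that are not part of the snapshot, now in `afterRestore`) only run after `restoreFiles` has reported success. A rough, self-contained sketch of that control flow, using `CompletableFuture` in place of the `ActionListener` chain; method names are illustrative:

```java
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class RestoreFlowSketch {

    // restoreFiles is asynchronous; the post-restore cleanup runs only once it has succeeded,
    // and a failure anywhere simply fails the returned future (i.e. the caller's listener).
    static CompletableFuture<Void> restore(List<String> filesToRecover, ExecutorService pool) {
        return restoreFiles(filesToRecover, pool)
            .thenRun(RestoreFlowSketch::afterRestore);
    }

    static CompletableFuture<Void> restoreFiles(List<String> files, ExecutorService pool) {
        return CompletableFuture.runAsync(
            () -> files.forEach(f -> System.out.println("restoring " + f)), pool);
    }

    static void afterRestore() {
        // stands in for: prune unreferenced Lucene files, then delete store files
        // that are not part of the snapshot
        System.out.println("running post-restore cleanup");
    }

    public static void main(String[] args) {
        ExecutorService pool = Executors.newFixedThreadPool(2);
        restore(List.of("_0.cfs", "segments_2"), pool).join();
        pool.shutdown();
    }
}
```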

server/src/main/java/org/elasticsearch/snapshots/RestoreService.java

Lines changed: 1 addition & 1 deletion
@@ -107,7 +107,7 @@
  * {@link RoutingTable.Builder#addAsRestore(IndexMetaData, SnapshotRecoverySource)} method.
  * <p>
  * Individual shards are getting restored as part of normal recovery process in
- * {@link IndexShard#restoreFromRepository(Repository)} )}
+ * {@link IndexShard#restoreFromRepository} )}
  * method, which detects that shard should be restored from snapshot rather than recovered from gateway by looking
  * at the {@link ShardRouting#recoverySource()} property.
  * <p>

server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java

Lines changed: 8 additions & 7 deletions
@@ -2338,23 +2338,24 @@ public void testRestoreShard() throws IOException {
 
         DiscoveryNode localNode = new DiscoveryNode("foo", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT);
         target.markAsRecovering("store", new RecoveryState(routing, localNode, null));
-        assertTrue(target.restoreFromRepository(new RestoreOnlyRepository("test") {
+        final PlainActionFuture<Boolean> future = PlainActionFuture.newFuture();
+        target.restoreFromRepository(new RestoreOnlyRepository("test") {
             @Override
             public void restoreShard(Store store, SnapshotId snapshotId, IndexId indexId, ShardId snapshotShardId,
-                                     RecoveryState recoveryState) {
-                try {
+                                     RecoveryState recoveryState, ActionListener<Void> listener) {
+                ActionListener.completeWith(listener, () -> {
                     cleanLuceneIndex(targetStore.directory());
                     for (String file : sourceStore.directory().listAll()) {
                         if (file.equals("write.lock") || file.startsWith("extra")) {
                             continue;
                         }
                         targetStore.directory().copyFrom(sourceStore.directory(), file, file, IOContext.DEFAULT);
                     }
-                } catch (Exception ex) {
-                    throw new RuntimeException(ex);
-                }
+                    return null;
+                });
             }
-        }));
+        }, future);
+        assertTrue(future.actionGet());
         assertThat(target.getLocalCheckpoint(), equalTo(2L));
         assertThat(target.seqNoStats().getMaxSeqNo(), equalTo(2L));
         assertThat(target.seqNoStats().getGlobalCheckpoint(), equalTo(0L));
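
The test adapts the now-asynchronous API back to a blocking call with `PlainActionFuture`, which acts both as the listener handed to `restoreFromRepository` and as the future the test blocks on via `actionGet()`. A simplified stand-in for that pattern using `CompletableFuture`; the async method here is hypothetical:

```java
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ForkJoinPool;

public class BlockingTestAdapterSketch {

    // Hypothetical asynchronous call: completes the "listener" on another thread.
    static void restoreFromRepositoryAsync(CompletableFuture<Boolean> listener) {
        ForkJoinPool.commonPool().execute(() -> listener.complete(true));
    }

    public static void main(String[] args) {
        CompletableFuture<Boolean> future = new CompletableFuture<>();  // ~ PlainActionFuture.newFuture()
        restoreFromRepositoryAsync(future);                             // the future doubles as the listener
        boolean restored = future.join();                               // ~ future.actionGet()
        System.out.println("restored: " + restored);                    // the test then runs its assertions
    }
}
```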

server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java

Lines changed: 2 additions & 1 deletion
@@ -205,7 +205,8 @@ public void snapshotShard(Store store, MapperService mapperService, SnapshotId s
 
         @Override
         public void restoreShard(Store store, SnapshotId snapshotId, IndexId indexId, ShardId snapshotShardId,
-                                 RecoveryState recoveryState) {
+                                 RecoveryState recoveryState, ActionListener<Void> listener) {
+
         }
 
         @Override

0 commit comments