Skip to content

Commit d64a72c

Browse files
Snapshot Pagination and Scalability Improvements Backport to 7.x (#74676)
Backport of the recently introduced snapshot pagination and scalability improvements listed below. Merged as a single backport because the snapshot status API logic had massively diverged between `master` and `7.x`. With the work in the PRs below, the logic in `master` and `7.x` is once again closely aligned. #72842 #73172 #73199 #73570 #73952 #74236 #74451 (this one is only partly applicable as it was mainly a change to master to align the `master` and `7.x` branches)
1 parent 28ab899 commit d64a72c

File tree

70 files changed

+2646
-526
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

70 files changed

+2646
-526
lines changed

client/rest-high-level/src/test/java/org/elasticsearch/client/SnapshotIT.java

Lines changed: 32 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -33,22 +33,23 @@
3333
import org.elasticsearch.common.xcontent.XContentType;
3434
import org.elasticsearch.repositories.fs.FsRepository;
3535
import org.elasticsearch.rest.RestStatus;
36+
import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase;
3637
import org.elasticsearch.snapshots.RestoreInfo;
3738
import org.elasticsearch.snapshots.SnapshotInfo;
39+
import org.mockito.internal.util.collections.Sets;
3840

3941
import java.io.IOException;
4042
import java.util.Collections;
4143
import java.util.HashMap;
4244
import java.util.List;
4345
import java.util.Map;
44-
import java.util.Optional;
4546
import java.util.stream.Collectors;
4647

4748
import static org.elasticsearch.snapshots.SnapshotsService.NO_FEATURE_STATES_VALUE;
4849
import static org.elasticsearch.tasks.TaskResultsService.TASKS_FEATURE_NAME;
49-
import static org.hamcrest.Matchers.contains;
5050
import static org.hamcrest.Matchers.equalTo;
5151
import static org.hamcrest.Matchers.greaterThan;
52+
import static org.hamcrest.Matchers.hasSize;
5253
import static org.hamcrest.Matchers.is;
5354

5455
public class SnapshotIT extends ESRestHighLevelClientTestCase {
@@ -177,50 +178,54 @@ public void testCreateSnapshot() throws Exception {
177178
}
178179

179180
public void testGetSnapshots() throws IOException {
180-
String repository = "test_repository";
181+
String repository1 = "test_repository1";
182+
String repository2 = "test_repository2";
181183
String snapshot1 = "test_snapshot1";
182184
String snapshot2 = "test_snapshot2";
183185

184-
AcknowledgedResponse putRepositoryResponse = createTestRepository(repository, FsRepository.TYPE, "{\"location\": \".\"}");
186+
AcknowledgedResponse putRepositoryResponse =
187+
createTestRepository(repository1, FsRepository.TYPE, "{\"location\": \"loc1\"}");
185188
assertTrue(putRepositoryResponse.isAcknowledged());
186189

187-
CreateSnapshotRequest createSnapshotRequest1 = new CreateSnapshotRequest(repository, snapshot1);
190+
AcknowledgedResponse putRepositoryResponse2 =
191+
createTestRepository(repository2, FsRepository.TYPE, "{\"location\": \"loc2\"}");
192+
assertTrue(putRepositoryResponse2.isAcknowledged());
193+
194+
CreateSnapshotRequest createSnapshotRequest1 = new CreateSnapshotRequest(repository1, snapshot1);
188195
createSnapshotRequest1.waitForCompletion(true);
189196
CreateSnapshotResponse putSnapshotResponse1 = createTestSnapshot(createSnapshotRequest1);
190-
CreateSnapshotRequest createSnapshotRequest2 = new CreateSnapshotRequest(repository, snapshot2);
197+
CreateSnapshotRequest createSnapshotRequest2 = new CreateSnapshotRequest(repository2, snapshot2);
191198
createSnapshotRequest2.waitForCompletion(true);
192-
Map<String, Object> originalMetadata = randomUserMetadata();
199+
Map<String, Object> originalMetadata = AbstractSnapshotIntegTestCase.randomUserMetadata();
193200
createSnapshotRequest2.userMetadata(originalMetadata);
194201
CreateSnapshotResponse putSnapshotResponse2 = createTestSnapshot(createSnapshotRequest2);
195202
// check that the request went ok without parsing JSON here. When using the high level client, check acknowledgement instead.
196203
assertEquals(RestStatus.OK, putSnapshotResponse1.status());
197204
assertEquals(RestStatus.OK, putSnapshotResponse2.status());
198205

199-
GetSnapshotsRequest request;
200-
if (randomBoolean()) {
201-
request = new GetSnapshotsRequest(repository);
202-
} else if (randomBoolean()) {
203-
request = new GetSnapshotsRequest(repository, new String[] {"_all"});
206+
GetSnapshotsRequest request = new GetSnapshotsRequest(
207+
randomFrom(new String[]{"_all"}, new String[]{"*"}, new String[]{repository1, repository2}),
208+
randomFrom(new String[]{"_all"}, new String[]{"*"}, new String[]{snapshot1, snapshot2})
209+
);
210+
request.ignoreUnavailable(true);
204211

205-
} else {
206-
request = new GetSnapshotsRequest(repository, new String[] {snapshot1, snapshot2});
207-
}
208212
GetSnapshotsResponse response = execute(request, highLevelClient().snapshot()::get, highLevelClient().snapshot()::getAsync);
209213

210-
assertEquals(2, response.getSnapshots().size());
211-
assertThat(response.getSnapshots().stream().map((s) -> s.snapshotId().getName()).collect(Collectors.toList()),
212-
contains("test_snapshot1", "test_snapshot2"));
213-
Optional<Map<String, Object>> returnedMetadata = response.getSnapshots().stream()
214-
.filter(s -> s.snapshotId().getName().equals("test_snapshot2"))
215-
.findFirst()
216-
.map(SnapshotInfo::userMetadata);
217-
if (returnedMetadata.isPresent()) {
218-
assertEquals(originalMetadata, returnedMetadata.get());
219-
} else {
220-
assertNull("retrieved metadata is null, expected non-null metadata", originalMetadata);
221-
}
214+
assertThat(response.isFailed(), is(false));
215+
assertEquals(
216+
Sets.newSet(repository1, repository2),
217+
response.getSnapshots().stream().map(SnapshotInfo::repository).collect(Collectors.toSet())
218+
);
219+
220+
assertThat(response.getSnapshots(), hasSize(2));
221+
assertThat(response.getSnapshots().get(0).snapshotId().getName(), equalTo(snapshot1));
222+
assertThat(response.getSnapshots().get(0).repository(), equalTo(repository1));
223+
assertThat(response.getSnapshots().get(1).snapshotId().getName(), equalTo(snapshot2));
224+
assertThat(response.getSnapshots().get(1).userMetadata(), equalTo(originalMetadata));
225+
assertThat(response.getSnapshots().get(1).repository(), equalTo(repository2));
222226
}
223227

228+
224229
public void testSnapshotsStatus() throws IOException {
225230
String testRepository = "test";
226231
String testSnapshot = "snapshot";

docs/reference/cat/snapshots.asciidoc

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ more repositories. A snapshot is a backup of an index or running {es} cluster.
1313

1414
`GET /_cat/snapshots/<repository>`
1515

16+
`GET /_cat/snapshots`
17+
1618
[[cat-snapshots-api-prereqs]]
1719
==== {api-prereq-title}
1820

@@ -27,9 +29,10 @@ more repositories. A snapshot is a backup of an index or running {es} cluster.
2729
`<repository>`::
2830
+
2931
--
30-
(Required, string) Snapshot repository used to limit the request.
32+
(Optional, string) Comma-separated list of snapshot repositories used to limit
33+
the request. Accepts wildcard expressions. `_all` returns all repositories.
3134

32-
If the repository fails during the request, {es} returns an error.
35+
If any repository fails during the request, {es} returns an error.
3336
--
3437

3538

@@ -127,9 +130,9 @@ The API returns the following response:
127130

128131
[source,txt]
129132
--------------------------------------------------
130-
id status start_epoch start_time end_epoch end_time duration indices successful_shards failed_shards total_shards
131-
snap1 FAILED 1445616705 18:11:45 1445616978 18:16:18 4.6m 1 4 1 5
132-
snap2 SUCCESS 1445634298 23:04:58 1445634672 23:11:12 6.2m 2 10 0 10
133+
id repository status start_epoch start_time end_epoch end_time duration indices successful_shards failed_shards total_shards
134+
snap1 repo1 FAILED 1445616705 18:11:45 1445616978 18:16:18 4.6m 1 4 1 5
135+
snap2 repo1 SUCCESS 1445634298 23:04:58 1445634672 23:11:12 6.2m 2 10 0 10
133136
--------------------------------------------------
134137
// TESTRESPONSE[s/FAILED/SUCCESS/ s/14456\d+/\\d+/ s/\d+(\.\d+)?(m|s|ms)/\\d+(\\.\\d+)?(m|s|ms)/]
135138
// TESTRESPONSE[s/\d+:\d+:\d+/\\d+:\\d+:\\d+/]

docs/reference/modules/threadpool.asciidoc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ There are several thread pools, but the important ones include:
4343
keep-alive of `5m` and a max of `min(5, (`<<node.processors,
4444
`# of allocated processors`>>`) / 2)`.
4545

46+
`snapshot_meta`::
47+
For snapshot repository metadata read operations. Thread pool type is `scaling` with a
48+
keep-alive of `5m` and a max of `min(50, (`<<node.processors,
49+
`# of allocated processors`>>` pass:[ * ]3))`.
50+
4651
`warmer`::
4752
For segment warm-up operations. Thread pool type is `scaling` with a
4853
keep-alive of `5m` and a max of `min(5, (`<<node.processors,

docs/reference/snapshot-restore/apis/create-snapshot-api.asciidoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ The API returns the following response:
176176
"snapshot": {
177177
"snapshot": "snapshot_2",
178178
"uuid": "vdRctLCxSketdKb54xw67g",
179+
"repository": "my_repository",
179180
"version_id": <version_id>,
180181
"version": <version>,
181182
"indices": [],

docs/reference/snapshot-restore/apis/get-snapshot-api.asciidoc

Lines changed: 177 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ PUT /_snapshot/my_repository
1919
2020
PUT /_snapshot/my_repository/my_snapshot?wait_for_completion=true
2121
22+
PUT /_snapshot/my_repository/snapshot_1?wait_for_completion=true
2223
PUT /_snapshot/my_repository/snapshot_2?wait_for_completion=true
24+
PUT /_snapshot/my_repository/snapshot_3?wait_for_completion=true
2325
----
2426
// TESTSETUP
2527
////
@@ -57,7 +59,8 @@ Use the get snapshot API to return information about one or more snapshots, incl
5759

5860
`<repository>`::
5961
(Required, string)
60-
Snapshot repository name used to limit the request.
62+
Comma-separated list of snapshot repository names used to limit the request.
63+
Wildcard (`*`) expressions are supported.
6164
+
6265
To get information about all snapshot repositories registered in the
6366
cluster, omit this parameter or use `*` or `_all`.
@@ -99,6 +102,45 @@ comprising the number of shards in the index, the total size of the index in
99102
bytes, and the maximum number of segments per shard in the index. Defaults to
100103
`false`, meaning that this information is omitted.
101104

105+
`sort`::
106+
(Optional, string)
107+
Allows setting a sort order for the result. Defaults to `start_time`, i.e. sorting by snapshot start time stamp.
108+
+
109+
.Valid values for `sort`
110+
[%collapsible%open]
111+
====
112+
`start_time`::
113+
Sort snapshots by their start time stamp and break ties by snapshot name.
114+
115+
`duration`::
116+
Sort snapshots by their duration and break ties by snapshot name.
117+
118+
`name`::
119+
Sort snapshots by their name.
120+
121+
`index_count`::
122+
Sort snapshots by the number of indices they contain and break ties by snapshot name.
123+
====
124+
125+
`size`::
126+
(Optional, integer)
127+
Maximum number of snapshots to return. Defaults to `0`, which means that all snapshots matching the request are returned without limit.
128+
129+
`order`::
130+
(Optional, string)
131+
Sort order. Valid values are `asc` for ascending and `desc` for descending order. Defaults to `asc`, meaning ascending order.
132+
133+
`after`::
134+
(Optional, string)
135+
Offset identifier to start pagination from as returned by the `next` field in the response body.
136+
137+
NOTE: The `after` parameter and `next` field allow for iterating through snapshots with some consistency guarantees regarding concurrent
138+
creation or deletion of snapshots. It is guaranteed that any snapshot that exists at the beginning of the iteration and is not concurrently
139+
deleted will be seen during the iteration. Snapshots concurrently created may be seen during an iteration.
140+
141+
NOTE: The pagination parameters `size`, `order`, `after` and `sort` are not supported when using `verbose=false` and the sort order for
142+
requests with `verbose=false` is undefined.
143+
102144
[role="child_attributes"]
103145
[[get-snapshot-api-response-body]]
104146
==== {api-response-body-title}
@@ -236,6 +278,10 @@ The snapshot `state` can be one of the following values:
236278
that were not processed correctly.
237279
====
238280
--
281+
`next`::
282+
(string)
283+
If the request contained a size limit and there might be more results, a `next` field will be added to the response and can be used as the
284+
`after` query parameter to fetch additional results.
239285

240286
[[get-snapshot-api-example]]
241287
==== {api-examples-title}
@@ -256,6 +302,7 @@ The API returns the following response:
256302
{
257303
"snapshot": "snapshot_2",
258304
"uuid": "vdRctLCxSketdKb54xw67g",
305+
"repository": "my_repository",
259306
"version_id": <version_id>,
260307
"version": <version>,
261308
"indices": [],
@@ -265,7 +312,7 @@ The API returns the following response:
265312
"state": "SUCCESS",
266313
"start_time": "2020-07-06T21:55:18.129Z",
267314
"start_time_in_millis": 1593093628850,
268-
"end_time": "2020-07-06T21:55:18.876Z",
315+
"end_time": "2020-07-06T21:55:18.129Z",
269316
"end_time_in_millis": 1593094752018,
270317
"duration_in_millis": 0,
271318
"failures": [],
@@ -283,6 +330,133 @@ The API returns the following response:
283330
// TESTRESPONSE[s/"version": <version>/"version": $body.snapshots.0.version/]
284331
// TESTRESPONSE[s/"start_time": "2020-07-06T21:55:18.129Z"/"start_time": $body.snapshots.0.start_time/]
285332
// TESTRESPONSE[s/"start_time_in_millis": 1593093628850/"start_time_in_millis": $body.snapshots.0.start_time_in_millis/]
286-
// TESTRESPONSE[s/"end_time": "2020-07-06T21:55:18.876Z"/"end_time": $body.snapshots.0.end_time/]
333+
// TESTRESPONSE[s/"end_time": "2020-07-06T21:55:18.129Z"/"end_time": $body.snapshots.0.end_time/]
334+
// TESTRESPONSE[s/"end_time_in_millis": 1593094752018/"end_time_in_millis": $body.snapshots.0.end_time_in_millis/]
335+
// TESTRESPONSE[s/"duration_in_millis": 0/"duration_in_millis": $body.snapshots.0.duration_in_millis/]
336+
337+
The following request returns information for all snapshots with prefix `snapshot` in the `my_repository` repository,
338+
limiting the response size to 2 and sorting by snapshot name.
339+
340+
[source,console]
341+
----
342+
GET /_snapshot/my_repository/snapshot*?size=2&sort=name
343+
----
344+
345+
The API returns the following response:
346+
347+
[source,console-result]
348+
----
349+
{
350+
"snapshots": [
351+
{
352+
"snapshot": "snapshot_1",
353+
"uuid": "dKb54xw67gvdRctLCxSket",
354+
"repository": "my_repository",
355+
"version_id": <version_id>,
356+
"version": <version>,
357+
"indices": [],
358+
"data_streams": [],
359+
"feature_states": [],
360+
"include_global_state": true,
361+
"state": "SUCCESS",
362+
"start_time": "2020-07-06T21:55:18.129Z",
363+
"start_time_in_millis": 1593093628850,
364+
"end_time": "2020-07-06T21:55:18.129Z",
365+
"end_time_in_millis": 1593094752018,
366+
"duration_in_millis": 0,
367+
"failures": [],
368+
"shards": {
369+
"total": 0,
370+
"failed": 0,
371+
"successful": 0
372+
}
373+
},
374+
{
375+
"snapshot": "snapshot_2",
376+
"uuid": "vdRctLCxSketdKb54xw67g",
377+
"repository": "my_repository",
378+
"version_id": <version_id>,
379+
"version": <version>,
380+
"indices": [],
381+
"data_streams": [],
382+
"feature_states": [],
383+
"include_global_state": true,
384+
"state": "SUCCESS",
385+
"start_time": "2020-07-06T21:55:18.130Z",
386+
"start_time_in_millis": 1593093628851,
387+
"end_time": "2020-07-06T21:55:18.130Z",
388+
"end_time_in_millis": 1593094752019,
389+
"duration_in_millis": 1,
390+
"failures": [],
391+
"shards": {
392+
"total": 0,
393+
"failed": 0,
394+
"successful": 0
395+
}
396+
}
397+
],
398+
"next": "c25hcHNob3RfMixteV9yZXBvc2l0b3J5LHNuYXBzaG90XzI="
399+
}
400+
----
401+
// TESTRESPONSE[s/"uuid": "dKb54xw67gvdRctLCxSket"/"uuid": $body.snapshots.0.uuid/]
402+
// TESTRESPONSE[s/"uuid": "vdRctLCxSketdKb54xw67g"/"uuid": $body.snapshots.1.uuid/]
403+
// TESTRESPONSE[s/"version_id": <version_id>/"version_id": $body.snapshots.0.version_id/]
404+
// TESTRESPONSE[s/"version": <version>/"version": $body.snapshots.0.version/]
405+
// TESTRESPONSE[s/"start_time": "2020-07-06T21:55:18.129Z"/"start_time": $body.snapshots.0.start_time/]
406+
// TESTRESPONSE[s/"start_time": "2020-07-06T21:55:18.130Z"/"start_time": $body.snapshots.1.start_time/]
407+
// TESTRESPONSE[s/"start_time_in_millis": 1593093628850/"start_time_in_millis": $body.snapshots.0.start_time_in_millis/]
408+
// TESTRESPONSE[s/"start_time_in_millis": 1593093628851/"start_time_in_millis": $body.snapshots.1.start_time_in_millis/]
409+
// TESTRESPONSE[s/"end_time": "2020-07-06T21:55:18.129Z"/"end_time": $body.snapshots.0.end_time/]
410+
// TESTRESPONSE[s/"end_time": "2020-07-06T21:55:18.130Z"/"end_time": $body.snapshots.1.end_time/]
287411
// TESTRESPONSE[s/"end_time_in_millis": 1593094752018/"end_time_in_millis": $body.snapshots.0.end_time_in_millis/]
412+
// TESTRESPONSE[s/"end_time_in_millis": 1593094752019/"end_time_in_millis": $body.snapshots.1.end_time_in_millis/]
288413
// TESTRESPONSE[s/"duration_in_millis": 0/"duration_in_millis": $body.snapshots.0.duration_in_millis/]
414+
// TESTRESPONSE[s/"duration_in_millis": 1/"duration_in_millis": $body.snapshots.1.duration_in_millis/]
415+
416+
A subsequent request for the remaining snapshots can then be made using the `next` value from the previous response as the `after` parameter.
417+
418+
[source,console]
419+
----
420+
GET /_snapshot/my_repository/snapshot*?size=2&sort=name&after=c25hcHNob3RfMixteV9yZXBvc2l0b3J5LHNuYXBzaG90XzI=
421+
----
422+
423+
The API returns the following response:
424+
425+
[source,console-result]
426+
----
427+
{
428+
"snapshots": [
429+
{
430+
"snapshot": "snapshot_3",
431+
"uuid": "dRctdKb54xw67gvLCxSket",
432+
"repository": "my_repository",
433+
"version_id": <version_id>,
434+
"version": <version>,
435+
"indices": [],
436+
"data_streams": [],
437+
"feature_states": [],
438+
"include_global_state": true,
439+
"state": "SUCCESS",
440+
"start_time": "2020-07-06T21:55:18.129Z",
441+
"start_time_in_millis": 1593093628850,
442+
"end_time": "2020-07-06T21:55:18.129Z",
443+
"end_time_in_millis": 1593094752018,
444+
"duration_in_millis": 0,
445+
"failures": [],
446+
"shards": {
447+
"total": 0,
448+
"failed": 0,
449+
"successful": 0
450+
}
451+
}
452+
]
453+
}
454+
----
455+
// TESTRESPONSE[s/"uuid": "dRctdKb54xw67gvLCxSket"/"uuid": $body.snapshots.0.uuid/]
456+
// TESTRESPONSE[s/"version_id": <version_id>/"version_id": $body.snapshots.0.version_id/]
457+
// TESTRESPONSE[s/"version": <version>/"version": $body.snapshots.0.version/]
458+
// TESTRESPONSE[s/"start_time": "2020-07-06T21:55:18.129Z"/"start_time": $body.snapshots.0.start_time/]
459+
// TESTRESPONSE[s/"start_time_in_millis": 1593093628850/"start_time_in_millis": $body.snapshots.0.start_time_in_millis/]
460+
// TESTRESPONSE[s/"end_time": "2020-07-06T21:55:18.129Z"/"end_time": $body.snapshots.0.end_time/]
461+
// TESTRESPONSE[s/"end_time_in_millis": 1593094752018/"end_time_in_millis": $body.snapshots.0.end_time_in_millis/]
462+
// TESTRESPONSE[s/"duration_in_millis": 0/"duration_in_millis": $body.snapshots.0.duration_in_millis/]

0 commit comments

Comments
 (0)