Skip to content

Commit f136798

Browse files
PnPiejimczi
authored andcommitted
Add scroll parameter to _reindex API (#28041)
Be able to change scroll timeout in _reindex API (by default: 5m)
1 parent 8cbac9d commit f136798

File tree

6 files changed

+55
-8
lines changed

6 files changed

+55
-8
lines changed

core/src/main/java/org/elasticsearch/index/reindex/AbstractBulkByScrollRequest.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.elasticsearch.common.io.stream.StreamInput;
2929
import org.elasticsearch.common.io.stream.StreamOutput;
3030
import org.elasticsearch.common.unit.TimeValue;
31+
import org.elasticsearch.search.Scroll;
3132
import org.elasticsearch.search.builder.SearchSourceBuilder;
3233
import org.elasticsearch.tasks.Task;
3334
import org.elasticsearch.tasks.TaskId;
@@ -42,7 +43,7 @@
4243
public abstract class AbstractBulkByScrollRequest<Self extends AbstractBulkByScrollRequest<Self>> extends ActionRequest {
4344

4445
public static final int SIZE_ALL_MATCHES = -1;
45-
private static final TimeValue DEFAULT_SCROLL_TIMEOUT = timeValueMinutes(5);
46+
static final TimeValue DEFAULT_SCROLL_TIMEOUT = timeValueMinutes(5);
4647
static final int DEFAULT_SCROLL_SIZE = 1000;
4748

4849
public static final int AUTO_SLICES = 0;
@@ -341,6 +342,21 @@ public boolean getShouldStoreResult() {
341342
return shouldStoreResult;
342343
}
343344

345+
/**
346+
* Set scroll timeout for {@link SearchRequest}
347+
*/
348+
public Self setScroll(TimeValue keepAlive) {
349+
searchRequest.scroll(new Scroll(keepAlive));
350+
return self();
351+
}
352+
353+
/**
354+
* Get scroll timeout
355+
*/
356+
public TimeValue getScrollTime() {
357+
return searchRequest.scroll().keepAlive();
358+
}
359+
344360
/**
345361
* The number of slices this task should be divided into. Defaults to 1 meaning the task isn't sliced into subtasks.
346362
*/

docs/reference/docs/delete-by-query.asciidoc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,8 @@ POST twitter/_delete_by_query?scroll_size=5000
142142
=== URL Parameters
143143

144144
In addition to the standard parameters like `pretty`, the Delete By Query API
145-
also supports `refresh`, `wait_for_completion`, `wait_for_active_shards`, and `timeout`.
145+
also supports `refresh`, `wait_for_completion`, `wait_for_active_shards`, `timeout`
146+
and `scroll`.
146147

147148
Sending the `refresh` will refresh all shards involved in the delete by query
148149
once the request completes. This is different than the Delete API's `refresh`
@@ -161,7 +162,9 @@ Elasticsearch can reclaim the space it uses.
161162
before proceeding with the request. See <<index-wait-for-active-shards,here>>
162163
for details. `timeout` controls how long each write request waits for unavailable
163164
shards to become available. Both work exactly how they work in the
164-
<<docs-bulk,Bulk API>>.
165+
<<docs-bulk,Bulk API>>. As `_delete_by_query` uses scroll search, you can also specify
166+
the `scroll` parameter to control how long it keeps the "search context" alive,
167+
eg `?scroll=10m`, by default it's 5 minutes.
165168

166169
`requests_per_second` can be set to any positive decimal number (`1.4`, `6`,
167170
`1000`, etc) and throttles rate at which `_delete_by_query` issues batches of

docs/reference/docs/reindex.asciidoc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -512,8 +512,8 @@ POST _reindex
512512
=== URL Parameters
513513

514514
In addition to the standard parameters like `pretty`, the Reindex API also
515-
supports `refresh`, `wait_for_completion`, `wait_for_active_shards`, `timeout`, and
516-
`requests_per_second`.
515+
supports `refresh`, `wait_for_completion`, `wait_for_active_shards`, `timeout`,
516+
`scroll` and `requests_per_second`.
517517

518518
Sending the `refresh` url parameter will cause all indexes to which the request
519519
wrote to be refreshed. This is different than the Index API's `refresh`
@@ -531,7 +531,9 @@ Elasticsearch can reclaim the space it uses.
531531
before proceeding with the reindexing. See <<index-wait-for-active-shards,here>>
532532
for details. `timeout` controls how long each write request waits for unavailable
533533
shards to become available. Both work exactly how they work in the
534-
<<docs-bulk,Bulk API>>.
534+
<<docs-bulk,Bulk API>>. As `_reindex` uses scroll search, you can also specify
535+
the `scroll` parameter to control how long it keeps the "search context" alive,
536+
eg `?scroll=10m`, by default it's 5 minutes.
535537

536538
`requests_per_second` can be set to any positive decimal number (`1.4`, `6`,
537539
`1000`, etc) and throttles rate at which reindex issues batches of index

docs/reference/docs/update-by-query.asciidoc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,8 @@ POST twitter/_update_by_query?pipeline=set-foo
200200
=== URL Parameters
201201

202202
In addition to the standard parameters like `pretty`, the Update By Query API
203-
also supports `refresh`, `wait_for_completion`, `wait_for_active_shards`, and `timeout`.
203+
also supports `refresh`, `wait_for_completion`, `wait_for_active_shards`, `timeout`
204+
and `scroll`.
204205

205206
Sending the `refresh` will update all shards in the index being updated when
206207
the request completes. This is different than the Index API's `refresh`
@@ -218,7 +219,9 @@ Elasticsearch can reclaim the space it uses.
218219
before proceeding with the request. See <<index-wait-for-active-shards,here>>
219220
for details. `timeout` controls how long each write request waits for unavailable
220221
shards to become available. Both work exactly how they work in the
221-
<<docs-bulk,Bulk API>>.
222+
<<docs-bulk,Bulk API>>. As `_update_by_query` uses scroll search, you can also specify
223+
the `scroll` parameter to control how long it keeps the "search context" alive,
224+
eg `?scroll=10m`, by default it's 5 minutes.
222225

223226
`requests_per_second` can be set to any positive decimal number (`1.4`, `6`,
224227
`1000`, etc) and throttles rate at which `_update_by_query` issues batches of

modules/reindex/src/main/java/org/elasticsearch/index/reindex/RestReindexAction.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,9 @@ protected ReindexRequest buildRequest(RestRequest request) throws IOException {
119119
try (XContentParser parser = request.contentParser()) {
120120
PARSER.parse(parser, internal, null);
121121
}
122+
if (request.hasParam("scroll")) {
123+
internal.setScroll(parseTimeValue(request.param("scroll"), "scroll"));
124+
}
122125
return internal;
123126
}
124127

modules/reindex/src/test/java/org/elasticsearch/index/reindex/RestReindexActionTests.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,12 @@
2121

2222
import org.elasticsearch.action.index.IndexRequest;
2323
import org.elasticsearch.action.search.SearchRequest;
24+
import org.elasticsearch.common.bytes.BytesArray;
2425
import org.elasticsearch.common.bytes.BytesReference;
2526
import org.elasticsearch.common.settings.Settings;
2627
import org.elasticsearch.common.xcontent.XContentBuilder;
2728
import org.elasticsearch.common.xcontent.XContentParser;
29+
import org.elasticsearch.common.xcontent.XContentType;
2830
import org.elasticsearch.common.xcontent.json.JsonXContent;
2931
import org.elasticsearch.rest.RestController;
3032
import org.elasticsearch.test.ESTestCase;
@@ -150,6 +152,24 @@ public void testPipelineQueryParameterIsError() throws IOException {
150152
assertEquals("_reindex doesn't support [pipeline] as a query parmaeter. Specify it in the [dest] object instead.", e.getMessage());
151153
}
152154

155+
public void testSetScrollTimeout() throws IOException {
156+
{
157+
RestReindexAction action = new RestReindexAction(Settings.EMPTY, mock(RestController.class));
158+
FakeRestRequest.Builder requestBuilder = new FakeRestRequest.Builder(xContentRegistry());
159+
requestBuilder.withContent(new BytesArray("{}"), XContentType.JSON);
160+
ReindexRequest request = action.buildRequest(requestBuilder.build());
161+
assertEquals(AbstractBulkByScrollRequest.DEFAULT_SCROLL_TIMEOUT, request.getScrollTime());
162+
}
163+
{
164+
RestReindexAction action = new RestReindexAction(Settings.EMPTY, mock(RestController.class));
165+
FakeRestRequest.Builder requestBuilder = new FakeRestRequest.Builder(xContentRegistry());
166+
requestBuilder.withParams(singletonMap("scroll", "10m"));
167+
requestBuilder.withContent(new BytesArray("{}"), XContentType.JSON);
168+
ReindexRequest request = action.buildRequest(requestBuilder.build());
169+
assertEquals("10m", request.getScrollTime().toString());
170+
}
171+
}
172+
153173
private RemoteInfo buildRemoteInfoHostTestCase(String hostInRest) throws IOException {
154174
Map<String, Object> remote = new HashMap<>();
155175
remote.put("host", hostInRest);

0 commit comments

Comments
 (0)