Skip to content

Commit a57f929

Browse files
author
Hendrik Muhs
authored
[7.17][Transform] Prevent stopping of transforms due to threadpool limitation (#83539)
Remove the indexer threadpool and use the generic threadpool instead (the indexer threadpool was only used on start). Fixes #81796. Backport of #81912.
1 parent b3b99ee commit a57f929

File tree

3 files changed

+27
-19
lines changed

3 files changed

+27
-19
lines changed

x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/Transform.java

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,6 @@
5151
import org.elasticsearch.rest.RestHandler;
5252
import org.elasticsearch.rest.RestStatus;
5353
import org.elasticsearch.script.ScriptService;
54-
import org.elasticsearch.threadpool.ExecutorBuilder;
55-
import org.elasticsearch.threadpool.FixedExecutorBuilder;
5654
import org.elasticsearch.threadpool.ThreadPool;
5755
import org.elasticsearch.watcher.ResourceWatcherService;
5856
import org.elasticsearch.xcontent.NamedXContentRegistry;
@@ -147,7 +145,6 @@
147145
public class Transform extends Plugin implements SystemIndexPlugin, PersistentTaskPlugin {
148146

149147
public static final String NAME = "transform";
150-
public static final String TASK_THREAD_POOL_NAME = "transform_indexing";
151148

152149
private static final Logger logger = LogManager.getLogger(Transform.class);
153150

@@ -289,17 +286,6 @@ public List<RestHandler> getRestHandlers(
289286
);
290287
}
291288

292-
@Override
293-
public List<ExecutorBuilder<?>> getExecutorBuilders(Settings settingsToUse) {
294-
if (transportClientMode) {
295-
return emptyList();
296-
}
297-
298-
FixedExecutorBuilder indexing = new FixedExecutorBuilder(settingsToUse, TASK_THREAD_POOL_NAME, 4, 4, "transform.task_thread_pool");
299-
300-
return Collections.singletonList(indexing);
301-
}
302-
303289
@Override
304290
public Collection<Object> createComponents(
305291
Client client,

x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/transforms/TransformIndexer.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,7 @@ protected IterationResult<TransformIndexerPosition> doProcess(SearchResponse sea
570570

571571
@Override
572572
public synchronized boolean maybeTriggerAsyncJob(long now) {
573+
// threadpool: trigger_engine_scheduler if triggered from the scheduler, generic if called from the task on start
573574
if (context.getTaskState() == TransformTaskState.FAILED) {
574575
logger.debug("[{}] schedule was triggered for transform but task is failed. Ignoring trigger.", getJobId());
575576
return false;

x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/transforms/TransformPersistentTasksExecutor.java

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ public TransformPersistentTasksExecutor(
8383
Settings settings,
8484
IndexNameExpressionResolver resolver
8585
) {
86-
super(TransformField.TASK_NAME, Transform.TASK_THREAD_POOL_NAME);
86+
super(TransformField.TASK_NAME, ThreadPool.Names.GENERIC);
8787
this.client = client;
8888
this.transformServices = transformServices;
8989
this.threadPool = threadPool;
@@ -100,6 +100,13 @@ public PersistentTasksCustomMetadata.Assignment getAssignment(
100100
Collection<DiscoveryNode> candidateNodes,
101101
ClusterState clusterState
102102
) {
103+
/* Note:
104+
*
105+
* This method is executed on the _master_ node. The master and transform nodes might be on different versions.
106+
* Therefore certain checks must happen on the corresponding node, e.g. the existence of the internal index.
107+
*
108+
* Operations on the transform node happen in {@link #nodeOperation()}
109+
*/
103110
if (TransformMetadata.getTransformMetadata(clusterState).isResetMode()) {
104111
return new PersistentTasksCustomMetadata.Assignment(
105112
null,
@@ -177,6 +184,12 @@ static List<String> verifyIndicesPrimaryShardsAreActive(ClusterState clusterStat
177184

178185
@Override
179186
protected void nodeOperation(AllocatedPersistentTask task, @Nullable TransformTaskParams params, PersistentTaskState state) {
187+
/* Note:
188+
*
189+
* This method is executed on the _transform_ node. The master and transform nodes might be on different versions.
190+
* Operations on master happen in {@link #getAssignment()}
191+
*/
192+
180193
final String transformId = params.getId();
181194
final TransformTask buildTask = (TransformTask) task;
182195
// NOTE: TransformPersistentTasksExecutor#createTask pulls in the stored task state from the ClusterState when the object
@@ -205,6 +218,7 @@ protected void nodeOperation(AllocatedPersistentTask task, @Nullable TransformTa
205218

206219
// <6> load next checkpoint
207220
ActionListener<TransformCheckpoint> getTransformNextCheckpointListener = ActionListener.wrap(nextCheckpoint -> {
221+
// threadpool: system_read
208222

209223
if (nextCheckpoint.isEmpty()) {
210224
// extra safety: reset position and progress if next checkpoint is empty
@@ -228,8 +242,9 @@ protected void nodeOperation(AllocatedPersistentTask task, @Nullable TransformTa
228242

229243
// <5> load last checkpoint
230244
ActionListener<TransformCheckpoint> getTransformLastCheckpointListener = ActionListener.wrap(lastCheckpoint -> {
231-
indexerBuilder.setLastCheckpoint(lastCheckpoint);
245+
// threadpool: system_read
232246

247+
indexerBuilder.setLastCheckpoint(lastCheckpoint);
233248
logger.trace("[{}] Loaded last checkpoint [{}], looking for next checkpoint", transformId, lastCheckpoint.getCheckpoint());
234249
transformServices.getConfigManager()
235250
.getTransformCheckpoint(transformId, lastCheckpoint.getCheckpoint() + 1, getTransformNextCheckpointListener);
@@ -244,6 +259,8 @@ protected void nodeOperation(AllocatedPersistentTask task, @Nullable TransformTa
244259
// Schedule execution regardless
245260
ActionListener<Tuple<TransformStoredDoc, SeqNoPrimaryTermAndIndex>> transformStatsActionListener = ActionListener.wrap(
246261
stateAndStatsAndSeqNoPrimaryTermAndIndex -> {
262+
// threadpool: system_read
263+
247264
TransformStoredDoc stateAndStats = stateAndStatsAndSeqNoPrimaryTermAndIndex.v1();
248265
SeqNoPrimaryTermAndIndex seqNoPrimaryTermAndIndex = stateAndStatsAndSeqNoPrimaryTermAndIndex.v2();
249266
// Since we have not set the value for this yet, it SHOULD be null
@@ -289,6 +306,7 @@ protected void nodeOperation(AllocatedPersistentTask task, @Nullable TransformTa
289306

290307
// <3> Validate the transform, assigning it to the indexer, and get the previous stats (if they exist)
291308
ActionListener<TransformConfig> getTransformConfigListener = ActionListener.wrap(config -> {
309+
// threadpool: system_read
292310
ValidationException validationException = config.validate(null);
293311
if (validationException == null) {
294312
indexerBuilder.setTransformConfig(config);
@@ -371,9 +389,12 @@ private void startTask(
371389
Long previousCheckpoint,
372390
ActionListener<StartTransformAction.Response> listener
373391
) {
374-
buildTask.initializeIndexer(indexerBuilder);
375-
// TransformTask#start will fail if the task state is FAILED
376-
buildTask.setNumFailureRetries(numFailureRetries).start(previousCheckpoint, listener);
392+
// switch the threadpool to generic, because the caller is on the system_read threadpool
393+
threadPool.executor(ThreadPool.Names.GENERIC).execute(() -> {
394+
buildTask.initializeIndexer(indexerBuilder);
395+
// TransformTask#start will fail if the task state is FAILED
396+
buildTask.setNumFailureRetries(numFailureRetries).start(previousCheckpoint, listener);
397+
});
377398
}
378399

379400
private void setNumFailureRetries(int numFailureRetries) {

0 commit comments

Comments
 (0)