Skip to content

Commit 6ecb8ff

Browse files
authored
Move to Error step if ClusterState* steps throw (#35069)
Previously, if a ClusterStateActionStep or ClusterStateWaitStep threw an exception while executing, the exception was only caught and logged by the generic ClusterStateUpdateTask machinery, and the index became stuck on that step. Now, exceptions thrown by these steps are caught and the index is moved to the Error step.
1 parent 18c72e8 commit 6ecb8ff

File tree

2 files changed

+58
-2
lines changed

2 files changed

+58
-2
lines changed

x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/indexlifecycle/ExecuteStepsUpdateTask.java

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,11 @@ public ClusterState execute(final ClusterState currentState) throws IOException
9292
// move the cluster state to the next step
9393
logger.trace("[{}] performing cluster state action ({}) [{}], next: [{}]",
9494
index.getName(), currentStep.getClass().getSimpleName(), currentStep.getKey(), currentStep.getNextStepKey());
95-
state = ((ClusterStateActionStep) currentStep).performAction(index, state);
95+
try {
96+
state = ((ClusterStateActionStep) currentStep).performAction(index, state);
97+
} catch (Exception exception) {
98+
return moveToErrorStep(state, currentStep.getKey(), exception);
99+
}
96100
if (currentStep.getNextStepKey() == null) {
97101
return state;
98102
} else {
@@ -108,7 +112,12 @@ public ClusterState execute(final ClusterState currentState) throws IOException
108112
// condition again
109113
logger.trace("[{}] waiting for cluster state step condition ({}) [{}], next: [{}]",
110114
index.getName(), currentStep.getClass().getSimpleName(), currentStep.getKey(), currentStep.getNextStepKey());
111-
ClusterStateWaitStep.Result result = ((ClusterStateWaitStep) currentStep).isConditionMet(index, state);
115+
ClusterStateWaitStep.Result result;
116+
try {
117+
result = ((ClusterStateWaitStep) currentStep).isConditionMet(index, state);
118+
} catch (Exception exception) {
119+
return moveToErrorStep(state, currentStep.getKey(), exception);
120+
}
112121
if (result.isComplete()) {
113122
logger.trace("[{}] cluster state step condition met successfully ({}) [{}], moving to next step {}",
114123
index.getName(), currentStep.getClass().getSimpleName(), currentStep.getKey(), currentStep.getNextStepKey());
@@ -172,4 +181,12 @@ public void onFailure(String source, Exception e) {
172181
throw new ElasticsearchException(
173182
"policy [" + policy + "] for index [" + index.getName() + "] failed on step [" + startStep.getKey() + "].", e);
174183
}
184+
185+
/**
 * Logs that the policy failed on the given cluster state step for this index, then builds a
 * {@link MoveToErrorStepUpdateTask} from the index, policy, failed step key, cause and
 * {@code nowSupplier}, and returns whatever cluster state that task's {@code execute} produces.
 *
 * NOTE(review): {@code cause} is handed to the update task but is not passed to
 * {@code logger.error}, so this log line carries no stack trace - confirm that is intended.
 *
 * @param state          the cluster state passed on to the error-step task
 * @param currentStepKey key of the step that threw
 * @param cause          the exception thrown by the step
 * @return the cluster state returned by {@code MoveToErrorStepUpdateTask#execute}
 * @throws IOException declared here; presumably propagated from the update task - TODO confirm
 */
private ClusterState moveToErrorStep(final ClusterState state, Step.StepKey currentStepKey, Exception cause) throws IOException {
186+
logger.error("policy [{}] for index [{}] failed on cluster state step [{}]. Moving to ERROR step", policy, index.getName(),
187+
currentStepKey);
188+
MoveToErrorStepUpdateTask moveToErrorStepUpdateTask = new MoveToErrorStepUpdateTask(index, policy, currentStepKey, cause,
189+
nowSupplier);
190+
return moveToErrorStepUpdateTask.execute(state);
191+
}
175192
}

x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/indexlifecycle/ExecuteStepsUpdateTaskTests.java

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.elasticsearch.index.Index;
2323
import org.elasticsearch.node.Node;
2424
import org.elasticsearch.test.ESTestCase;
25+
import org.elasticsearch.xpack.core.indexlifecycle.ErrorStep;
2526
import org.elasticsearch.xpack.core.indexlifecycle.LifecycleExecutionState;
2627
import org.elasticsearch.xpack.core.indexlifecycle.IndexLifecycleMetadata;
2728
import org.elasticsearch.xpack.core.indexlifecycle.LifecyclePolicy;
@@ -253,6 +254,44 @@ public void testOnFailure() throws IOException {
253254
assertSame(expectedException, exception.getCause());
254255
}
255256

257+
/**
 * Configures {@code firstStep} to throw a RuntimeException, starts the task at
 * {@code firstStepKey}, and checks the resulting lifecycle state: the current step key is the
 * {@link ErrorStep} of the same phase and action, {@code firstStep} ran once and
 * {@code secondStep} never ran, the phase and action times are null, and the step info holds
 * the exception rendered as JSON.
 * Judging by the test name, {@code firstStep} presumably stands in for a cluster state action
 * step - its definition is outside this view.
 */
public void testClusterActionStepThrowsException() throws IOException {
258+
RuntimeException thrownException = new RuntimeException("error");
259+
firstStep.setException(thrownException);
260+
setStateToKey(firstStepKey);
261+
Step startStep = policyStepsRegistry.getStep(indexMetaData, firstStepKey);
262+
long now = randomNonNegativeLong();
263+
ExecuteStepsUpdateTask task = new ExecuteStepsUpdateTask(mixedPolicyName, index, startStep, policyStepsRegistry, null, () -> now);
264+
ClusterState newState = task.execute(clusterState);
265+
LifecycleExecutionState lifecycleState = LifecycleExecutionState.fromIndexMetadata(newState.getMetaData().index(index));
266+
StepKey currentStepKey = IndexLifecycleRunner.getCurrentStepKey(lifecycleState);
267+
assertThat(currentStepKey, equalTo(new StepKey(firstStepKey.getPhase(), firstStepKey.getAction(), ErrorStep.NAME)));
268+
assertThat(firstStep.getExecuteCount(), equalTo(1L));
269+
assertThat(secondStep.getExecuteCount(), equalTo(0L));
270+
assertThat(task.getNextStepKey(), equalTo(secondStep.getKey()));
271+
assertThat(lifecycleState.getPhaseTime(), nullValue());
272+
assertThat(lifecycleState.getActionTime(), nullValue());
273+
assertThat(lifecycleState.getStepInfo(), equalTo("{\"type\":\"runtime_exception\",\"reason\":\"error\"}"));
274+
}
275+
276+
/**
 * Configures {@code secondStep} to throw a RuntimeException, starts the task at
 * {@code firstStepKey}, and checks the resulting lifecycle state: the current step key is the
 * {@link ErrorStep} built from {@code firstStepKey}'s phase and action, both {@code firstStep}
 * and {@code secondStep} ran exactly once, the task's next step key is {@code thirdStepKey},
 * the phase and action times are null, and the step info holds the exception rendered as JSON.
 * Judging by the test name, {@code secondStep} presumably stands in for a cluster state wait
 * step - its definition is outside this view.
 */
public void testClusterWaitStepThrowsException() throws IOException {
277+
RuntimeException thrownException = new RuntimeException("error");
278+
secondStep.setException(thrownException);
279+
setStateToKey(firstStepKey);
280+
Step startStep = policyStepsRegistry.getStep(indexMetaData, firstStepKey);
281+
long now = randomNonNegativeLong();
282+
ExecuteStepsUpdateTask task = new ExecuteStepsUpdateTask(mixedPolicyName, index, startStep, policyStepsRegistry, null, () -> now);
283+
ClusterState newState = task.execute(clusterState);
284+
LifecycleExecutionState lifecycleState = LifecycleExecutionState.fromIndexMetadata(newState.getMetaData().index(index));
285+
StepKey currentStepKey = IndexLifecycleRunner.getCurrentStepKey(lifecycleState);
286+
assertThat(currentStepKey, equalTo(new StepKey(firstStepKey.getPhase(), firstStepKey.getAction(), ErrorStep.NAME)));
287+
assertThat(firstStep.getExecuteCount(), equalTo(1L));
288+
assertThat(secondStep.getExecuteCount(), equalTo(1L));
289+
assertThat(task.getNextStepKey(), equalTo(thirdStepKey));
290+
assertThat(lifecycleState.getPhaseTime(), nullValue());
291+
assertThat(lifecycleState.getActionTime(), nullValue());
292+
assertThat(lifecycleState.getStepInfo(), equalTo("{\"type\":\"runtime_exception\",\"reason\":\"error\"}"));
293+
}
294+
256295
private void setStateToKey(StepKey stepKey) throws IOException {
257296
LifecycleExecutionState.Builder lifecycleState = LifecycleExecutionState.builder(
258297
LifecycleExecutionState.fromIndexMetadata(clusterState.getMetaData().index(index)));

0 commit comments

Comments
 (0)