Skip to content

[ILM] Rollover action errors after restart #34465

@colings86

Description

@colings86

Environment: a 4-node cluster with 1 hot node, 2 warm nodes and 1 cold node.

  1. Create a policy:
PUT _ilm/my_lifecycle3
{
  "policy": {
    "phases": {
      "hot": {
        "actions": {
          "rollover": {
            "max_age": "30s"
          }
        }
      },
      "warm": {
        "actions": {
          "forcemerge": {
            "max_num_segments": 1
          },
          "allocate": {
            "number_of_replicas": 1,
            "include": {
              "box_type": "warm"
            },
            "exclude": {},
            "require": {}
          }
        }
      },
      "cold": {
        "minimum_age": "1m",
        "actions": {
          "allocate": {
            "number_of_replicas": 0,
            "include": {
              "box_type": "cold"
            },
            "exclude": {},
            "require": {}
          }
        }
      },
      "delete": {
        "minimum_age": "2m",
        "actions": {
          "delete": {}
        }
      }
    }
  }
}
  2. Create an index template:
PUT _template/my_template
{
  "index_patterns": ["test-*"],
  "settings": {
    "number_of_shards": 2,
    "number_of_replicas": 0,
    "index.lifecycle.name": "my_lifecycle3",
    "index.lifecycle.rollover_alias": "test-alias",
    "index.routing.allocation.include.box_type": "hot"
  }
}
  3. Create the first index:
PUT test-000001
{
  "aliases": {
    "test-alias":{
      "is_write_index": true
    }
  }
}
  4. Check that the index is on the attempt_rollover step using the explain API:
GET test-*/_ilm/explain?human
  5. Shut down all nodes

  6. Restart all nodes

  7. Wait for the 2nd index to appear using the explain API:

GET test-*/_ilm/explain?human
  8. Observe that the first index is in the ERROR state with a response similar to the following:
{
  "indices": {
    "test-000001": {
      "index": "test-000001",
      "managed": true,
      "policy": "my_lifecycle3",
      "skip": false,
      "lifecycle_date": "2018-10-15T14:49:32.281Z",
      "phase": "hot",
      "phase_time": "2018-10-15T14:49:32.531Z",
      "action": "rollover",
      "action_time": "2018-10-15T14:49:32.531Z",
      "step": "ERROR",
      "step_time": "2018-10-15T14:50:18.632Z",
      "failed_step": "attempt_rollover",
      "step_info": {
        "type": "resource_already_exists_exception",
        "reason": "index [test-000002/cRr06akcS4mJXfZniAFfGQ] already exists",
        "index_uuid": "cRr06akcS4mJXfZniAFfGQ",
        "index": "test-000002"
      },
      "phase_execution": {
        "policy": "my_lifecycle3",
        "phase_definition": {
          "minimum_age": "0ms",
          "actions": {
            "rollover": {
              "max_age": "30s"
            }
          }
        },
        "version": 1,
        "modified_date": "2018-10-15T14:49:24.641Z",
        "modified_date_in_millis": 1539614964641
      }
    },
    "test-000002": {
      "index": "test-000002",
      "managed": true,
      "policy": "my_lifecycle3",
      "skip": false,
      "lifecycle_date": "2018-10-15T14:50:17.338Z",
      "phase": "new",
      "phase_time": "2018-10-15T14:50:18.725Z",
      "action": "complete",
      "action_time": "2018-10-15T14:50:18.384Z",
      "step": "complete",
      "step_time": "2018-10-15T14:50:18.384Z"
    }
  }
}

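It seems that in this scenario the rollover step might be getting run twice: test-000002 is created successfully, yet test-000001 then fails on the attempt_rollover step with a resource_already_exists_exception because test-000002 already exists.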

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions