Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
647eb22
Write state also on data nodes if not master eligible
brwe Mar 2, 2015
0f5afcd
folder is deleted now, no need to clean up metadata
brwe Mar 5, 2015
aa66bb9
Revert changes to InternalTestCluster
brwe Mar 5, 2015
dd28a5d
add artificial delay to start in DiscoveryService and remove ClusterCha…
brwe Mar 6, 2015
39359a6
remove artificial delay
brwe Mar 9, 2015
a3a9831
unit test for data only node write and refactor similar to #10016
brwe Mar 9, 2015
a72a297
format
brwe Mar 15, 2015
c680f84
make protected and final and move to end of file
brwe Mar 15, 2015
d15e732
empty list if not master or data
brwe Mar 15, 2015
c461fe6
rename
brwe Mar 15, 2015
18cf4e1
cleanup
brwe Mar 15, 2015
4e25695
add java docs
brwe Mar 15, 2015
8652750
move to GatewayMetaState
brwe Mar 15, 2015
8b58004
cleanup tests a little
brwe Mar 17, 2015
a792ad6
more cleanup
brwe Mar 17, 2015
a4baa80
cleanup
brwe Mar 17, 2015
9c1df62
remove check state on disk.
brwe Mar 18, 2015
c394971
check version first in order to avoid writing several times for each …
brwe Mar 18, 2015
e7994b3
remove superfluous == true/false
brwe Mar 18, 2015
bf906fe
pass ClusterState instead of ClusterStateEvent
brwe Mar 18, 2015
32dc21a
Revert "check version first in order to avoid writing several times f…
brwe Mar 18, 2015
82505cf
only write if the shard is actually new and there were no shards allo…
brwe Mar 19, 2015
aeedb29
better name for in memory meta data
brwe Mar 30, 2015
92d1f40
maintain list of indices that we wrote
brwe Mar 31, 2015
ab90dbe
add comments
brwe Apr 1, 2015
5770828
exception if cluster state inconsistent
brwe Apr 1, 2015
f88e821
check on disk if there is a shard written already for a closed index
brwe Apr 3, 2015
7c44299
Set -> ImmutableSet
brwe Apr 29, 2015
5cb39b8
simplify iteration
brwe Apr 29, 2015
7569a51
rename
brwe Apr 29, 2015
d2abcfa
don't delete indices if master is new
brwe Apr 3, 2015
9f6f0e1
make method private
brwe May 4, 2015
06d2b59
only gather closed indices list when previousMetaData == null not whe…
brwe May 4, 2015
8e8f8d1
add comment on ClusterChangedEvent and also //norelease
brwe May 4, 2015
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion src/main/java/org/elasticsearch/cluster/ClusterChangedEvent.java
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,14 @@ public List<String> indicesCreated() {
* Returns the indices deleted in this event
*/
public List<String> indicesDeleted() {
if (previousState == null) {

// if the new cluster state has a new master then we cannot know if an index which is not in the cluster state
// is actually supposed to be deleted or imported as dangling instead. for example a new master might not have
// the index in its cluster state because it was started with an empty data folder and in this case we want to
// import as dangling. we check here for new master too to be on the safe side in this case.
// norelease because we are not sure this is actually a good solution
// See discussion on https://github.com/elastic/elasticsearch/pull/9952
if (hasNewMaster() || previousState == null) {
return ImmutableList.of();
}
if (!metaDataChanged()) {
Expand Down Expand Up @@ -165,4 +172,23 @@ public boolean nodesAdded() {
public boolean nodesChanged() {
return nodesRemoved() || nodesAdded();
}

/**
 * Checks if this cluster state comes from a different master than the previous one.
 * This is a workaround for the scenario where a node misses a cluster state that has either
 * a no master block or the state not recovered flag set. In this case we must make sure that
 * an index that is missing from the cluster state is not deleted immediately but instead
 * imported as dangling. See discussion on https://github.com/elastic/elasticsearch/pull/9952
 *
 * @return {@code true} if the master node id of the new state differs from the one of the
 *         previous state; {@code false} if both are equal, both are {@code null}, or there
 *         is no previous state to compare against
 */
private boolean hasNewMaster() {
    // indicesDeleted() evaluates this method before its own previousState null check,
    // so a missing previous state has to be tolerated here instead of dereferenced
    if (previousState() == null) {
        return false;
    }
    String oldMaster = previousState().getNodes().masterNodeId();
    String newMaster = state().getNodes().masterNodeId();
    if (oldMaster == null) {
        // no master before: it is a new master only if there is one now
        return newMaster != null;
    }
    return oldMaster.equals(newMaster) == false;
}
}
178 changes: 142 additions & 36 deletions src/main/java/org/elasticsearch/gateway/GatewayMetaState.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package org.elasticsearch.gateway;

import com.google.common.collect.ImmutableSet;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.ClusterChangedEvent;
Expand All @@ -27,9 +28,7 @@
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.routing.DjbHashFunction;
import org.elasticsearch.cluster.routing.HashFunction;
import org.elasticsearch.cluster.routing.SimpleHashFunction;
import org.elasticsearch.cluster.routing.*;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
Expand All @@ -43,6 +42,7 @@
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;

/**
*
Expand All @@ -57,7 +57,9 @@ public class GatewayMetaState extends AbstractComponent implements ClusterStateL
private final DanglingIndicesState danglingIndicesState;

@Nullable
private volatile MetaData currentMetaData;
private volatile MetaData previousMetaData;

private volatile ImmutableSet<String> previouslyWrittenIndices = ImmutableSet.of();

@Inject
public GatewayMetaState(Settings settings, NodeEnvironment nodeEnv, MetaStateService metaStateService,
Expand All @@ -76,7 +78,7 @@ public GatewayMetaState(Settings settings, NodeEnvironment nodeEnv, MetaStateSer
if (DiscoveryNode.masterNode(settings) || DiscoveryNode.dataNode(settings)) {
nodeEnv.ensureAtomicMoveSupported();
}
if (DiscoveryNode.masterNode(settings)) {
if (DiscoveryNode.masterNode(settings) || DiscoveryNode.dataNode(settings)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wondering until when do we need the BWC below? (not saying remove now)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The pre20Upgrade() we should still do because this change is going to 1.x as well. But we probably need not check on data nodes for pre 019 state. Would it harm to keep this check or should I explicitly only check pre 019 on master nodes and pre 20 on master and data nodes?

try {
ensureNoPre019State();
pre20Upgrade();
Expand All @@ -96,55 +98,62 @@ public MetaData loadMetaState() throws Exception {

@Override
public void clusterChanged(ClusterChangedEvent event) {
Set<String> relevantIndices = new HashSet<>();
final ClusterState state = event.state();
if (state.blocks().disableStatePersistence()) {
// reset the current metadata, we need to start fresh...
this.currentMetaData = null;
this.previousMetaData = null;
previouslyWrittenIndices = ImmutableSet.of();
return;
}

MetaData newMetaData = state.metaData();
// we don't check if metaData changed, since we might be called several times and we need to check dangling...

boolean success = true;
// only applied to master node, writing the global and index level states
if (state.nodes().localNode().masterNode()) {
// write the state if this node is a master eligible node or if it is a data node and has shards allocated on it
if (state.nodes().localNode().masterNode() || state.nodes().localNode().dataNode()) {
if (previousMetaData == null) {
try {
// we determine whether to write meta data on data only nodes by looking at the shard routing
// and only write if a shard of this index is allocated on this node
// however, closed indices do not appear in the shard routing. if the meta data for a closed index is
// updated it will therefore not be written in case the list of previouslyWrittenIndices is empty (because state
// persistence was disabled or the node was restarted), see getRelevantIndicesOnDataOnlyNode().
// we therefore have to check here if we have shards on disk and add their indices to the previouslyWrittenIndices list
if (isDataOnlyNode(state)) {
ImmutableSet.Builder<String> previouslyWrittenIndicesBuilder = ImmutableSet.builder();
for (IndexMetaData indexMetaData : newMetaData) {
IndexMetaData indexMetaDataOnDisk = null;
if (indexMetaData.state().equals(IndexMetaData.State.CLOSE)) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we skip this loading from disk? I made the change so that we still load (see maybeLoadIndexState() below) if the meta data is not in memory but fail to understand why we do it. Is it an optimization?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as far as I am concerned we could just skip it since we write the new one anyway. Yet, we might have a new meta on disk but that would be a bug too no? so I think we can drop it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, removed this check now.

indexMetaDataOnDisk = metaStateService.loadIndexState(indexMetaData.index());
}
if (indexMetaDataOnDisk != null) {
previouslyWrittenIndicesBuilder.add(indexMetaDataOnDisk.index());
}
}
previouslyWrittenIndices = previouslyWrittenIndicesBuilder.addAll(previouslyWrittenIndices).build();
}
} catch (Throwable e) {
success = false;
}
}
// check if the global state changed?
if (currentMetaData == null || !MetaData.isGlobalStateEquals(currentMetaData, newMetaData)) {
if (previousMetaData == null || !MetaData.isGlobalStateEquals(previousMetaData, newMetaData)) {
try {
metaStateService.writeGlobalState("changed", newMetaData);
} catch (Throwable e) {
success = false;
}
}

Iterable<IndexMetaWriteInfo> writeInfo;
relevantIndices = getRelevantIndices(event.state(), previouslyWrittenIndices);
writeInfo = resolveStatesToBeWritten(previouslyWrittenIndices, relevantIndices, previousMetaData, event.state().metaData());
// check and write changes in indices
for (IndexMetaData indexMetaData : newMetaData) {
String writeReason = null;
IndexMetaData currentIndexMetaData;
if (currentMetaData == null) {
// a new event..., check from the state stored
try {
currentIndexMetaData = metaStateService.loadIndexState(indexMetaData.index());
} catch (IOException ex) {
throw new ElasticsearchException("failed to load index state", ex);
}
} else {
currentIndexMetaData = currentMetaData.index(indexMetaData.index());
}
if (currentIndexMetaData == null) {
writeReason = "freshly created";
} else if (currentIndexMetaData.version() != indexMetaData.version()) {
writeReason = "version changed from [" + currentIndexMetaData.version() + "] to [" + indexMetaData.version() + "]";
}

// we update the writeReason only if we really need to write it
if (writeReason == null) {
continue;
}

for (IndexMetaWriteInfo indexMetaWrite : writeInfo) {
try {
metaStateService.writeIndex(writeReason, indexMetaData, currentIndexMetaData);
metaStateService.writeIndex(indexMetaWrite.reason, indexMetaWrite.newMetaData, indexMetaWrite.previousMetaData);
} catch (Throwable e) {
success = false;
}
Expand All @@ -154,8 +163,27 @@ public void clusterChanged(ClusterChangedEvent event) {
danglingIndicesState.processDanglingIndices(newMetaData);

if (success) {
currentMetaData = newMetaData;
previousMetaData = newMetaData;
ImmutableSet.Builder<String> builder = ImmutableSet.builder();
previouslyWrittenIndices = builder.addAll(relevantIndices).build();
}
}

/**
 * Selects the names of the indices whose meta data is relevant for the local node.
 * A data only node delegates to {@link #getRelevantIndicesOnDataOnlyNode}, a master eligible
 * node delegates to {@link #getRelevantIndicesForMasterEligibleNode}, and any other node
 * gets an empty set.
 *
 * @param state                    the cluster state to inspect
 * @param previouslyWrittenIndices names of the indices whose state was written before
 * @return the names of the relevant indices
 */
public static Set<String> getRelevantIndices(ClusterState state, ImmutableSet<String> previouslyWrittenIndices) {
    if (isDataOnlyNode(state)) {
        return getRelevantIndicesOnDataOnlyNode(state, previouslyWrittenIndices);
    }
    if (state.nodes().localNode().masterNode()) {
        return getRelevantIndicesForMasterEligibleNode(state);
    }
    // neither master eligible nor data node
    return Collections.emptySet();
}


/**
 * Tells whether the local node of the given cluster state is a data node
 * that is not master eligible.
 */
protected static boolean isDataOnlyNode(ClusterState state) {
    if (state.nodes().localNode().masterNode()) {
        // master eligible, so not data only
        return false;
    }
    return state.nodes().localNode().dataNode();
}

/**
Expand Down Expand Up @@ -229,7 +257,7 @@ private void pre20Upgrade() throws Exception {
}
}
}
if (hasCustomPre20HashFunction|| pre20UseType != null) {
if (hasCustomPre20HashFunction || pre20UseType != null) {
logger.warn("Settings [{}] and [{}] are deprecated. Index settings from your old indices have been updated to record the fact that they "
+ "used some custom routing logic, you can now remove these settings from your `elasticsearch.yml` file", DEPRECATED_SETTING_ROUTING_HASH_FUNCTION, DEPRECATED_SETTING_ROUTING_USE_TYPE);
}
Expand All @@ -251,4 +279,82 @@ private void ensureNoPre019ShardState(NodeEnvironment nodeEnv) throws Exception
}
}
}

/**
 * Compares, for each candidate index, the in-memory meta data of the previous cluster state with
 * the meta data of the new cluster state and decides whether the index state has to be persisted.
 * An index is scheduled for writing if it is not in the previously written list, if there is no
 * previous meta data for it, or if its version differs between the previous and the new meta data.
 *
 * @param previouslyWrittenIndices    A list of indices for which the state was already written before
 * @param potentiallyUnwrittenIndices The list of indices for which state should potentially be written
 * @param previousMetaData            The last meta data we know of, may be {@code null}
 * @param newMetaData                 The new metadata
 * @return iterable over all indices states that should be written to disk
 */
public static Iterable<GatewayMetaState.IndexMetaWriteInfo> resolveStatesToBeWritten(ImmutableSet<String> previouslyWrittenIndices, Set<String> potentiallyUnwrittenIndices, MetaData previousMetaData, MetaData newMetaData) {
    final List<GatewayMetaState.IndexMetaWriteInfo> pendingWrites = new ArrayList<>();
    for (String indexName : potentiallyUnwrittenIndices) {
        final IndexMetaData updated = newMetaData.index(indexName);
        final IndexMetaData known;
        if (previousMetaData == null) {
            known = null;
        } else {
            known = previousMetaData.index(indexName);
        }

        final String reason;
        if (previouslyWrittenIndices.contains(indexName) == false || known == null) {
            reason = "freshly created";
        } else if (known.version() != updated.version()) {
            reason = "version changed from [" + known.version() + "] to [" + updated.version() + "]";
        } else {
            // already written and unchanged, nothing to do for this index
            continue;
        }
        pendingWrites.add(new GatewayMetaState.IndexMetaWriteInfo(updated, known, reason));
    }
    return pendingWrites;
}

/**
 * Collects the names of the indices whose meta data a data only node should write:
 * every index that has a shard routed to the local node, plus every closed index
 * whose state was written by this node before.
 *
 * @param state                    the cluster state to inspect
 * @param previouslyWrittenIndices names of the indices whose state was written before
 * @return the names of the relevant indices
 * @throws IllegalStateException if the routing nodes of the cluster state have no entry for the local node
 */
public static Set<String> getRelevantIndicesOnDataOnlyNode(ClusterState state, ImmutableSet<String> previouslyWrittenIndices) {
    RoutingNode newRoutingNode = state.getRoutingNodes().node(state.nodes().localNodeId());
    if (newRoutingNode == null) {
        throw new IllegalStateException("cluster state does not contain this node - cannot write index meta state");
    }
    Set<String> indices = new HashSet<>();
    for (MutableShardRouting routing : newRoutingNode) {
        indices.add(routing.index());
    }
    // we have to check the meta data also: closed indices will not appear in the routing table, but we must still write the state if we have it written on disk previously
    for (IndexMetaData indexMetaData : state.metaData()) {
        // the iteration already hands us the index meta data, no need to look it up again by name
        if (previouslyWrittenIndices.contains(indexMetaData.getIndex()) && indexMetaData.state().equals(IndexMetaData.State.CLOSE)) {
            indices.add(indexMetaData.getIndex());
        }
    }
    return indices;
}

/**
 * Returns the names of all indices found in the meta data of the given cluster state.
 * The meta data is iterated (rather than the routing table) to make sure closed indices
 * are captured as well.
 *
 * @param state the cluster state to inspect
 * @return the names of all indices in the meta data
 */
public static Set<String> getRelevantIndicesForMasterEligibleNode(ClusterState state) {
    final Set<String> indexNames = new HashSet<>();
    for (IndexMetaData metaOfIndex : state.metaData()) {
        indexNames.add(metaOfIndex.getIndex());
    }
    return indexNames;
}


/**
 * Describes a single index state that should be written to disk: the new meta data,
 * the previously known meta data it replaces and the reason for the write.
 */
public static class IndexMetaWriteInfo {
    final IndexMetaData newMetaData;
    final String reason;
    final IndexMetaData previousMetaData;

    /**
     * @param newMetaData      the meta data to write
     * @param previousMetaData the last known meta data of the index, {@code null} if there is none
     * @param reason           a description of why the state is written
     */
    public IndexMetaWriteInfo(IndexMetaData newMetaData, IndexMetaData previousMetaData, String reason) {
        this.newMetaData = newMetaData;
        this.reason = reason;
        this.previousMetaData = previousMetaData;
    }

    /** Returns the meta data to write. */
    public IndexMetaData getNewMetaData() {
        return newMetaData;
    }

    /** Returns the last known meta data of the index, or {@code null} if there is none. */
    public IndexMetaData getPreviousMetaData() {
        return previousMetaData;
    }

    /** Returns the description of why the state is written. */
    public String getReason() {
        return reason;
    }
}
}
Loading