Skip to content

Commit 7dc02c5

Browse files
Avoid needless index metadata builders during reroute (#88506)
This set of changes makes `org.elasticsearch.cluster.routing.allocation.IndexMetadataUpdater#applyChanges` essentially free even in clusters with O(100k) indices. Previously it used a disproportionately increasing amount of CPU as the cluster grew (about 1% of CPU time while bootstrapping many shards in benchmarks with 25k indices, and increasing from there). It also appears to have additional end-to-end benefits in those benchmarks, likely as a result of making metadata diffing cheaper by retaining more instance equality across the board. Relates #77466
1 parent 4efc09c commit 7dc02c5

File tree

4 files changed

+303
-91
lines changed

4 files changed

+303
-91
lines changed

server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java

Lines changed: 217 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -608,7 +608,8 @@ private IndexMetadata(
608608
final boolean isPartialSearchableSnapshot,
609609
@Nullable final IndexMode indexMode,
610610
@Nullable final Instant timeSeriesStart,
611-
@Nullable final Instant timeSeriesEnd
611+
@Nullable final Instant timeSeriesEnd,
612+
final Version indexCompatibilityVersion
612613
) {
613614
this.index = index;
614615
this.version = version;
@@ -654,7 +655,8 @@ private IndexMetadata(
654655
this.autoExpandReplicas = autoExpandReplicas;
655656
this.isSearchableSnapshot = isSearchableSnapshot;
656657
this.isPartialSearchableSnapshot = isPartialSearchableSnapshot;
657-
this.indexCompatibilityVersion = SETTING_INDEX_VERSION_COMPATIBILITY.get(settings);
658+
this.indexCompatibilityVersion = indexCompatibilityVersion;
659+
assert indexCompatibilityVersion.equals(SETTING_INDEX_VERSION_COMPATIBILITY.get(settings));
658660
this.indexMode = indexMode;
659661
this.timeSeriesStart = timeSeriesStart;
660662
this.timeSeriesEnd = timeSeriesEnd;
@@ -705,28 +707,227 @@ IndexMetadata withMappingMetadata(MappingMetadata mapping) {
705707
this.isPartialSearchableSnapshot,
706708
this.indexMode,
707709
this.timeSeriesStart,
708-
this.timeSeriesEnd
710+
this.timeSeriesEnd,
711+
this.indexCompatibilityVersion
709712
);
710713
}
711714

712-
public Index getIndex() {
713-
return index;
715+
/**
716+
* Copy constructor that sets the in-sync allocation ids for the specified shard.
* <p>
* Returns {@code this} unchanged when {@code inSyncSet} equals the set currently stored for {@code shardId}.
* Otherwise a new instance is created in which only the in-sync allocation id map differs: the entry for
* {@code shardId} is added or replaced with the result of {@link Set#copyOf} applied to {@code inSyncSet}.
* Every other constructor argument is passed through from this instance as-is.
717+
* @param shardId shard id to set in-sync allocation ids for
718+
* @param inSyncSet new in-sync allocation ids
719+
* @return updated instance
720+
*/
721+
public IndexMetadata withInSyncAllocationIds(int shardId, Set<String> inSyncSet) {
// The same ids are already recorded for this shard: reuse this instance instead of allocating an identical copy.
722+
if (inSyncSet.equals(inSyncAllocationIds.get(shardId))) {
723+
return this;
724+
}
725+
return new IndexMetadata(
726+
this.index,
727+
this.version,
728+
this.mappingVersion,
729+
this.settingsVersion,
730+
this.aliasesVersion,
731+
this.primaryTerms,
732+
this.state,
733+
this.numberOfShards,
734+
this.numberOfReplicas,
735+
this.settings,
736+
this.mapping,
737+
this.aliases,
738+
this.customData,
// The only argument that differs from this instance: the map with the entry for shardId added or replaced.
739+
Maps.copyMapWithAddedOrReplacedEntry(this.inSyncAllocationIds, shardId, Set.copyOf(inSyncSet)),
740+
this.requireFilters,
741+
this.initialRecoveryFilters,
742+
this.includeFilters,
743+
this.excludeFilters,
744+
this.indexCreatedVersion,
745+
this.routingNumShards,
746+
this.routingPartitionSize,
747+
this.routingPaths,
748+
this.waitForActiveShards,
749+
this.rolloverInfos,
750+
this.isSystem,
751+
this.isHidden,
752+
this.timestampRange,
753+
this.priority,
754+
this.creationDate,
755+
this.ignoreDiskWatermarks,
756+
this.tierPreference,
757+
this.shardsPerNodeLimit,
758+
this.lifecyclePolicyName,
759+
this.lifecycleExecutionState,
760+
this.autoExpandReplicas,
761+
this.isSearchableSnapshot,
762+
this.isPartialSearchableSnapshot,
763+
this.indexMode,
764+
this.timeSeriesStart,
765+
this.timeSeriesEnd,
766+
this.indexCompatibilityVersion
767+
);
714768
}
715769

716-
public String getIndexUUID() {
717-
return index.getUUID();
770+
/**
771+
* Creates a copy of this instance that has the primary term for the given shard id incremented.
* <p>
* The primary terms array is cloned before the increment, so the array held by this instance is not modified.
* A new instance is always returned; every constructor argument other than the primary terms is passed
* through from this instance as-is.
772+
* @param shardId shard id to increment primary term for
773+
* @return updated instance with incremented primary term
* @throws ArrayIndexOutOfBoundsException if {@code shardId} is not a valid index into the primary terms array
774+
*/
775+
public IndexMetadata withIncrementedPrimaryTerm(int shardId) {
// Work on a private copy so that this instance's primary terms stay untouched.
776+
final long[] incremented = this.primaryTerms.clone();
777+
incremented[shardId]++;
778+
return new IndexMetadata(
779+
this.index,
780+
this.version,
781+
this.mappingVersion,
782+
this.settingsVersion,
783+
this.aliasesVersion,
784+
incremented,
785+
this.state,
786+
this.numberOfShards,
787+
this.numberOfReplicas,
788+
this.settings,
789+
this.mapping,
790+
this.aliases,
791+
this.customData,
792+
this.inSyncAllocationIds,
793+
this.requireFilters,
794+
this.initialRecoveryFilters,
795+
this.includeFilters,
796+
this.excludeFilters,
797+
this.indexCreatedVersion,
798+
this.routingNumShards,
799+
this.routingPartitionSize,
800+
this.routingPaths,
801+
this.waitForActiveShards,
802+
this.rolloverInfos,
803+
this.isSystem,
804+
this.isHidden,
805+
this.timestampRange,
806+
this.priority,
807+
this.creationDate,
808+
this.ignoreDiskWatermarks,
809+
this.tierPreference,
810+
this.shardsPerNodeLimit,
811+
this.lifecyclePolicyName,
812+
this.lifecycleExecutionState,
813+
this.autoExpandReplicas,
814+
this.isSearchableSnapshot,
815+
this.isPartialSearchableSnapshot,
816+
this.indexMode,
817+
this.timeSeriesStart,
818+
this.timeSeriesEnd,
819+
this.indexCompatibilityVersion
820+
);
718821
}
719822

720823
/**
721-
* Test whether the current index UUID is the same as the given one. Returns true if either are _na_
824+
* @param timestampRange new timestamp range
825+
* @return copy of this instance with updated timestamp range
722826
*/
723-
public boolean isSameUUID(String otherUUID) {
724-
assert otherUUID != null;
725-
assert getIndexUUID() != null;
726-
if (INDEX_UUID_NA_VALUE.equals(otherUUID) || INDEX_UUID_NA_VALUE.equals(getIndexUUID())) {
727-
return true;
827+
public IndexMetadata withTimestampRange(IndexLongFieldRange timestampRange) {
828+
if (timestampRange.equals(this.timestampRange)) {
829+
return this;
728830
}
729-
return otherUUID.equals(getIndexUUID());
831+
return new IndexMetadata(
832+
this.index,
833+
this.version,
834+
this.mappingVersion,
835+
this.settingsVersion,
836+
this.aliasesVersion,
837+
this.primaryTerms,
838+
this.state,
839+
this.numberOfShards,
840+
this.numberOfReplicas,
841+
this.settings,
842+
this.mapping,
843+
this.aliases,
844+
this.customData,
845+
this.inSyncAllocationIds,
846+
this.requireFilters,
847+
this.initialRecoveryFilters,
848+
this.includeFilters,
849+
this.excludeFilters,
850+
this.indexCreatedVersion,
851+
this.routingNumShards,
852+
this.routingPartitionSize,
853+
this.routingPaths,
854+
this.waitForActiveShards,
855+
this.rolloverInfos,
856+
this.isSystem,
857+
this.isHidden,
858+
timestampRange,
859+
this.priority,
860+
this.creationDate,
861+
this.ignoreDiskWatermarks,
862+
this.tierPreference,
863+
this.shardsPerNodeLimit,
864+
this.lifecyclePolicyName,
865+
this.lifecycleExecutionState,
866+
this.autoExpandReplicas,
867+
this.isSearchableSnapshot,
868+
this.isPartialSearchableSnapshot,
869+
this.indexMode,
870+
this.timeSeriesStart,
871+
this.timeSeriesEnd,
872+
this.indexCompatibilityVersion
873+
);
874+
}
875+
876+
/**
877+
* Creates a new instance whose {@code version} is this instance's version plus one. Every other constructor
* argument (including the mapping, settings and aliases versions) is passed through from this instance as-is.
* A new instance is always returned.
*
* @return a copy of this instance that has its version incremented by one
878+
*/
879+
public IndexMetadata withIncrementedVersion() {
880+
return new IndexMetadata(
881+
this.index,
// The only argument that differs from this instance.
882+
this.version + 1,
883+
this.mappingVersion,
884+
this.settingsVersion,
885+
this.aliasesVersion,
886+
this.primaryTerms,
887+
this.state,
888+
this.numberOfShards,
889+
this.numberOfReplicas,
890+
this.settings,
891+
this.mapping,
892+
this.aliases,
893+
this.customData,
894+
this.inSyncAllocationIds,
895+
this.requireFilters,
896+
this.initialRecoveryFilters,
897+
this.includeFilters,
898+
this.excludeFilters,
899+
this.indexCreatedVersion,
900+
this.routingNumShards,
901+
this.routingPartitionSize,
902+
this.routingPaths,
903+
this.waitForActiveShards,
904+
this.rolloverInfos,
905+
this.isSystem,
906+
this.isHidden,
907+
this.timestampRange,
908+
this.priority,
909+
this.creationDate,
910+
this.ignoreDiskWatermarks,
911+
this.tierPreference,
912+
this.shardsPerNodeLimit,
913+
this.lifecyclePolicyName,
914+
this.lifecycleExecutionState,
915+
this.autoExpandReplicas,
916+
this.isSearchableSnapshot,
917+
this.isPartialSearchableSnapshot,
918+
this.indexMode,
919+
this.timeSeriesStart,
920+
this.timeSeriesEnd,
921+
this.indexCompatibilityVersion
922+
);
923+
}
924+
925+
/**
* @return the {@link Index} stored in this metadata instance
*/
public Index getIndex() {
926+
return index;
927+
}
928+
929+
/**
* @return the UUID reported by this metadata's {@link Index}, i.e. {@code index.getUUID()}
*/
public String getIndexUUID() {
930+
return index.getUUID();
730931
}
731932

732933
public long getVersion() {
@@ -1581,10 +1782,6 @@ public Builder settingsVersion(final long settingsVersion) {
15811782
return this;
15821783
}
15831784

1584-
public long aliasesVersion() {
1585-
return aliasesVersion;
1586-
}
1587-
15881785
public Builder aliasesVersion(final long aliasesVersion) {
15891786
this.aliasesVersion = aliasesVersion;
15901787
return this;
@@ -1640,10 +1837,6 @@ public Builder timestampRange(IndexLongFieldRange timestampRange) {
16401837
return this;
16411838
}
16421839

1643-
public IndexLongFieldRange getTimestampRange() {
1644-
return timestampRange;
1645-
}
1646-
16471840
public IndexMetadata build() {
16481841
/*
16491842
* We expect that the metadata has been properly built to set the number of shards and the number of replicas, and do not rely
@@ -1804,7 +1997,8 @@ public IndexMetadata build() {
18041997
isSearchableSnapshot && settings.getAsBoolean(SEARCHABLE_SNAPSHOT_PARTIAL_SETTING_KEY, false),
18051998
isTsdb ? IndexMode.TIME_SERIES : null,
18061999
isTsdb ? IndexSettings.TIME_SERIES_START_TIME.get(settings) : null,
1807-
isTsdb ? IndexSettings.TIME_SERIES_END_TIME.get(settings) : null
2000+
isTsdb ? IndexSettings.TIME_SERIES_END_TIME.get(settings) : null,
2001+
SETTING_INDEX_VERSION_COMPATIBILITY.get(settings)
18082002
);
18092003
}
18102004

server/src/main/java/org/elasticsearch/cluster/metadata/Metadata.java

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.elasticsearch.cluster.block.ClusterBlockLevel;
2323
import org.elasticsearch.cluster.coordination.CoordinationMetadata;
2424
import org.elasticsearch.cluster.metadata.IndexAbstraction.ConcreteIndex;
25+
import org.elasticsearch.cluster.routing.RoutingTable;
2526
import org.elasticsearch.common.Strings;
2627
import org.elasticsearch.common.UUIDs;
2728
import org.elasticsearch.common.collect.ImmutableOpenMap;
@@ -397,6 +398,47 @@ public Metadata withCoordinationMetadata(CoordinationMetadata coordinationMetada
397398
);
398399
}
399400

401+
/**
402+
* Creates a copy of this instance updated with the given {@link IndexMetadata} that must only contain changes to primary terms
403+
* and in-sync allocation ids relative to the existing entries. This method is only used by
404+
* {@link org.elasticsearch.cluster.routing.allocation.IndexMetadataUpdater#applyChanges(Metadata, RoutingTable)}.
* <p>
* Returns {@code this} when {@code updates} is empty. Otherwise only the indices map is rebuilt, by putting all
* entries of {@code updates} into a builder created from the current indices. Every other constructor argument,
* including the shard totals, the index name arrays, {@code indicesLookup} and {@code mappingsByHash}, is reused
* from this instance without being recomputed.
* NOTE(review): presumably this reuse is why the updates must not change anything those structures depend on;
* nothing in this method validates that restriction, so confirm callers honour it.
405+
* @param updates map of index name to {@link IndexMetadata}.
406+
* @return updated metadata instance
407+
*/
408+
public Metadata withAllocationAndTermUpdatesOnly(Map<String, IndexMetadata> updates) {
// Nothing to apply: keep the current instance rather than building an identical copy.
409+
if (updates.isEmpty()) {
410+
return this;
411+
}
// Builder created from the current indices; the updated entries are then put on top of it.
412+
final var updatedIndicesBuilder = ImmutableOpenMap.builder(indices);
413+
updatedIndicesBuilder.putAllFromMap(updates);
414+
return new Metadata(
415+
clusterUUID,
416+
clusterUUIDCommitted,
417+
version,
418+
coordinationMetadata,
419+
transientSettings,
420+
persistentSettings,
421+
settings,
422+
hashesOfConsistentSettings,
423+
totalNumberOfShards,
424+
totalOpenIndexShards,
// The only argument that differs from this instance.
425+
updatedIndicesBuilder.build(),
426+
aliasedIndices,
427+
templates,
428+
customs,
429+
allIndices,
430+
visibleIndices,
431+
allOpenIndices,
432+
visibleOpenIndices,
433+
allClosedIndices,
434+
visibleClosedIndices,
435+
indicesLookup,
436+
mappingsByHash,
437+
oldestIndexVersion,
438+
reservedStateMetadata
439+
);
440+
}
441+
400442
public long version() {
401443
return this.version;
402444
}
@@ -1392,7 +1434,7 @@ public Builder put(IndexMetadata indexMetadata, boolean incrementVersion) {
13921434
return this;
13931435
}
13941436
// if we put a new index metadata, increment its version
1395-
indexMetadata = IndexMetadata.builder(indexMetadata).version(indexMetadata.getVersion() + 1).build();
1437+
indexMetadata = indexMetadata.withIncrementedVersion();
13961438
previous = indices.put(name, indexMetadata);
13971439
} else {
13981440
previous = indices.put(name, indexMetadata);
@@ -1853,7 +1895,7 @@ public Metadata build() {
18531895
return build(false);
18541896
}
18551897

1856-
private Metadata build(boolean skipNameCollisionChecks) {
1898+
public Metadata build(boolean skipNameCollisionChecks) {
18571899
// TODO: We should move these datastructures to IndexNameExpressionResolver, this will give the following benefits:
18581900
// 1) The datastructures will be rebuilt only when needed. Now during serializing we rebuild these datastructures
18591901
// while these datastructures aren't even used.

0 commit comments

Comments
 (0)