Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ public final class HddsConfigKeys {
public static final double
HDDS_SCM_CHILLMODE_HEALTHY_PIPELINE_THRESHOLD_PCT_DEFAULT = 0.10;

public static final String HDDS_SCM_CHILLMODE_ONE_NODE_REPORTED_PIPELINE_PCT =
"hdds.scm.chillmode.atleast.one.node.reported.pipeline.pct";
public static final double
HDDS_SCM_CHILLMODE_ONE_NODE_REPORTED_PIPELINE_PCT_DEFAULT = 0.90;

public static final String HDDS_LOCK_MAX_CONCURRENCY =
"hdds.lock.max.concurrency";
public static final int HDDS_LOCK_MAX_CONCURRENCY_DEFAULT = 100;
Expand Down
10 changes: 10 additions & 0 deletions hadoop-hdds/common/src/main/resources/ozone-default.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1325,6 +1325,16 @@
</description>
</property>

<property>
<name>hdds.scm.chillmode.atleast.one.node.reported.pipeline.pct</name>
<value>0.90</value>
<tag>HDDS,SCM,OPERATION</tag>
<description>
Percentage of pipelines, where at least one datanode is reported in the
pipeline.
</description>
</property>

<property>
<name>hdds.container.action.max.limit</name>
<value>20</value>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package org.apache.hadoop.hdds.scm.chillmode;

import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto
.StorageContainerDatanodeProtocolProtos.PipelineReport;
import org.apache.hadoop.hdds.protocol.proto
.StorageContainerDatanodeProtocolProtos.PipelineReportsProto;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
import org.apache.hadoop.hdds.scm.pipeline.PipelineNotFoundException;
import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.
PipelineReportFromDatanode;
import org.apache.hadoop.hdds.server.events.EventHandler;
import org.apache.hadoop.hdds.server.events.EventPublisher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.HashSet;
import java.util.Set;

/**
* This rule covers whether we have atleast one datanode is reported for each
* pipeline. This rule is for all open containers, we have at least one
* replica available for read when we exit chill mode.
*/
public class OneReplicaPipelineChillModeRule implements
ChillModeExitRule<PipelineReportFromDatanode>,
EventHandler<PipelineReportFromDatanode> {

private static final Logger LOG =
LoggerFactory.getLogger(OneReplicaPipelineChillModeRule.class);

private int thresholdCount;
private Set<PipelineID> reportedPipelineIDSet = new HashSet<>();
private final PipelineManager pipelineManager;
private final SCMChillModeManager chillModeManager;

public OneReplicaPipelineChillModeRule(PipelineManager pipelineManager,
SCMChillModeManager chillModeManager,
Configuration configuration) {
this.chillModeManager = chillModeManager;
this.pipelineManager = pipelineManager;

double percent =
configuration.getDouble(
HddsConfigKeys.HDDS_SCM_CHILLMODE_ONE_NODE_REPORTED_PIPELINE_PCT,
HddsConfigKeys.
HDDS_SCM_CHILLMODE_ONE_NODE_REPORTED_PIPELINE_PCT_DEFAULT);

int totalPipelineCount =
pipelineManager.getPipelines(HddsProtos.ReplicationType.RATIS,
HddsProtos.ReplicationFactor.THREE).size();

thresholdCount = (int) Math.ceil(percent * totalPipelineCount);

LOG.info(" Total pipeline count is {}, pipeline's with atleast one " +
"datanode reported threshold count is {}", totalPipelineCount,
thresholdCount);

}
@Override
public boolean validate() {
if (reportedPipelineIDSet.size() >= thresholdCount) {
return true;
}
return false;
}

@Override
public void process(PipelineReportFromDatanode pipelineReportFromDatanode) {
Pipeline pipeline;
Preconditions.checkNotNull(pipelineReportFromDatanode);
PipelineReportsProto pipelineReport =
pipelineReportFromDatanode.getReport();

for (PipelineReport report : pipelineReport.getPipelineReportList()) {
PipelineID pipelineID = PipelineID
.getFromProtobuf(report.getPipelineID());
try {
pipeline = pipelineManager.getPipeline(pipelineID);
} catch (PipelineNotFoundException e) {
continue;
}

if (pipeline.getFactor() == HddsProtos.ReplicationFactor.THREE &&
!reportedPipelineIDSet.contains(pipelineID)) {
reportedPipelineIDSet.add(pipelineID);
}
}
}

@Override
public void cleanup() {
reportedPipelineIDSet.clear();
}

@Override
public void onMessage(PipelineReportFromDatanode pipelineReportFromDatanode,
EventPublisher publisher) {

if (validate()) {
chillModeManager.validateChillModeExitRules(publisher);
return;
}

// Process pipeline report from datanode
process(pipelineReportFromDatanode);

if (chillModeManager.getInChillMode()) {
SCMChillModeManager.getLogger().info(
"SCM in chill mode. Pipelines with atleast one datanode reported " +
"count is {}, required atleast one datanode reported per " +
"pipeline count is {}",
reportedPipelineIDSet.size(), thresholdCount);
}

if (validate()) {
chillModeManager.validateChillModeExitRules(publisher);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ public class SCMChillModeManager implements
private static final String DN_EXIT_RULE = "DataNodeChillModeRule";
private static final String HEALTHY_PIPELINE_EXIT_RULE =
"HealthyPipelineChillModeRule";
private static final String ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE =
"AtleastOneDatanodeReportedRule";

private final EventQueue eventPublisher;
private final PipelineManager pipelineManager;
Expand All @@ -86,8 +88,14 @@ public SCMChillModeManager(Configuration conf,
&& pipelineManager != null) {
HealthyPipelineChillModeRule rule = new HealthyPipelineChillModeRule(
pipelineManager, this, config);
OneReplicaPipelineChillModeRule oneReplicaPipelineChillModeRule =
new OneReplicaPipelineChillModeRule(pipelineManager, this, conf);
exitRules.put(HEALTHY_PIPELINE_EXIT_RULE, rule);
exitRules.put(ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE,
oneReplicaPipelineChillModeRule);
eventPublisher.addHandler(SCMEvents.PROCESSED_PIPELINE_REPORT, rule);
eventPublisher.addHandler(SCMEvents.PROCESSED_PIPELINE_REPORT,
oneReplicaPipelineChillModeRule);
}
emitChillModeStatus();
} else {
Expand Down Expand Up @@ -179,4 +187,10 @@ public HealthyPipelineChillModeRule getHealthyPipelineChillModeRule() {
exitRules.get(HEALTHY_PIPELINE_EXIT_RULE);
}

@VisibleForTesting
public OneReplicaPipelineChillModeRule getOneReplicaPipelineChillModeRule() {
return (OneReplicaPipelineChillModeRule)
exitRules.get(ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE);
}

}
Loading