Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ public final class ScmConfigKeys {
public static final String OZONE_SCM_STALENODE_INTERVAL =
"ozone.scm.stale.node.interval";
public static final String OZONE_SCM_STALENODE_INTERVAL_DEFAULT =
"90s";
"5m";

public static final String OZONE_SCM_HEARTBEAT_RPC_TIMEOUT =
"ozone.scm.heartbeat.rpc-timeout";
Expand Down Expand Up @@ -331,7 +331,7 @@ public final class ScmConfigKeys {
"ozone.scm.pipeline.destroy.timeout";

public static final String OZONE_SCM_PIPELINE_DESTROY_TIMEOUT_DEFAULT =
"300s";
"66s";

public static final String OZONE_SCM_PIPELINE_CREATION_INTERVAL =
"ozone.scm.pipeline.creation.interval";
Expand Down
4 changes: 2 additions & 2 deletions hadoop-hdds/common/src/main/resources/ozone-default.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1043,7 +1043,7 @@
</property>
<property>
<name>ozone.scm.stale.node.interval</name>
<value>90s</value>
<value>5m</value>
<tag>OZONE, MANAGEMENT</tag>
<description>
The interval for stale node flagging. Please
Expand Down Expand Up @@ -1282,7 +1282,7 @@
</property>
<property>
<name>ozone.scm.pipeline.destroy.timeout</name>
<value>300s</value>
<value>66s</value>
<tag>OZONE, SCM, PIPELINE</tag>
<description>
Once a pipeline is closed, SCM should wait for the above configured time
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,25 +252,15 @@ public static long getStaleNodeInterval(Configuration conf) {
//
// Here we check that staleNodeInterval is at least five times more than the
// frequency at which the accounting thread is going to run.
try {
sanitizeUserArgs(staleNodeIntervalMs, heartbeatThreadFrequencyMs,
5, 1000);
} catch (IllegalArgumentException ex) {
LOG.error("Stale Node Interval is cannot be honored due to " +
"mis-configured {}. ex: {}",
OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, ex);
throw ex;
}
staleNodeIntervalMs = sanitizeUserArgs(OZONE_SCM_STALENODE_INTERVAL,
staleNodeIntervalMs, OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL,
heartbeatThreadFrequencyMs, 5, 1000);

// Make sure that stale node value is greater than configured value that
// datanodes are going to send HBs.
try {
sanitizeUserArgs(staleNodeIntervalMs, heartbeatIntervalMs, 3, 1000);
} catch (IllegalArgumentException ex) {
LOG.error("Stale Node Interval MS is cannot be honored due to " +
"mis-configured {}. ex: {}", HDDS_HEARTBEAT_INTERVAL, ex);
throw ex;
}
staleNodeIntervalMs = sanitizeUserArgs(OZONE_SCM_STALENODE_INTERVAL,
staleNodeIntervalMs, HDDS_HEARTBEAT_INTERVAL, heartbeatIntervalMs, 3,
1000);
return staleNodeIntervalMs;
}

Expand All @@ -289,16 +279,10 @@ public static long getDeadNodeInterval(Configuration conf) {
OZONE_SCM_DEADNODE_INTERVAL_DEFAULT,
TimeUnit.MILLISECONDS);

try {
// Make sure that dead nodes Ms is at least twice the time for staleNodes
// with a max of 1000 times the staleNodes.
sanitizeUserArgs(deadNodeIntervalMs, staleNodeIntervalMs, 2, 1000);
} catch (IllegalArgumentException ex) {
LOG.error("Dead Node Interval MS is cannot be honored due to " +
"mis-configured {}. ex: {}", OZONE_SCM_STALENODE_INTERVAL, ex);
throw ex;
}
return deadNodeIntervalMs;
// Make sure that dead nodes Ms is at least twice the time for staleNodes
// with a max of 1000 times the staleNodes.
return sanitizeUserArgs(OZONE_SCM_DEADNODE_INTERVAL, deadNodeIntervalMs,
OZONE_SCM_STALENODE_INTERVAL, staleNodeIntervalMs, 2, 1000);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,23 +47,34 @@ private ServerUtils() {
* For example, sanitizeUserArgs(17, 3, 5, 10)
* ensures that 17 is greater/equal than 3 * 5 and less/equal to 3 * 10.
*
* @param key - config key of the value
* @param valueTocheck - value to check
* @param baseKey - config key of the baseValue
* @param baseValue - the base value that is being used.
* @param minFactor - range min - a 2 here makes us ensure that value
* valueTocheck is at least twice the baseValue.
* @param maxFactor - range max
* @return long
*/
public static long sanitizeUserArgs(long valueTocheck, long baseValue,
long minFactor, long maxFactor)
throws IllegalArgumentException {
if ((valueTocheck >= (baseValue * minFactor)) &&
(valueTocheck <= (baseValue * maxFactor))) {
return valueTocheck;
public static long sanitizeUserArgs(String key, long valueTocheck,
String baseKey, long baseValue, long minFactor, long maxFactor) {
long minLimit = baseValue * minFactor;
long maxLimit = baseValue * maxFactor;
if (valueTocheck < minLimit) {
LOG.warn(
"{} value = {} is smaller than min = {} based on"
+ " the key value of {}, reset to the min value {}.",
key, valueTocheck, minLimit, baseKey, minLimit);
valueTocheck = minLimit;
} else if (valueTocheck > maxLimit) {
LOG.warn(
"{} value = {} is larger than max = {} based on"
+ " the key value of {}, reset to the max value {}.",
key, valueTocheck, maxLimit, baseKey, maxLimit);
valueTocheck = maxLimit;
}
String errMsg = String.format("%d is not within min = %d or max = " +
"%d", valueTocheck, baseValue * minFactor, baseValue * maxFactor);
throw new IllegalArgumentException(errMsg);

return valueTocheck;
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,29 @@
package org.apache.hadoop.hdds.scm;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.server.ServerUtils;
import org.apache.hadoop.test.PathUtils;

import org.apache.commons.io.FileUtils;
import static org.junit.Assert.assertTrue;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.junit.rules.Timeout;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.net.InetSocketAddress;
import java.util.concurrent.TimeUnit;

import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CLIENT_ADDRESS_KEY;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DATANODE_ADDRESS_KEY;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DATANODE_PORT_DEFAULT;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_NAMES;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL;
import static org.junit.Assert.assertEquals;

/**
Expand Down Expand Up @@ -154,4 +163,67 @@ public void testClientFailsWithMultipleScmNames() {
HddsServerUtil.getScmAddressForDataNodes(conf);
}

/**
* Test {@link ServerUtils#getScmDbDir}.
*/
@Test
public void testGetScmDbDir() {
final File testDir = PathUtils.getTestDir(TestHddsServerUtils.class);
final File dbDir = new File(testDir, "scmDbDir");
final File metaDir = new File(testDir, "metaDir"); // should be ignored.
final Configuration conf = new OzoneConfiguration();
conf.set(ScmConfigKeys.OZONE_SCM_DB_DIRS, dbDir.getPath());
conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, metaDir.getPath());

try {
assertEquals(dbDir, ServerUtils.getScmDbDir(conf));
assertTrue(dbDir.exists()); // should have been created.
} finally {
FileUtils.deleteQuietly(dbDir);
}
}

/**
* Test {@link ServerUtils#getScmDbDir} with fallback to OZONE_METADATA_DIRS
* when OZONE_SCM_DB_DIRS is undefined.
*/
@Test
public void testGetScmDbDirWithFallback() {
final File testDir = PathUtils.getTestDir(TestHddsServerUtils.class);
final File metaDir = new File(testDir, "metaDir");
final Configuration conf = new OzoneConfiguration();
conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, metaDir.getPath());
try {
assertEquals(metaDir, ServerUtils.getScmDbDir(conf));
assertTrue(metaDir.exists()); // should have been created.
} finally {
FileUtils.deleteQuietly(metaDir);
}
}

@Test
public void testNoScmDbDirConfigured() {
thrown.expect(IllegalArgumentException.class);
ServerUtils.getScmDbDir(new OzoneConfiguration());
}

@Test
public void testGetStaleNodeInterval() {
final Configuration conf = new OzoneConfiguration();

// Reset OZONE_SCM_STALENODE_INTERVAL to 300s that
// larger than max limit value.
conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 300, TimeUnit.SECONDS);
conf.setInt(ScmConfigKeys.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, 100);
// the max limit value will be returned
assertEquals(100000, HddsServerUtil.getStaleNodeInterval(conf));

// Reset OZONE_SCM_STALENODE_INTERVAL to 10ms that
// smaller than min limit value.
conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 10,
TimeUnit.MILLISECONDS);
conf.setInt(ScmConfigKeys.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, 100);
// the min limit value will be returned
assertEquals(90000, HddsServerUtil.getStaleNodeInterval(conf));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -234,37 +234,6 @@ public void testScmHealthyNodeCount()
}
}

/**
* Asserts that if user provides a value less than 5 times the heartbeat
* interval as the StaleNode Value, we throw since that is a QoS that we
* cannot maintain.
*
* @throws IOException
* @throws InterruptedException
* @throws TimeoutException
*/

@Test
public void testScmSanityOfUserConfig1()
throws IOException, AuthenticationException {
OzoneConfiguration conf = getConf();
final int interval = 100;
conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, interval,
MILLISECONDS);
conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 1, SECONDS);

// This should be 5 times more than OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL
// and 3 times more than OZONE_SCM_HEARTBEAT_INTERVAL
conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, interval, MILLISECONDS);

thrown.expect(IllegalArgumentException.class);

// This string is a multiple of the interval value
thrown.expectMessage(
startsWith("100 is not within min = 500 or max = 100000"));
createNodeManager(conf);
}

/**
* Asserts that if Stale Interval value is more than 5 times the value of HB
* processing thread it is a sane value.
Expand Down