Skip to content

Commit 12daf87

Browse files
S O'Donnellahussein
authored andcommitted
HDDS-2199. In SCMNodeManager dnsToUuidMap cannot track multiple DNs on the same host
Closes apache#1551
1 parent 53c830e commit 12daf87

File tree

6 files changed

+149
-25
lines changed

6 files changed

+149
-25
lines changed

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -192,11 +192,11 @@ void processNodeReport(DatanodeDetails datanodeDetails,
192192
DatanodeDetails getNodeByUuid(String uuid);
193193

194194
/**
195-
* Given datanode address(Ipaddress or hostname), returns the DatanodeDetails
196-
* for the node.
195+
* Given datanode address(Ipaddress or hostname), returns a list of
196+
* DatanodeDetails for the datanodes running at that address.
197197
*
198198
* @param address datanode address
199-
* @return the given datanode, or null if not found
199+
* @return the given datanode, or empty list if none found
200200
*/
201-
DatanodeDetails getNodeByAddress(String address);
201+
List<DatanodeDetails> getNodesByAddress(String address);
202202
}

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,13 @@
2525
import java.util.List;
2626
import java.util.Map;
2727
import java.util.Set;
28+
import java.util.LinkedList;
2829
import java.util.UUID;
2930
import java.util.concurrent.ConcurrentHashMap;
3031
import java.util.concurrent.ScheduledFuture;
3132
import java.util.stream.Collectors;
3233

34+
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
3335
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
3436
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
3537
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState;
@@ -98,7 +100,7 @@ public class SCMNodeManager implements NodeManager {
98100
private final NetworkTopology clusterMap;
99101
private final DNSToSwitchMapping dnsToSwitchMapping;
100102
private final boolean useHostname;
101-
private final ConcurrentHashMap<String, String> dnsToUuidMap =
103+
private final ConcurrentHashMap<String, Set<String>> dnsToUuidMap =
102104
new ConcurrentHashMap<>();
103105

104106
/**
@@ -260,7 +262,7 @@ public RegisteredCommand register(
260262
}
261263
nodeStateManager.addNode(datanodeDetails);
262264
clusterMap.add(datanodeDetails);
263-
dnsToUuidMap.put(dnsName, datanodeDetails.getUuidString());
265+
addEntryTodnsToUuidMap(dnsName, datanodeDetails.getUuidString());
264266
// Updating Node Report, as registration is successful
265267
processNodeReport(datanodeDetails, nodeReport);
266268
LOG.info("Registered Data node : {}", datanodeDetails);
@@ -275,6 +277,26 @@ public RegisteredCommand register(
275277
.build();
276278
}
277279

280+
/**
281+
* Add an entry to the dnsToUuidMap, which maps hostname / IP to the DNs
282+
* running on that host. As each address can have many DNs running on it,
283+
* this is a one to many mapping.
284+
* @param dnsName String representing the hostname or IP of the node
285+
* @param uuid String representing the UUID of the registered node.
286+
*/
287+
@SuppressFBWarnings(value="AT_OPERATION_SEQUENCE_ON_CONCURRENT_ABSTRACTION",
288+
justification="The method is synchronized and this is the only place "+
289+
"dnsToUuidMap is modified")
290+
private synchronized void addEntryTodnsToUuidMap(
291+
String dnsName, String uuid) {
292+
Set<String> dnList = dnsToUuidMap.get(dnsName);
293+
if (dnList == null) {
294+
dnList = ConcurrentHashMap.newKeySet();
295+
dnsToUuidMap.put(dnsName, dnList);
296+
}
297+
dnList.add(uuid);
298+
}
299+
278300
/**
279301
* Send heartbeat to indicate the datanode is alive and doing well.
280302
*
@@ -584,29 +606,34 @@ public DatanodeDetails getNodeByUuid(String uuid) {
584606
}
585607

586608
/**
587-
* Given datanode address(Ipaddress or hostname), returns the DatanodeDetails
588-
* for the node.
609+
* Given datanode address(Ipaddress or hostname), return a list of
610+
* DatanodeDetails for the datanodes registered on that address.
589611
*
590612
* @param address datanode address
591-
* @return the given datanode, or null if not found
613+
* @return the given datanode, or empty list if none found
592614
*/
593615
@Override
594-
public DatanodeDetails getNodeByAddress(String address) {
616+
public List<DatanodeDetails> getNodesByAddress(String address) {
617+
List<DatanodeDetails> results = new LinkedList<>();
595618
if (Strings.isNullOrEmpty(address)) {
596619
LOG.warn("address is null");
597-
return null;
620+
return results;
598621
}
599-
String uuid = dnsToUuidMap.get(address);
600-
if (uuid != null) {
622+
Set<String> uuids = dnsToUuidMap.get(address);
623+
if (uuids == null) {
624+
LOG.warn("Cannot find node for address {}", address);
625+
return results;
626+
}
627+
628+
for (String uuid : uuids) {
601629
DatanodeDetails temp = DatanodeDetails.newBuilder().setUuid(uuid).build();
602630
try {
603-
return nodeStateManager.getNode(temp);
631+
results.add(nodeStateManager.getNode(temp));
604632
} catch (NodeNotFoundException e) {
605633
LOG.warn("Cannot find node for uuid {}", uuid);
606634
}
607635
}
608-
LOG.warn("Cannot find node for address {}", address);
609-
return null;
636+
return results;
610637
}
611638

612639
private String nodeResolve(String hostname) {

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,12 @@ public List<DatanodeDetails> sortDatanodes(List<String> nodes,
295295
boolean auditSuccess = true;
296296
try{
297297
NodeManager nodeManager = scm.getScmNodeManager();
298-
Node client = nodeManager.getNodeByAddress(clientMachine);
298+
Node client = null;
299+
List<DatanodeDetails> possibleClients =
300+
nodeManager.getNodesByAddress(clientMachine);
301+
if (possibleClients.size()>0){
302+
client = possibleClients.get(0);
303+
}
299304
List<Node> nodeList = new ArrayList();
300305
nodes.stream().forEach(uuid -> {
301306
DatanodeDetails node = nodeManager.getNodeByUuid(uuid);

hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ public class MockNodeManager implements NodeManager {
8888
private final Node2PipelineMap node2PipelineMap;
8989
private final Node2ContainerMap node2ContainerMap;
9090
private NetworkTopology clusterMap;
91-
private ConcurrentHashMap<String, String> dnsToUuidMap;
91+
private ConcurrentHashMap<String, Set<String>> dnsToUuidMap;
9292

9393
public MockNodeManager(boolean initializeFakeNodes, int nodeCount) {
9494
this.healthyNodes = new LinkedList<>();
@@ -386,7 +386,7 @@ public RegisteredCommand register(DatanodeDetails datanodeDetails,
386386
try {
387387
node2ContainerMap.insertNewDatanode(datanodeDetails.getUuid(),
388388
Collections.emptySet());
389-
dnsToUuidMap.put(datanodeDetails.getIpAddress(),
389+
addEntryTodnsToUuidMap(datanodeDetails.getIpAddress(),
390390
datanodeDetails.getUuidString());
391391
if (clusterMap != null) {
392392
datanodeDetails.setNetworkName(datanodeDetails.getUuidString());
@@ -398,6 +398,23 @@ public RegisteredCommand register(DatanodeDetails datanodeDetails,
398398
return null;
399399
}
400400

401+
/**
402+
* Add an entry to the dnsToUuidMap, which maps hostname / IP to the DNs
403+
* running on that host. As each address can have many DNs running on it,
404+
* this is a one to many mapping.
405+
* @param dnsName String representing the hostname or IP of the node
406+
* @param uuid String representing the UUID of the registered node.
407+
*/
408+
private synchronized void addEntryTodnsToUuidMap(
409+
String dnsName, String uuid) {
410+
Set<String> dnList = dnsToUuidMap.get(dnsName);
411+
if (dnList == null) {
412+
dnList = ConcurrentHashMap.newKeySet();
413+
dnsToUuidMap.put(dnsName, dnList);
414+
}
415+
dnList.add(uuid);
416+
}
417+
401418
/**
402419
* Send heartbeat to indicate the datanode is alive and doing well.
403420
*
@@ -484,8 +501,19 @@ public DatanodeDetails getNodeByUuid(String uuid) {
484501
}
485502

486503
@Override
487-
public DatanodeDetails getNodeByAddress(String address) {
488-
return getNodeByUuid(dnsToUuidMap.get(address));
504+
public List<DatanodeDetails> getNodesByAddress(String address) {
505+
List<DatanodeDetails> results = new LinkedList<>();
506+
Set<String> uuids = dnsToUuidMap.get(address);
507+
if (uuids == null) {
508+
return results;
509+
}
510+
for(String uuid : uuids) {
511+
DatanodeDetails dn = getNodeByUuid(uuid);
512+
if (dn != null) {
513+
results.add(dn);
514+
}
515+
}
516+
return results;
489517
}
490518

491519
public void setNetworkTopology(NetworkTopology topology) {

hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,6 +1064,25 @@ public void testScmRegisterNodeWithHostname()
10641064
testScmRegisterNodeWithNetworkTopology(true);
10651065
}
10661066

1067+
/**
1068+
* Test getNodesByAddress when using IPs.
1069+
*
1070+
*/
1071+
@Test
1072+
public void testgetNodesByAddressWithIpAddress()
1073+
throws IOException, InterruptedException, AuthenticationException {
1074+
testGetNodesByAddress(false);
1075+
}
1076+
1077+
/**
1078+
* Test getNodesByAddress when using hostnames.
1079+
*/
1080+
@Test
1081+
public void testgetNodesByAddressWithHostname()
1082+
throws IOException, InterruptedException, AuthenticationException {
1083+
testGetNodesByAddress(true);
1084+
}
1085+
10671086
/**
10681087
* Test add node into a 4-layer network topology during node register.
10691088
*/
@@ -1152,11 +1171,55 @@ private void testScmRegisterNodeWithNetworkTopology(boolean useHostname)
11521171
// test get node
11531172
if (useHostname) {
11541173
Arrays.stream(hostNames).forEach(hostname ->
1155-
Assert.assertNotNull(nodeManager.getNodeByAddress(hostname)));
1174+
Assert.assertNotEquals(0, nodeManager.getNodesByAddress(hostname)
1175+
.size()));
11561176
} else {
11571177
Arrays.stream(ipAddress).forEach(ip ->
1158-
Assert.assertNotNull(nodeManager.getNodeByAddress(ip)));
1178+
Assert.assertNotEquals(0, nodeManager.getNodesByAddress(ip)
1179+
.size()));
11591180
}
11601181
}
11611182
}
1183+
1184+
/**
1185+
* Test add node into a 4-layer network topology during node register.
1186+
*/
1187+
private void testGetNodesByAddress(boolean useHostname)
1188+
throws IOException, InterruptedException, AuthenticationException {
1189+
OzoneConfiguration conf = getConf();
1190+
conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, 1000,
1191+
MILLISECONDS);
1192+
1193+
// create a set of hosts - note two hosts on "host1"
1194+
String[] hostNames = {"host1", "host1", "host2", "host3", "host4"};
1195+
String[] ipAddress =
1196+
{"1.2.3.4", "1.2.3.4", "2.3.4.5", "3.4.5.6", "4.5.6.7"};
1197+
1198+
if (useHostname) {
1199+
conf.set(DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME, "true");
1200+
}
1201+
final int nodeCount = hostNames.length;
1202+
try (SCMNodeManager nodeManager = createNodeManager(conf)) {
1203+
DatanodeDetails[] nodes = new DatanodeDetails[nodeCount];
1204+
for (int i = 0; i < nodeCount; i++) {
1205+
DatanodeDetails node = TestUtils.createDatanodeDetails(
1206+
UUID.randomUUID().toString(), hostNames[i], ipAddress[i], null);
1207+
nodeManager.register(node, null, null);
1208+
}
1209+
// test get node
1210+
Assert.assertEquals(0, nodeManager.getNodesByAddress(null).size());
1211+
if (useHostname) {
1212+
Assert.assertEquals(2,
1213+
nodeManager.getNodesByAddress("host1").size());
1214+
Assert.assertEquals(1, nodeManager.getNodesByAddress("host2").size());
1215+
Assert.assertEquals(0, nodeManager.getNodesByAddress("unknown").size());
1216+
} else {
1217+
Assert.assertEquals(2,
1218+
nodeManager.getNodesByAddress("1.2.3.4").size());
1219+
Assert.assertEquals(1, nodeManager.getNodesByAddress("2.3.4.5").size());
1220+
Assert.assertEquals(0, nodeManager.getNodesByAddress("1.9.8.7").size());
1221+
}
1222+
}
1223+
}
1224+
11621225
}

hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
import java.util.Map;
4545
import java.util.Set;
4646
import java.util.UUID;
47+
import java.util.LinkedList;
4748

4849
/**
4950
* A Node Manager to test replication.
@@ -323,7 +324,7 @@ public DatanodeDetails getNodeByUuid(String address) {
323324
}
324325

325326
@Override
326-
public DatanodeDetails getNodeByAddress(String address) {
327-
return null;
327+
public List<DatanodeDetails> getNodesByAddress(String address) {
328+
return new LinkedList<>();
328329
}
329330
}

0 commit comments

Comments
 (0)