From d04973b89cbb195f9384cb92d5b872bb2cb23bc3 Mon Sep 17 00:00:00 2001 From: Hanisha Koneru Date: Wed, 10 Apr 2019 17:51:31 -0700 Subject: [PATCH 1/4] HDDS-1376. Datanode exits while executing client command when scmId is null. --- .../common/statemachine/DatanodeStateMachine.java | 1 - .../common/states/endpoint/VersionEndpointTask.java | 3 ++- .../ozone/container/ozoneimpl/OzoneContainer.java | 3 ++- .../TestCloseContainerCommandHandler.java | 11 +++++------ .../ozone/container/ozoneimpl/TestOzoneContainer.java | 5 ++--- .../ozoneimpl/TestOzoneContainerWithTLS.java | 5 ++--- .../container/ozoneimpl/TestSecureOzoneContainer.java | 5 ++--- 7 files changed, 15 insertions(+), 18 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java index ff391031a52f5..69782efbfdf45 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java @@ -166,7 +166,6 @@ public OzoneContainer getContainer() { private void start() throws IOException { long now = 0; - container.start(); reportManager.init(); initCommandHandlerThread(conf); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java index e4c0eb16aab91..04eaa05f44c01 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java @@ -106,7 +106,8 @@ public EndpointStateMachine.EndPointStates call() throws Exception { volumeSet.writeUnlock(); } - ozoneContainer.getDispatcher().setScmId(scmId); + // Start the container services after getting the version information + ozoneContainer.start(scmId); EndpointStateMachine.EndPointStates nextState = rpcEndPoint.getState().getNextState(); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java index 87266a99ce769..ed7c88c520cd3 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java @@ -180,12 +180,13 @@ private void stopContainerScrub() { * * @throws IOException */ - public void start() throws IOException { + public void start(String scmId) throws IOException { LOG.info("Attempting to start container services."); startContainerScrub(); writeChannel.start(); readChannel.start(); hddsDispatcher.init(); + hddsDispatcher.setScmId(scmId); } /** diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestCloseContainerCommandHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestCloseContainerCommandHandler.java index 731e74c19cebd..7b74654c5ca0e 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestCloseContainerCommandHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestCloseContainerCommandHandler.java @@ -68,7 +68,7 @@ public void testCloseContainerViaRatis() final DatanodeDetails datanodeDetails = randomDatanodeDetails(); final OzoneContainer ozoneContainer = getOzoneContainer(conf, datanodeDetails); - ozoneContainer.start(); + ozoneContainer.start(UUID.randomUUID().toString()); try { final Container container = createContainer(conf, datanodeDetails, ozoneContainer); @@ -105,7 +105,7 @@ public void testCloseContainerViaStandalone() final DatanodeDetails datanodeDetails = randomDatanodeDetails(); final OzoneContainer ozoneContainer = getOzoneContainer(conf, datanodeDetails); - ozoneContainer.start(); + ozoneContainer.start(UUID.randomUUID().toString()); try { final Container container = createContainer(conf, datanodeDetails, ozoneContainer); @@ -140,7 +140,7 @@ public void testQuasiCloseToClose() throws Exception { final DatanodeDetails datanodeDetails = randomDatanodeDetails(); final OzoneContainer ozoneContainer = getOzoneContainer(conf, datanodeDetails); - ozoneContainer.start(); + ozoneContainer.start(UUID.randomUUID().toString()); try { final Container container = createContainer(conf, datanodeDetails, ozoneContainer); @@ -188,7 +188,7 @@ public void testForceCloseOpenContainer() throws Exception { final DatanodeDetails datanodeDetails = randomDatanodeDetails(); final OzoneContainer ozoneContainer = getOzoneContainer(conf, datanodeDetails); - ozoneContainer.start(); + ozoneContainer.start(UUID.randomUUID().toString()); try { final Container container = createContainer(conf, datanodeDetails, ozoneContainer); @@ -224,7 +224,7 @@ public void testQuasiCloseClosedContainer() final DatanodeDetails datanodeDetails = randomDatanodeDetails(); final OzoneContainer ozoneContainer = getOzoneContainer( conf, datanodeDetails); - ozoneContainer.start(); + ozoneContainer.start(UUID.randomUUID().toString()); try { final Container container = createContainer( conf, datanodeDetails, ozoneContainer); @@ -277,7 +277,6 @@ private OzoneContainer getOzoneContainer(final OzoneConfiguration conf, Mockito.when(context.getParent()).thenReturn(datanodeStateMachine); final OzoneContainer ozoneContainer = new OzoneContainer( datanodeDetails, conf, context, null); - ozoneContainer.getDispatcher().setScmId(UUID.randomUUID().toString()); return ozoneContainer; } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java index eeb6c5375c917..fbdfbed75bd6a 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java @@ -82,9 +82,8 @@ public void testCreateOzoneContainer() throws Exception { Mockito.when(dsm.getDatanodeDetails()).thenReturn(datanodeDetails); Mockito.when(context.getParent()).thenReturn(dsm); container = new OzoneContainer(datanodeDetails, conf, context, null); - //Setting scmId, as we start manually ozone container. - container.getDispatcher().setScmId(UUID.randomUUID().toString()); - container.start(); + //Set scmId and manually start ozone container. + container.start(UUID.randomUUID().toString()); XceiverClientGrpc client = new XceiverClientGrpc(pipeline, conf); client.connect(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java index a6e8dd06e8909..c2937a87262c2 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java @@ -155,9 +155,8 @@ public void testCreateOzoneContainer() throws Exception { OzoneConfigKeys.DFS_CONTAINER_IPC_RANDOM_PORT, false); container = new OzoneContainer(dn, conf, getContext(dn), null); - //Setting scmId, as we start manually ozone container. - container.getDispatcher().setScmId(UUID.randomUUID().toString()); - container.start(); + //Set scmId and manually start ozone container. + container.start(UUID.randomUUID().toString()); XceiverClientGrpc client = new XceiverClientGrpc(pipeline, conf); client.connect(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestSecureOzoneContainer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestSecureOzoneContainer.java index 07836ea2815ae..c086f31925725 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestSecureOzoneContainer.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestSecureOzoneContainer.java @@ -143,9 +143,8 @@ public void testCreateOzoneContainer() throws Exception { DatanodeDetails dn = TestUtils.randomDatanodeDetails(); container = new OzoneContainer(dn, conf, getContext(dn), caClient); - //Setting scmId, as we start manually ozone container. - container.getDispatcher().setScmId(UUID.randomUUID().toString()); - container.start(); + //Set scmId and manually start ozone container. + container.start(UUID.randomUUID().toString()); UserGroupInformation ugi = UserGroupInformation.createUserForTesting( "user1", new String[] {"usergroup"}); From 3db796e3d738a916b2d846603b6b9eb6b1d7be62 Mon Sep 17 00:00:00 2001 From: Hanisha Koneru Date: Thu, 11 Apr 2019 15:43:01 -0700 Subject: [PATCH 2/4] Fixing TestDatanodeStateMachine test failure --- .../container/keyvalue/KeyValueContainer.java | 1 - .../helpers/KeyValueContainerLocationUtil.java | 1 - .../common/TestDatanodeStateMachine.java | 15 +++++++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java index 26b0ce1d788b2..09730c348e43c 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java @@ -107,7 +107,6 @@ public void create(VolumeSet volumeSet, VolumeChoosingPolicy Preconditions.checkNotNull(volumeChoosingPolicy, "VolumeChoosingPolicy " + "cannot be null"); Preconditions.checkNotNull(volumeSet, "VolumeSet cannot be null"); - Preconditions.checkNotNull(scmId, "scmId cannot be null"); File containerMetaDataPath = null; //acquiring volumeset read lock diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerLocationUtil.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerLocationUtil.java index 0c7a04e51da3b..ef392577a7f3e 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerLocationUtil.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerLocationUtil.java @@ -78,7 +78,6 @@ private static String getBaseContainerLocation(String hddsVolumeDir, String scmId, long containerId) { Preconditions.checkNotNull(hddsVolumeDir, "Base Directory cannot be null"); - Preconditions.checkNotNull(scmId, "scmUuid cannot be null"); Preconditions.checkState(containerId >= 0, "Container Id cannot be negative."); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java index 29160eeca2280..54565afb70af1 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java @@ -258,6 +258,21 @@ public void testDatanodeStateContext() throws IOException, task.execute(executorService); newState = task.await(10, TimeUnit.SECONDS); + + // Wait for GetVersion call (called by task.execute) to finish. After + // Earlier task.execute called into GetVersion. Wait for the execution + // to finish and the endPointState to move to REGISTER state. + GenericTestUtils.waitFor(() -> { + for (EndpointStateMachine endpoint : + stateMachine.getConnectionManager().getValues()) { + if (endpoint.getState() != + EndpointStateMachine.EndPointStates.REGISTER) { + return false; + } + } + return true; + },1000, 50000) + ; // If we are in running state, we should be in running. Assert.assertEquals(DatanodeStateMachine.DatanodeStates.RUNNING, newState); From 4236ad7256adc7e2971c12cf1bf40a9805931ddc Mon Sep 17 00:00:00 2001 From: Hanisha Koneru Date: Fri, 12 Apr 2019 11:23:03 -0700 Subject: [PATCH 3/4] Fixing checkstyle issues --- .../ozone/container/common/TestDatanodeStateMachine.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java index 54565afb70af1..47438dc88270a 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java @@ -271,8 +271,8 @@ public void testDatanodeStateContext() throws IOException, } } return true; - },1000, 50000) - ; + }, 1000, 50000); + // If we are in running state, we should be in running. Assert.assertEquals(DatanodeStateMachine.DatanodeStates.RUNNING, newState); From 60ea54e2ab02d87679198621cad028b0efef19bc Mon Sep 17 00:00:00 2001 From: Hanisha Koneru Date: Fri, 12 Apr 2019 13:54:33 -0700 Subject: [PATCH 4/4] Adding Preconditions back --- .../hadoop/ozone/container/keyvalue/KeyValueContainer.java | 1 + .../keyvalue/helpers/KeyValueContainerLocationUtil.java | 1 + 2 files changed, 2 insertions(+) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java index 09730c348e43c..26b0ce1d788b2 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java @@ -107,6 +107,7 @@ public void create(VolumeSet volumeSet, VolumeChoosingPolicy Preconditions.checkNotNull(volumeChoosingPolicy, "VolumeChoosingPolicy " + "cannot be null"); Preconditions.checkNotNull(volumeSet, "VolumeSet cannot be null"); + Preconditions.checkNotNull(scmId, "scmId cannot be null"); File containerMetaDataPath = null; //acquiring volumeset read lock diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerLocationUtil.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerLocationUtil.java index ef392577a7f3e..0c7a04e51da3b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerLocationUtil.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerLocationUtil.java @@ -78,6 +78,7 @@ private static String getBaseContainerLocation(String hddsVolumeDir, String scmId, long containerId) { Preconditions.checkNotNull(hddsVolumeDir, "Base Directory cannot be null"); + Preconditions.checkNotNull(scmId, "scmUuid cannot be null"); Preconditions.checkState(containerId >= 0, "Container Id cannot be negative.");