From f73d13a7c16369edfdca2aa7bbebf205b6e7aedb Mon Sep 17 00:00:00 2001 From: Huaxiang Sun Date: Wed, 13 Apr 2022 12:41:11 -0700 Subject: [PATCH] HDFS-16540 Data locality is lost when DataNode pod restarts in kubernetes --- .BUILDING.txt.swp | Bin 0 -> 16384 bytes .../blockmanagement/DatanodeManager.java | 17 +++++++++- .../blockmanagement/TestDatanodeManager.java | 29 ++++++++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 .BUILDING.txt.swp diff --git a/.BUILDING.txt.swp b/.BUILDING.txt.swp new file mode 100644 index 0000000000000000000000000000000000000000..1fb0c25d0a5160f91646c273b130ff1d4363c74f GIT binary patch literal 16384 zcmeHN&5s;M6)yvU@DUOv95_BmWbHcJ-8-|!_O2Ab-nEmB_rvmd4Guz9O?S;q@Ah}~NkP#Ik{{RRGgx{;~-mfeMAzK2}BmL&1 zK3={0)qAh1UQd7i=IT0K2)hEWM}!!^`sMzM??k8i!&72p%bbh<)oWeGm$cpqt{EM4 z!%W9AJF&~xSYWiP%}z9uiPlAQX=`1Fm>byZ|*nP^-FGf1Wz_`Ck3H zT9;otR{!_9{@J?x+sEqvR@Yyy%ilRx|88A>t1drNE70ii*SbEg>%VcV{x5a?b9Mdi zj@5s#E`PB8{9--fMt)t~ezXj<473ci473ci473ci473ci473ci473dVUos#QA#P)4 zKjn)sp8q%Z|F<3%;!WTs;OoE(!1KTmxDK2F{`g5Dehz#G(7>a>YYz$01K#_D5U&H< zz$Op@fBHE30N)3$0}H@!KPJT6z^{QH0zUv=0$v2p18;v+h_`@m0$&171Ao9J{`bId zfIGlbz&YUEj|lNHum^krc=N+T`~>(h@D<=;;1>@F@jc+Xz_);1U>&#&JPtepyz(K8 z2lRkX0|Iyh34{S~3wQwdCGrP91AYX&0(=$t6*y!bU%UTF1)?WtypyDc3ZtFwmxEv- zJRevS3$BVwlTolnfu*2FBj*ac6h*_-jmmx)>v80J#X3XBfikxGjUSOwnUYr7DABul zrezXMAUN#ow+Oc7MDk!^Cv?(;!#fn{BUX^) zdy@gh+#Jew7Q^{Q+ta5hPL9+Zp{yaxcMY`}MuRj1AE2SL7@Z{W^GAf$!(i^q>#RI? 
zJD5*dB+{usW=@%E##o`Co*+e~HBmp!qhTBq<8VQ+NayC!*k%$u9n?JeBn^SX1;L>a zW#NLMauiG(9=SP$LHV?8MHPiyD*I*b%D`!z#Uq*Kfr*^5 z?oikF4{i0p?MkBptX`pvOO*{^k7J#Ku&0-oX)#;~=kF@Qn@gaqRk=%LMzUlJ%Lo38 zH;*jsvr^Tv&>j5j41+{XsMr}|w0w{b35C{XzLdX8n|1jIwAiQ&5v4{N=yz9T##jqh zecgYu>v^+1fpkna-%fUd3jyBWFSDlo3~izvJ-c$1I^hL`Nd@<0Cm|>o*s>_Jaq!J} zN115Ff3SJZE{T~d+R&C?66(%}^JnLHg(Ef%nb-CXSId(OUbl-#&@_%^PJKnCRY@pT zD_2YNA?>?pA>$o6RF(!9Ax3z}_bo{e8Y!I%7?Rd*^k_xd9jA*>1o!+Uz#C7-lsQdx zX=rujvM;EZAmw6KElm`;k#ux(Jw%U-IUVvhvN|hWs&i)5jZ(|8tDmVc;VX(7UtCM` za<7R>wNOcu)VLBxHD!{1YDOT?0MXVFn&xRK(~;aFIcMz1B* z-C(_ZV;Z4dU%@7fLW>C+qCiqo7Fil2mdtqqAPG*X)7fpD8Fo&ahBfU zW!<>aqp^nPvSI%x%71Qg0X=(49a0n{?&R1o_PNA10^O?ce!vAW>zFhCFbMOY&78kg z#)v-gLVHtR1Wjg z`55!{y*D@3Zb4n_1~QHj5G#vT^vvNI2*DnM?eoB)IXG6{%yVF);3ht<@((Z3n_TS+ z(9wqwA30%Bh!Zy|%N-XE6HLJ;I}bBlz)YbZQ)-F-3d&#Min1~~{Fvap!S09H!3%we zXLfH6p<(2y3hGFoPeMDY>Q<}pM75zbvA~nW+XxDl1k9>(-iXZ$OYgm@4rlB;FdwGy zkRmm~GoA2k*zuDcr9&(nVXLYw8*at_T%8R?tvDQaf=H7z%2XRmMGcNn*g3$BobsaZ z{)?!2c>R0?=2c-1WWiCuq#z5;d2v8qe@bJ-T6hwI@O{xh8SLt)V78%7$v$L>B~+$zWvh3YX0Gr_Z)zQH203k6SSmI@ zHM?&r-S8YiVcFMkx|t(G4zPjV#+WRV_j;Tzc+XbZv3B)2w~8`Bkrvok*^X;7VFmka zI?t>iP-CKIT6m&xT519!pL&-wiRsL+(U*DZrj3fTb4^ua-pp{i*`q~242NYid$LZ| z*&9cY95J`@&Wcj-0OsJu^#KGwjFQQ1|5tCsHEnP{SUNbrF-Lx&Il7rz-Y_8@nGY`sn%`xnKPAi^-8SJ=O<%6c(lvp6}jW zc4f5-;le%-n0IreJ40J}x<@^4W_-Z&7a?ztOZUaqFL}S>P;N@^b5-cC97^#0{|(&N zKZUzH-~Vxq_ODlQzb^m{{26sG16#lifZIL>w6B(dmVuUmmVuUmmVuUmmVuUmmVuUm zmVy6m237}D=2p2mpDsaaY8%O#Cc%K)c$^_E#yL}8e{FLe8JrA{>1_QFL0rKdT)N8SiRkkzm{uM+c(0Axa{ zyRuKkLmaZkjrvtCz8mhLr+;qRkdi`4WUqbZ#weuWa}d^_3b8yw-loY!)pDDW_N5-` NK{pR(eLvCz{{Xbw)c^nh literal 0 HcmV?d00001 diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java index a9850aa7f5a9b..fa4f573da569c 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java @@ -1171,6 +1171,7 @@ public void registerDatanode(DatanodeRegistration nodeReg) nodeN = null; } + boolean updateHost2DatanodeMap = false; if (nodeS != null) { if (nodeN == nodeS) { // The same datanode has been just restarted to serve the same data @@ -1189,7 +1190,11 @@ public void registerDatanode(DatanodeRegistration nodeReg) nodes with its data cleared (or user can just remove the StorageID value in "VERSION" file under the data directory of the datanode, but this is might not work if VERSION file format has changed - */ + */ + // Check if nodeS's host information is same as nodeReg's, if not, + // it needs to update host2DatanodeMap accordingly. + updateHost2DatanodeMap = !nodeS.getXferAddr().equals(nodeReg.getXferAddr()); + NameNode.stateChangeLog.info("BLOCK* registerDatanode: " + nodeS + " is replaced by " + nodeReg + " with the same storageID " + nodeReg.getDatanodeUuid()); @@ -1199,6 +1204,11 @@ nodes with its data cleared (or user can just remove the StorageID try { // update cluster map getNetworkTopology().remove(nodeS); + + // Update Host2DatanodeMap + if (updateHost2DatanodeMap) { + getHost2DatanodeMap().remove(nodeS); + } if(shouldCountVersion(nodeS)) { decrementVersionCount(nodeS.getSoftwareVersion()); } @@ -1217,6 +1227,11 @@ nodes with its data cleared (or user can just remove the StorageID nodeS.setDependentHostNames( getNetworkDependenciesWithDefault(nodeS)); } + + if (updateHost2DatanodeMap) { + getHost2DatanodeMap().add(nodeS); + } + getNetworkTopology().add(nodeS); resolveUpgradeDomain(nodeS); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java index 5f5452ac16d59..232424d4404ec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java @@ -138,6 +138,35 @@ public void testNumVersionsCorrectAfterReregister() mapToCheck.get("version1").intValue(), 1); } + /** + * This test checks that if a node is re-registered with a different ip, its + * host2DatanodeMap is correctly updated with the new ip. + */ + @Test + public void testHost2NodeMapCorrectAfterReregister() + throws IOException, InterruptedException { + //Create the DatanodeManager which will be tested + FSNamesystem fsn = Mockito.mock(FSNamesystem.class); + Mockito.when(fsn.hasWriteLock()).thenReturn(true); + Configuration conf = new Configuration(); + DatanodeManager dm = mockDatanodeManager(fsn, conf); + + String storageID = "someStorageID1"; + String ipOld = "someIPOld" + storageID; + String ipNew = "someIPNew" + storageID; + + dm.registerDatanode(new DatanodeRegistration( + new DatanodeID(ipOld, "", storageID, 9000, 0, 0, 0), + null, null, "version")); + + dm.registerDatanode(new DatanodeRegistration( + new DatanodeID(ipNew, "", storageID, 9000, 0, 0, 0), + null, null, "version")); + + assertNull("should be no node with old ip", dm.getDatanodeByHost(ipOld)); + assertNotNull("should be a node with new ip", dm.getDatanodeByHost(ipNew)); + } + /** * This test sends a random sequence of node registrations and node removals * to the DatanodeManager (of nodes with different IDs and versions), and