
Commit 0dbeef9

ferhuibasic02 authored and committed
CDPD-27239. HDFS-13671. Namenode deletes large dir slowly caused by FoldedTreeSet#removeAndGet (apache#3114)
Conflicts:
	hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
	hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java
	hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
	hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ReplicaMap.java
	hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockInfo.java
	hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
	hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java

(cherry picked from commit c14f3e4)

Conflicts:
	hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
	hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
	hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java
	hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ReplicaMap.java
	hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockInfo.java
	hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
	hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportLease.java

Change-Id: Ie97cd44fd382192a97815a0baf1ab41524570933
1 parent d3d5b31 commit 0dbeef9

28 files changed: +611 -2564 lines changed
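The headline problem (HDFS-13671) is removal cost: FoldedTreeSet#removeAndGet made deleting a large directory slow, and this revert restores the triplets-based doubly linked block lists, where unlinking a block from a storage's list is constant-time pointer surgery. As a rough illustration only, not patch code, the unlink step on a generic doubly linked node looks like this:

// Generic O(1) doubly linked unlink, sketched for illustration; the
// triplets code restored below does the same with prev/next slots held
// in a flat Object[] instead of per-entry node objects.
class Node {
  Node prev, next;

  // Detach this node and return the (possibly new) head of the list.
  Node unlink(Node head) {
    if (prev != null) { prev.next = next; }
    if (next != null) { next.prev = prev; }
    Node newHead = (this == head) ? next : head;
    prev = null;
    next = null;
    return newHead;
  }
}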

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

Lines changed: 0 additions & 12 deletions
@@ -248,18 +248,6 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final int DFS_NAMENODE_REPLICATION_MAX_STREAMS_DEFAULT = 2;
   public static final String DFS_NAMENODE_REPLICATION_STREAMS_HARD_LIMIT_KEY = "dfs.namenode.replication.max-streams-hard-limit";
   public static final int DFS_NAMENODE_REPLICATION_STREAMS_HARD_LIMIT_DEFAULT = 4;
-  public static final String DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_INTERVAL_MS_KEY
-      = "dfs.namenode.storageinfo.defragment.interval.ms";
-  public static final int
-      DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_INTERVAL_MS_DEFAULT = 10 * 60 * 1000;
-  public static final String DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_TIMEOUT_MS_KEY
-      = "dfs.namenode.storageinfo.defragment.timeout.ms";
-  public static final int
-      DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_TIMEOUT_MS_DEFAULT = 4;
-  public static final String DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_RATIO_KEY
-      = "dfs.namenode.storageinfo.defragment.ratio";
-  public static final double
-      DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_RATIO_DEFAULT = 0.75;
   public static final String DFS_WEBHDFS_AUTHENTICATION_FILTER_KEY = "dfs.web.authentication.filter";
   /* Phrased as below to avoid javac inlining as a constant, to match the behavior when
      this was AuthFilter.class.getName(). Note that if you change the import for AuthFilter, you

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java

Lines changed: 2 additions & 3 deletions
@@ -959,8 +959,8 @@ public static JournalInfoProto convert(JournalInfo j) {


   public static BlockReportContext convert(BlockReportContextProto proto) {
-    return new BlockReportContext(proto.getTotalRpcs(), proto.getCurRpc(),
-        proto.getId(), proto.getLeaseId(), proto.getSorted());
+    return new BlockReportContext(proto.getTotalRpcs(),
+        proto.getCurRpc(), proto.getId(), proto.getLeaseId());
   }

   public static BlockReportContextProto convert(BlockReportContext context) {
@@ -969,7 +969,6 @@ public static BlockReportContextProto convert(BlockReportContext context) {
         setCurRpc(context.getCurRpc()).
         setId(context.getReportId()).
         setLeaseId(context.getLeaseId()).
-        setSorted(context.isSorted()).
         build();
   }
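With the sorted flag gone from both converters, a proto round trip should preserve the four remaining fields. A hedged sketch of that check, reusing only the constructor and accessors visible in this diff (the field values are made up):

// Round-trip sketch; assumes the 4-arg BlockReportContext constructor
// restored by this patch. Values are arbitrary.
static void roundTripCheck() {
  BlockReportContext ctx = new BlockReportContext(4, 1, 1234L, 99L);
  BlockReportContextProto proto = PBHelper.convert(ctx);
  BlockReportContext back = PBHelper.convert(proto);
  assert back.getTotalRpcs() == 4 && back.getCurRpc() == 1;
  assert back.getReportId() == 1234L && back.getLeaseId() == 99L;
}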

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java

Lines changed: 152 additions & 37 deletions
@@ -19,8 +19,8 @@

 import java.io.IOException;
 import java.util.Iterator;
+import java.util.LinkedList;
 import java.util.List;
-import java.util.NoSuchElementException;

 import com.google.common.base.Preconditions;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -56,9 +56,19 @@ public abstract class BlockInfo extends Block
   /** For implementing {@link LightWeightGSet.LinkedElement} interface. */
   private LightWeightGSet.LinkedElement nextLinkedElement;

-
-  // Storages this block is replicated on
-  protected DatanodeStorageInfo[] storages;
+  /**
+   * This array contains triplets of references. For each i-th storage, the
+   * block belongs to triplets[3*i] is the reference to the
+   * {@link DatanodeStorageInfo} and triplets[3*i+1] and triplets[3*i+2] are
+   * references to the previous and the next blocks, respectively, in the list
+   * of blocks belonging to this storage.
+   *
+   * Using previous and next in Object triplets is done instead of a
+   * {@link LinkedList} list to efficiently use memory. With LinkedList the cost
+   * per replica is 42 bytes (LinkedList#Entry object per replica) versus 16
+   * bytes using the triplets.
+   */
+  protected Object[] triplets;

   private BlockUnderConstructionFeature uc;

@@ -68,14 +78,14 @@ public abstract class BlockInfo extends Block
    * in the block group
    */
   public BlockInfo(short size) {
-    this.storages = new DatanodeStorageInfo[size];
+    this.triplets = new Object[3 * size];
     this.bcId = INVALID_INODE_ID;
     this.replication = isStriped() ? 0 : size;
   }

   public BlockInfo(Block blk, short size) {
     super(blk);
-    this.storages = new DatanodeStorageInfo[size];
+    this.triplets = new Object[3*size];
     this.bcId = INVALID_INODE_ID;
     this.replication = isStriped() ? 0 : size;
   }
@@ -105,31 +115,7 @@ public boolean isDeleted() {
   }

   public Iterator<DatanodeStorageInfo> getStorageInfos() {
-    return new Iterator<DatanodeStorageInfo>() {
-
-      private int index = 0;
-
-      @Override
-      public boolean hasNext() {
-        while (index < storages.length && storages[index] == null) {
-          index++;
-        }
-        return index < storages.length;
-      }
-
-      @Override
-      public DatanodeStorageInfo next() {
-        if (!hasNext()) {
-          throw new NoSuchElementException();
-        }
-        return storages[index++];
-      }
-
-      @Override
-      public void remove() {
-        throw new UnsupportedOperationException("Sorry. can't remove.");
-      }
-    };
+    return new BlocksMap.StorageIterator(this);
   }

   public DatanodeDescriptor getDatanode(int index) {
@@ -138,18 +124,73 @@ public DatanodeDescriptor getDatanode(int index) {
   }

   DatanodeStorageInfo getStorageInfo(int index) {
-    assert this.storages != null : "BlockInfo is not initialized";
-    return storages[index];
+    assert this.triplets != null : "BlockInfo is not initialized";
+    assert index >= 0 && index*3 < triplets.length : "Index is out of bound";
+    return (DatanodeStorageInfo)triplets[index*3];
+  }
+
+  BlockInfo getPrevious(int index) {
+    assert this.triplets != null : "BlockInfo is not initialized";
+    assert index >= 0 && index*3+1 < triplets.length : "Index is out of bound";
+    BlockInfo info = (BlockInfo)triplets[index*3+1];
+    assert info == null ||
+        info.getClass().getName().startsWith(BlockInfo.class.getName()) :
+        "BlockInfo is expected at " + index*3;
+    return info;
+  }
+
+  BlockInfo getNext(int index) {
+    assert this.triplets != null : "BlockInfo is not initialized";
+    assert index >= 0 && index*3+2 < triplets.length : "Index is out of bound";
+    BlockInfo info = (BlockInfo)triplets[index*3+2];
+    assert info == null || info.getClass().getName().startsWith(
+        BlockInfo.class.getName()) :
+        "BlockInfo is expected at " + index*3;
+    return info;
   }

   void setStorageInfo(int index, DatanodeStorageInfo storage) {
-    assert this.storages != null : "BlockInfo is not initialized";
-    this.storages[index] = storage;
+    assert this.triplets != null : "BlockInfo is not initialized";
+    assert index >= 0 && index*3 < triplets.length : "Index is out of bound";
+    triplets[index*3] = storage;
+  }
+
+  /**
+   * Return the previous block on the block list for the datanode at
+   * position index. Set the previous block on the list to "to".
+   *
+   * @param index - the datanode index
+   * @param to - block to be set to previous on the list of blocks
+   * @return current previous block on the list of blocks
+   */
+  BlockInfo setPrevious(int index, BlockInfo to) {
+    assert this.triplets != null : "BlockInfo is not initialized";
+    assert index >= 0 && index*3+1 < triplets.length : "Index is out of bound";
+    BlockInfo info = (BlockInfo) triplets[index*3+1];
+    triplets[index*3+1] = to;
+    return info;
+  }
+
+  /**
+   * Return the next block on the block list for the datanode at
+   * position index. Set the next block on the list to "to".
+   *
+   * @param index - the datanode index
+   * @param to - block to be set to next on the list of blocks
+   * @return current next block on the list of blocks
+   */
+  BlockInfo setNext(int index, BlockInfo to) {
+    assert this.triplets != null : "BlockInfo is not initialized";
+    assert index >= 0 && index*3+2 < triplets.length : "Index is out of bound";
+    BlockInfo info = (BlockInfo) triplets[index*3+2];
+    triplets[index*3+2] = to;
+    return info;
   }

   public int getCapacity() {
-    assert this.storages != null : "BlockInfo is not initialized";
-    return storages.length;
+    assert this.triplets != null : "BlockInfo is not initialized";
+    assert triplets.length % 3 == 0 : "Malformed BlockInfo";
+    return triplets.length / 3;
   }

   /**
@@ -210,6 +251,80 @@ int findStorageInfo(DatanodeStorageInfo storageInfo) {
     return -1;
   }

+  /**
+   * Insert this block into the head of the list of blocks
+   * related to the specified DatanodeStorageInfo.
+   * If the head is null then form a new list.
+   * @return current block as the new head of the list.
+   */
+  BlockInfo listInsert(BlockInfo head, DatanodeStorageInfo storage) {
+    int dnIndex = this.findStorageInfo(storage);
+    assert dnIndex >= 0 : "Data node is not found: current";
+    assert getPrevious(dnIndex) == null && getNext(dnIndex) == null :
+        "Block is already in the list and cannot be inserted.";
+    this.setPrevious(dnIndex, null);
+    this.setNext(dnIndex, head);
+    if (head != null) {
+      head.setPrevious(head.findStorageInfo(storage), this);
+    }
+    return this;
+  }
+
+  /**
+   * Remove this block from the list of blocks
+   * related to the specified DatanodeStorageInfo.
+   * If this block is the head of the list then return the next block as
+   * the new head.
+   * @return the new head of the list or null if the list becomes
+   * empty after deletion.
+   */
+  BlockInfo listRemove(BlockInfo head, DatanodeStorageInfo storage) {
+    if (head == null) {
+      return null;
+    }
+    int dnIndex = this.findStorageInfo(storage);
+    if (dnIndex < 0) { // this block is not on the data-node list
+      return head;
+    }
+
+    BlockInfo next = this.getNext(dnIndex);
+    BlockInfo prev = this.getPrevious(dnIndex);
+    this.setNext(dnIndex, null);
+    this.setPrevious(dnIndex, null);
+    if (prev != null) {
+      prev.setNext(prev.findStorageInfo(storage), next);
+    }
+    if (next != null) {
+      next.setPrevious(next.findStorageInfo(storage), prev);
+    }
+    if (this == head) { // removing the head
+      head = next;
+    }
+    return head;
+  }
+
+  /**
+   * Remove this block from the list of blocks related to the specified
+   * DatanodeDescriptor. Insert it into the head of the list of blocks.
+   *
+   * @return the new head of the list.
+   */
+  public BlockInfo moveBlockToHead(BlockInfo head, DatanodeStorageInfo storage,
+      int curIndex, int headIndex) {
+    if (head == this) {
+      return this;
+    }
+    BlockInfo next = this.setNext(curIndex, head);
+    BlockInfo prev = this.setPrevious(curIndex, null);
+
+    head.setPrevious(headIndex, this);
+    prev.setNext(prev.findStorageInfo(storage), next);
+    if (next != null) {
+      next.setPrevious(next.findStorageInfo(storage), prev);
+    }
+    return this;
+  }
+
   @Override
   public int hashCode() {
     // Super implementation is sufficient
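The restored javadoc above is the heart of the layout: slot 3*i holds the storage reference, and slots 3*i+1 and 3*i+2 hold the previous and next block in that storage's list, so one flat Object[] replaces a per-replica LinkedList entry. A simplified, self-contained model of the indexing (hypothetical class, not patch code):

// Simplified model of the triplets indexing; the real BlockInfo stores
// a DatanodeStorageInfo plus sibling BlockInfo references per slot.
class TripletBlock {
  // triplets[3*i]   -> storage i
  // triplets[3*i+1] -> previous block in storage i's block list
  // triplets[3*i+2] -> next block in storage i's block list
  private final Object[] triplets;

  TripletBlock(int replication) {
    this.triplets = new Object[3 * replication];
  }

  Object getStorage(int i) {
    return triplets[3 * i];
  }

  TripletBlock getPrev(int i) {
    return (TripletBlock) triplets[3 * i + 1];
  }

  TripletBlock getNext(int i) {
    return (TripletBlock) triplets[3 * i + 2];
  }

  void setPrev(int i, TripletBlock b) {
    triplets[3 * i + 1] = b;
  }

  void setNext(int i, TripletBlock b) {
    triplets[3 * i + 2] = b;
  }
}

Each block thus sits in one doubly linked list per storage holding a replica; enumerating a storage's blocks walks the getNext chain with no per-entry node objects, which is where the 16-versus-42-byte comparison in the javadoc comes from.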

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoContiguous.java

Lines changed: 19 additions & 10 deletions
@@ -37,20 +37,20 @@ public BlockInfoContiguous(Block blk, short size) {
   }

   /**
-   * Ensure that there is enough space to include num more storages.
-   * @return first free storage index.
+   * Ensure that there is enough space to include num more triplets.
+   * @return first free triplet index.
    */
   private int ensureCapacity(int num) {
-    assert this.storages != null : "BlockInfo is not initialized";
+    assert this.triplets != null : "BlockInfo is not initialized";
     int last = numNodes();
-    if (storages.length >= (last+num)) {
+    if (triplets.length >= (last+num)*3) {
       return last;
     }
     /* Not enough space left. Create a new array. Should normally
      * happen only when replication is manually increased by the user. */
-    DatanodeStorageInfo[] old = storages;
-    storages = new DatanodeStorageInfo[(last+num)];
-    System.arraycopy(old, 0, storages, 0, last);
+    Object[] old = triplets;
+    triplets = new Object[(last+num)*3];
+    System.arraycopy(old, 0, triplets, 0, last * 3);
     return last;
   }

@@ -62,6 +62,8 @@ boolean addStorage(DatanodeStorageInfo storage, Block reportedBlock) {
     // find the last null node
     int lastNode = ensureCapacity(1);
     setStorageInfo(lastNode, storage);
+    setNext(lastNode, null);
+    setPrevious(lastNode, null);
     return true;
   }

@@ -71,18 +73,25 @@ boolean removeStorage(DatanodeStorageInfo storage) {
     if (dnIndex < 0) { // the node is not found
       return false;
     }
+    assert getPrevious(dnIndex) == null && getNext(dnIndex) == null :
+        "Block is still in the list and must be removed first.";
     // find the last not null node
     int lastNode = numNodes()-1;
-    // replace current node entry by the lastNode one
+    // replace current node triplet by the lastNode one
     setStorageInfo(dnIndex, getStorageInfo(lastNode));
-    // set the last entry to null
+    setNext(dnIndex, getNext(lastNode));
+    setPrevious(dnIndex, getPrevious(lastNode));
+    // set the last triplet to null
     setStorageInfo(lastNode, null);
+    setNext(lastNode, null);
+    setPrevious(lastNode, null);
     return true;
   }

   @Override
   public int numNodes() {
-    assert this.storages != null : "BlockInfo is not initialized";
+    assert this.triplets != null : "BlockInfo is not initialized";
+    assert triplets.length % 3 == 0 : "Malformed BlockInfo";

     for (int idx = getCapacity()-1; idx >= 0; idx--) {
       if (getDatanode(idx) != null) {