Skip to content

Commit cc926d2

Browse files
committed
Add a retry to deal with the conflict port in netty server.
1 parent d743732 commit cc926d2

File tree

2 files changed

+35
-3
lines changed

2 files changed

+35
-3
lines changed

network/common/src/main/java/org/apache/spark/network/server/TransportServer.java

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,7 @@ protected void initChannel(SocketChannel ch) throws Exception {
100100
}
101101
});
102102

103-
channelFuture = bootstrap.bind(new InetSocketAddress(portToBind));
104-
channelFuture.syncUninterruptibly();
103+
bindRightPort(portToBind);
105104

106105
port = ((InetSocketAddress) channelFuture.channel().localAddress()).getPort();
107106
logger.debug("Shuffle server started on port :" + port);
@@ -122,5 +121,31 @@ public void close() {
122121
}
123122
bootstrap = null;
124123
}
125-
124+
private void bindRightPort(int portToBind) {
125+
int maxPortRetries = conf.portMaxRetries();
126+
127+
for (int i = 0; i <= maxPortRetries; i++) {
128+
int tryPort = -1;
129+
if (0 == portToBind) {
130+
tryPort = 0;
131+
} else {
132+
tryPort = ((portToBind + i - 1024) % (65536 - 1024)) + 1024;
133+
}
134+
try {
135+
channelFuture = bootstrap.bind(new InetSocketAddress(tryPort));
136+
channelFuture.syncUninterruptibly();
137+
return;
138+
} catch (Exception e) {
139+
logger.warn("Netty service could not bind on port " + tryPort +
140+
". Attempting the next port.");
141+
if (i >= maxPortRetries) {
142+
logger.error(e.getMessage() + ": Netty server failed after "
143+
+ maxPortRetries + " retries.");
144+
145+
// If it can't find a right port, it should exit directly.
146+
System.exit(-1);
147+
}
148+
}
149+
}
150+
}
126151
}

network/common/src/main/java/org/apache/spark/network/util/TransportConf.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,4 +98,11 @@ public int memoryMapBytes() {
9898
public boolean lazyFileDescriptor() {
9999
return conf.getBoolean("spark.shuffle.io.lazyFD", true);
100100
}
101+
102+
/**
103+
* Maximum number of retries when binding to a port before giving up.
104+
*/
105+
public int portMaxRetries() {
106+
return conf.getInt("spark.port.maxRetries", 16);
107+
}
101108
}

0 commit comments

Comments
 (0)