Skip to content

Commit e78558f

Browse files
committed
Die with dignity on the network layer
When a fatal error is thrown on the network layer, such an error never makes its way to the uncaught exception handler. This prevents the node from being torn down if an out-of-memory error or other fatal error is thrown while handling HTTP or transport traffic. This commit adds logic to ensure that such errors bubble their way up to the uncaught exception handler, even though Netty tries really hard to swallow everything. Relates #21720
1 parent e86d5fe commit e78558f

File tree

6 files changed

+56
-2
lines changed

6 files changed

+56
-2
lines changed

modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpRequestHandler.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import io.netty.handler.codec.http.FullHttpRequest;
2828
import org.elasticsearch.common.util.concurrent.ThreadContext;
2929
import org.elasticsearch.http.netty4.pipelining.HttpPipelinedRequest;
30+
import org.elasticsearch.transport.netty4.Netty4Utils;
3031

3132
@ChannelHandler.Sharable
3233
class Netty4HttpRequestHandler extends SimpleChannelInboundHandler<Object> {
@@ -72,6 +73,7 @@ protected void channelRead0(ChannelHandlerContext ctx, Object msg) throws Except
7273

7374
/**
 * Handles a throwable reported for this channel. The cause is first passed to
 * {@link Netty4Utils#maybeDie(Throwable)}, which, when the cause is an {@link Error},
 * rethrows it from a separate thread so that it is not swallowed on the network layer.
 * The cause is then handed to the server transport's own {@code exceptionCaught}.
 *
 * @param ctx   the channel handler context the throwable was reported on
 * @param cause the throwable that was caught
 * @throws Exception if the fatal-error check or the server transport's handling throws
 */
@Override
7475
public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception {
76+
Netty4Utils.maybeDie(cause);
7577
serverTransport.exceptionCaught(ctx, cause);
7678
}
7779

modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpServerTransport.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,12 @@ protected void initChannel(Channel ch) throws Exception {
579579
ch.pipeline().addLast("handler", requestHandler);
580580
}
581581

582+
/**
 * Handles a throwable reported while this initializer is in the pipeline. The cause is
 * first passed to {@link Netty4Utils#maybeDie(Throwable)}, which, when the cause is an
 * {@link Error}, rethrows it from a separate thread so that it is not swallowed on the
 * network layer. Handling then continues with the superclass implementation.
 *
 * @param ctx   the channel handler context the throwable was reported on
 * @param cause the throwable that was caught
 * @throws Exception if the fatal-error check or the superclass handling throws
 */
@Override
583+
public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception {
584+
Netty4Utils.maybeDie(cause);
585+
super.exceptionCaught(ctx, cause);
586+
}
587+
582588
}
583589

584590
}

modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4MessageChannelHandler.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception
8080

8181
/**
 * Handles a throwable reported for this channel. The cause is first passed to
 * {@link Netty4Utils#maybeDie(Throwable)}, which, when the cause is an {@link Error},
 * rethrows it from a separate thread so that it is not swallowed on the network layer.
 * The cause is then handed to the transport's own {@code exceptionCaught}.
 *
 * @param ctx   the channel handler context the throwable was reported on
 * @param cause the throwable that was caught
 * @throws Exception if the fatal-error check or the transport's handling throws
 */
@Override
8282
public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception {
83+
Netty4Utils.maybeDie(cause);
8384
transport.exceptionCaught(ctx, cause);
8485
}
8586

modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Transport.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,12 @@ protected void initChannel(Channel ch) throws Exception {
509509
ch.pipeline().addLast("dispatcher", new Netty4MessageChannelHandler(Netty4Transport.this, ".client"));
510510
}
511511

512+
/**
 * Handles a throwable reported while this initializer is in the pipeline. The cause is
 * first passed to {@link Netty4Utils#maybeDie(Throwable)}, which, when the cause is an
 * {@link Error}, rethrows it from a separate thread so that it is not swallowed on the
 * network layer. Handling then continues with the superclass implementation.
 *
 * @param ctx   the channel handler context the throwable was reported on
 * @param cause the throwable that was caught
 * @throws Exception if the fatal-error check or the superclass handling throws
 */
@Override
513+
public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception {
514+
Netty4Utils.maybeDie(cause);
515+
super.exceptionCaught(ctx, cause);
516+
}
517+
512518
}
513519

514520
protected class ServerChannelInitializer extends ChannelInitializer<Channel> {
@@ -527,6 +533,13 @@ protected void initChannel(Channel ch) throws Exception {
527533
ch.pipeline().addLast("size", new Netty4SizeHeaderFrameDecoder());
528534
ch.pipeline().addLast("dispatcher", new Netty4MessageChannelHandler(Netty4Transport.this, name));
529535
}
536+
537+
/**
 * Handles a throwable reported while this server channel initializer is in the pipeline.
 * The cause is first passed to {@link Netty4Utils#maybeDie(Throwable)}, which, when the
 * cause is an {@link Error}, rethrows it from a separate thread so that it is not
 * swallowed on the network layer. Handling then continues with the superclass
 * implementation.
 *
 * @param ctx   the channel handler context the throwable was reported on
 * @param cause the throwable that was caught
 * @throws Exception if the fatal-error check or the superclass handling throws
 */
@Override
538+
public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception {
539+
Netty4Utils.maybeDie(cause);
540+
super.exceptionCaught(ctx, cause);
541+
}
542+
530543
}
531544

532545
}

modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Utils.java

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
* specific language governing permissions and limitations
1717
* under the License.
1818
*/
19+
1920
package org.elasticsearch.transport.netty4;
2021

2122
import io.netty.buffer.ByteBuf;
@@ -28,9 +29,13 @@
2829
import org.apache.lucene.util.BytesRef;
2930
import org.apache.lucene.util.BytesRefIterator;
3031
import org.elasticsearch.common.bytes.BytesReference;
32+
import org.elasticsearch.common.logging.ESLoggerFactory;
3133

3234
import java.io.IOException;
35+
import java.io.PrintWriter;
36+
import java.io.StringWriter;
3337
import java.util.ArrayList;
38+
import java.util.Arrays;
3439
import java.util.Collection;
3540
import java.util.List;
3641

@@ -63,8 +68,7 @@ public static ByteBuf toByteBuf(final BytesReference reference) {
6368
return ((ByteBufBytesReference) reference).toByteBuf();
6469
} else {
6570
final BytesRefIterator iterator = reference.iterator();
66-
// usually we have one, two, or three components
67-
// from the header, the message, and a buffer
71+
// usually we have one, two, or three components from the header, the message, and a buffer
6872
final List<ByteBuf> buffers = new ArrayList<>(3);
6973
try {
7074
BytesRef slice;
@@ -118,4 +122,31 @@ public static void closeChannels(final Collection<Channel> channels) throws IOEx
118122
}
119123
}
120124

125+
public static void maybeDie(final Throwable cause) throws IOException {
126+
if (cause instanceof Error) {
127+
/*
128+
* Here be dragons. We want to rethrow this so that it bubbles up to the uncaught exception handler. Yet, Netty wraps too many
129+
* invocations of user-code in try/catch blocks that swallow all throwables. This means that a rethrow here will not bubble up
130+
* to where we want it to. So, we fork a thread and throw the exception from there where Netty can not get to it. We do not wrap
131+
* the exception so as to not lose the original cause during exit, so we give the thread a name based on the previous stack
132+
* frame so that at least we know where it came from (in case logging the current stack trace fails).
133+
*/
134+
try (
135+
final StringWriter sw = new StringWriter();
136+
final PrintWriter pw = new PrintWriter(sw)) {
137+
// try to log the current stack trace
138+
Arrays.stream(Thread.currentThread().getStackTrace()).skip(1).map(e -> "\tat " + e).forEach(pw::println);
139+
ESLoggerFactory.getLogger(Netty4Utils.class).error("fatal error on the network layer\n{}", sw.toString());
140+
} finally {
141+
final StackTraceElement previous = Thread.currentThread().getStackTrace()[2];
142+
new Thread(
143+
() -> {
144+
throw (Error) cause;
145+
},
146+
previous.getClassName() + "#" + previous.getMethodName())
147+
.start();
148+
}
149+
}
150+
}
151+
121152
}

modules/transport-netty4/src/test/java/org/elasticsearch/http/netty4/Netty4HttpClient.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
* specific language governing permissions and limitations
1717
* under the License.
1818
*/
19+
1920
package org.elasticsearch.http.netty4;
2021

2122
import io.netty.bootstrap.Bootstrap;

0 commit comments

Comments
 (0)