From a9d2e11dcbfc0d68e18aeef9b8bc8d19ba244dbe Mon Sep 17 00:00:00 2001 From: Jason Tedor Date: Mon, 21 Nov 2016 16:27:53 -0500 Subject: [PATCH 1/2] Die with dignity on the network layer When a fatal error is thrown on the network layer, such an error never makes its way to the uncaught exception handler. This prevents the node from being torn down if an out of memory error or other fatal error is thrown while handling HTTP or transport traffic. This commit adds logic to ensure that such errors bubble their way up to the uncaught exception handler, even though Netty tries really hard to swallow everything. --- .../http/netty4/Netty4HttpRequestHandler.java | 2 ++ .../http/netty4/Netty4HttpServerTransport.java | 8 +++++++- .../netty4/Netty4MessageChannelHandler.java | 1 + .../transport/netty4/Netty4Transport.java | 13 +++++++++++++ .../transport/netty4/Netty4Utils.java | 14 ++++++++++++++ .../http/netty4/Netty4HttpClient.java | 1 + 6 files changed, 38 insertions(+), 1 deletion(-) diff --git a/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpRequestHandler.java b/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpRequestHandler.java index a4402d4816fc3..6e12970c6df95 100644 --- a/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpRequestHandler.java +++ b/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpRequestHandler.java @@ -27,6 +27,7 @@ import io.netty.handler.codec.http.FullHttpRequest; import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.http.netty4.pipelining.HttpPipelinedRequest; +import org.elasticsearch.transport.netty4.Netty4Utils; @ChannelHandler.Sharable class Netty4HttpRequestHandler extends SimpleChannelInboundHandler { @@ -72,6 +73,7 @@ protected void channelRead0(ChannelHandlerContext ctx, Object msg) throws Except @Override public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception { + Netty4Utils.maybeDie(cause); serverTransport.exceptionCaught(ctx, cause); } diff --git a/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpServerTransport.java b/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpServerTransport.java index 20cdfe0a128e7..90ca1b9a6a59c 100644 --- a/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpServerTransport.java +++ b/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpServerTransport.java @@ -53,9 +53,9 @@ import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.BoundTransportAddress; -import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.transport.NetworkExceptionHelper; import org.elasticsearch.common.transport.PortsRange; +import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.BigArrays; @@ -578,6 +578,12 @@ protected void initChannel(Channel ch) throws Exception { ch.pipeline().addLast("handler", requestHandler); } + @Override + public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception { + Netty4Utils.maybeDie(cause); + super.exceptionCaught(ctx, cause); + } + } } diff --git a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4MessageChannelHandler.java b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4MessageChannelHandler.java index cddbdac3e761b..9741266a485a4 100644 --- a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4MessageChannelHandler.java +++ b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4MessageChannelHandler.java @@ -80,6 +80,7 @@ public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception @Override public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception { + Netty4Utils.maybeDie(cause); transport.exceptionCaught(ctx, cause); } diff --git a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Transport.java b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Transport.java index 7742978831751..bb84c47dc17a8 100644 --- a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Transport.java +++ b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Transport.java @@ -508,6 +508,12 @@ protected void initChannel(Channel ch) throws Exception { ch.pipeline().addLast("dispatcher", new Netty4MessageChannelHandler(Netty4Transport.this, ".client")); } + @Override + public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception { + Netty4Utils.maybeDie(cause); + super.exceptionCaught(ctx, cause); + } + } protected class ServerChannelInitializer extends ChannelInitializer { @@ -526,6 +532,13 @@ protected void initChannel(Channel ch) throws Exception { ch.pipeline().addLast("size", new Netty4SizeHeaderFrameDecoder()); ch.pipeline().addLast("dispatcher", new Netty4MessageChannelHandler(Netty4Transport.this, name)); } + + @Override + public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception { + Netty4Utils.maybeDie(cause); + super.exceptionCaught(ctx, cause); + } + } } diff --git a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Utils.java b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Utils.java index 877d50e1674b9..51c170624ad40 100644 --- a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Utils.java +++ b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Utils.java @@ -118,4 +118,18 @@ public static void closeChannels(final Collection channels) throws IOEx } } + public static void maybeDie(Throwable cause) { + if (cause instanceof Error) { + /* + * Here be dragons. We want to rethrow this so that it bubbles up to the uncaught exception handler. Yet, Netty wraps too many + * invocations of user-code in try/catch blocks that swallow all throwables. This means that a rethrow here will not bubble up + * to where we want it to. So, we fork a thread and throw the exception from there where Netty can not get to it. We do not wrap + * the exception so as to not lose the original cause during exit, so we give the thread a name based on the previous stack + * frame so that at least we know where it came from. + */ + final StackTraceElement previous = Thread.currentThread().getStackTrace()[2]; + new Thread(() -> { throw (Error)cause; }, previous.getClassName() + "#" + previous.getMethodName()).start(); + } + } + } diff --git a/modules/transport-netty4/src/test/java/org/elasticsearch/http/netty4/Netty4HttpClient.java b/modules/transport-netty4/src/test/java/org/elasticsearch/http/netty4/Netty4HttpClient.java index f6d3e1fe64f52..ad1fedc49b033 100644 --- a/modules/transport-netty4/src/test/java/org/elasticsearch/http/netty4/Netty4HttpClient.java +++ b/modules/transport-netty4/src/test/java/org/elasticsearch/http/netty4/Netty4HttpClient.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package org.elasticsearch.http.netty4; import io.netty.bootstrap.Bootstrap; From bf479164a9e8161ee8b139b4721686f165c919ba Mon Sep 17 00:00:00 2001 From: Jason Tedor Date: Mon, 21 Nov 2016 17:26:42 -0500 Subject: [PATCH 2/2] Try to log the current stack trace before dying When preparing to rethrow a fatal error, this commit adds an attempt to log the current stack trace so where know which handler saw the fatal error. --- .../transport/netty4/Netty4Utils.java | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Utils.java b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Utils.java index 51c170624ad40..774ac59638211 100644 --- a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Utils.java +++ b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Utils.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package org.elasticsearch.transport.netty4; import io.netty.buffer.ByteBuf; @@ -28,9 +29,13 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.logging.ESLoggerFactory; import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.List; @@ -63,8 +68,7 @@ public static ByteBuf toByteBuf(final BytesReference reference) { return ((ByteBufBytesReference) reference).toByteBuf(); } else { final BytesRefIterator iterator = reference.iterator(); - // usually we have one, two, or three components - // from the header, the message, and a buffer + // usually we have one, two, or three components from the header, the message, and a buffer final List buffers = new ArrayList<>(3); try { BytesRef slice; @@ -118,17 +122,30 @@ public static void closeChannels(final Collection channels) throws IOEx } } - public static void maybeDie(Throwable cause) { + public static void maybeDie(final Throwable cause) throws IOException { if (cause instanceof Error) { /* * Here be dragons. We want to rethrow this so that it bubbles up to the uncaught exception handler. Yet, Netty wraps too many * invocations of user-code in try/catch blocks that swallow all throwables. This means that a rethrow here will not bubble up * to where we want it to. So, we fork a thread and throw the exception from there where Netty can not get to it. We do not wrap * the exception so as to not lose the original cause during exit, so we give the thread a name based on the previous stack - * frame so that at least we know where it came from. + * frame so that at least we know where it came from (in case logging the current stack trace fails). */ - final StackTraceElement previous = Thread.currentThread().getStackTrace()[2]; - new Thread(() -> { throw (Error)cause; }, previous.getClassName() + "#" + previous.getMethodName()).start(); + try ( + final StringWriter sw = new StringWriter(); + final PrintWriter pw = new PrintWriter(sw)) { + // try to log the current stack trace + Arrays.stream(Thread.currentThread().getStackTrace()).skip(1).map(e -> "\tat " + e).forEach(pw::println); + ESLoggerFactory.getLogger(Netty4Utils.class).error("fatal error on the network layer\n{}", sw.toString()); + } finally { + final StackTraceElement previous = Thread.currentThread().getStackTrace()[2]; + new Thread( + () -> { + throw (Error) cause; + }, + previous.getClassName() + "#" + previous.getMethodName()) + .start(); + } } }