Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion core/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ dependencies {
compile "org.apache.lucene:lucene-spatial3d:${versions.lucene}"
compile "org.apache.lucene:lucene-suggest:${versions.lucene}"

compile 'org.elasticsearch:securesm:1.0'
compile 'org.elasticsearch:securesm:1.1'

// utilities
compile 'net.sf.jopt-simple:jopt-simple:4.9'
Expand Down
6 changes: 6 additions & 0 deletions core/src/main/java/org/elasticsearch/bootstrap/Bootstrap.java
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,12 @@ static void init(
// fail if somebody replaced the lucene jars
checkLucene();

// install the default uncaught exception handler; must be done before security is
// initialized as we do not want to grant the runtime permission
// setDefaultUncaughtExceptionHandler
Thread.setDefaultUncaughtExceptionHandler(
new ElasticsearchUncaughtExceptionHandler(() -> Node.NODE_NAME_SETTING.get(settings)));

INSTANCE.setup(true, settings, environment);

INSTANCE.start();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.bootstrap;

import org.apache.lucene.index.MergePolicy;
import org.elasticsearch.common.SuppressForbidden;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;

import java.io.IOError;
import java.util.Objects;
import java.util.function.Supplier;

/**
 * Uncaught exception handler that halts the JVM when a thread dies from a fatal error and
 * merely logs a warning for any other uncaught throwable.
 *
 * A throwable is fatal when it is an {@link Error}, or when it is a
 * {@link MergePolicy.MergeException} whose direct cause is an {@link Error}. On a fatal
 * throwable the handler first attempts to log it and then halts with a status code chosen
 * from the type of the underlying {@link Error}.
 */
class ElasticsearchUncaughtExceptionHandler implements Thread.UncaughtExceptionHandler {

    // supplies the prefix handed to the logger; evaluated each time something is logged
    private final Supplier<String> loggingPrefixSupplier;

    /**
     * @param loggingPrefixSupplier supplies the logging prefix used when reporting uncaught
     *                              throwables; must not be {@code null}
     * @throws NullPointerException if {@code loggingPrefixSupplier} is {@code null}
     */
    ElasticsearchUncaughtExceptionHandler(final Supplier<String> loggingPrefixSupplier) {
        this.loggingPrefixSupplier = Objects.requireNonNull(loggingPrefixSupplier);
    }

    /**
     * Dispatches the uncaught throwable: fatal ones are reported via
     * {@link #onFatalUncaught(String, Throwable)} and followed by {@link #halt(int)};
     * everything else goes to {@link #onNonFatalUncaught(String, Throwable)}.
     *
     * @param t the thread that terminated
     * @param e the throwable that terminated it
     */
    @Override
    public void uncaughtException(Thread t, Throwable e) {
        if (isFatalUncaught(e)) {
            try {
                onFatalUncaught(t.getName(), e);
            } finally {
                // we use specific error codes in case the above notification failed, at least we
                // will have some indication of the error bringing us down
                halt(exitStatus(e));
            }
        } else {
            onNonFatalUncaught(t.getName(), e);
        }
    }

    /**
     * Returns whether the given throwable should bring the JVM down.
     *
     * @param e the uncaught throwable
     * @return {@code true} if {@code e} is an {@link Error}, or a
     *         {@link MergePolicy.MergeException} whose cause is an {@link Error}
     */
    // visible for testing
    static boolean isFatalUncaught(Throwable e) {
        return isFatalCause(e) || (e instanceof MergePolicy.MergeException && isFatalCause(e.getCause()));
    }

    // null-safe: instanceof is false for a null cause
    private static boolean isFatalCause(Throwable cause) {
        return cause instanceof Error;
    }

    /**
     * Maps a fatal throwable to the status the JVM is halted with. A merge exception is
     * unwrapped first so that the status reflects the error that actually occurred rather
     * than the wrapper (which would otherwise always map to the generic status 1).
     *
     * @param e a throwable for which {@link #isFatalUncaught(Throwable)} returned {@code true}
     * @return the halt status for the underlying error; 1 when it is not a recognized type
     */
    private static int exitStatus(final Throwable e) {
        final Throwable fatal = isFatalCause(e) ? e : e.getCause();
        if (fatal instanceof InternalError) {
            return 128;
        } else if (fatal instanceof OutOfMemoryError) {
            return 127;
        } else if (fatal instanceof StackOverflowError) {
            return 126;
        } else if (fatal instanceof UnknownError) {
            return 125;
        } else if (fatal instanceof IOError) {
            return 124;
        } else {
            return 1;
        }
    }

    /**
     * Logs a fatal uncaught throwable at error level.
     *
     * @param threadName name of the thread that terminated
     * @param t          the throwable that terminated it
     */
    // visible for testing
    void onFatalUncaught(final String threadName, final Throwable t) {
        final ESLogger logger = Loggers.getLogger(ElasticsearchUncaughtExceptionHandler.class, loggingPrefixSupplier.get());
        logger.error("fatal error in thread [{}], exiting", t, threadName);
    }

    /**
     * Logs a non-fatal uncaught throwable at warn level.
     *
     * @param threadName name of the thread that terminated
     * @param t          the throwable that terminated it
     */
    // visible for testing
    void onNonFatalUncaught(final String threadName, final Throwable t) {
        final ESLogger logger = Loggers.getLogger(ElasticsearchUncaughtExceptionHandler.class, loggingPrefixSupplier.get());
        logger.warn("uncaught exception in thread [{}]", t, threadName);
    }

    /**
     * Halts the JVM with the given status.
     *
     * @param status the process exit status
     */
    // visible for testing
    @SuppressForbidden(reason = "halt")
    void halt(int status) {
        // we halt to prevent shutdown hooks from running
        Runtime.getRuntime().halt(status);
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ static void configure(Environment environment, boolean filterBadDefaults) throws
Policy.setPolicy(new ESPolicy(createPermissions(environment), getPluginPermissions(environment), filterBadDefaults));

// enable security manager
System.setSecurityManager(new SecureSM());
System.setSecurityManager(new SecureSM(new String[] { "org.elasticsearch.bootstrap." }));

// do some basic tests
selfTest();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
//// SecurityManager impl:
//// Must have all permissions to properly perform access checks

grant codeBase "${codebase.securesm-1.0.jar}" {
grant codeBase "${codebase.securesm-1.1.jar}" {
permission java.security.AllPermission;
};

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.bootstrap;

import org.apache.lucene.index.MergePolicy;
import org.elasticsearch.test.ESTestCase;
import org.junit.Before;

import java.io.IOError;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;

import static org.hamcrest.CoreMatchers.equalTo;

public class ElasticsearchUncaughtExceptionHandlerTests extends ESTestCase {

private Map<Class<? extends Error>, Integer> expectedStatus;

@Before
public void setUp() throws Exception {
super.setUp();
Map<Class<? extends Error>, Integer> expectedStatus = new HashMap<>();
expectedStatus.put(InternalError.class, 128);
expectedStatus.put(OutOfMemoryError.class, 127);
expectedStatus.put(StackOverflowError.class, 126);
expectedStatus.put(UnknownError.class, 125);
expectedStatus.put(IOError.class, 124);
this.expectedStatus = Collections.unmodifiableMap(expectedStatus);
}

public void testUncaughtError() throws InterruptedException {
final Error error = randomFrom(
new InternalError(),
new OutOfMemoryError(),
new StackOverflowError(),
new UnknownError(),
new IOError(new IOException("fatal")),
new Error() {});
final Thread thread = new Thread(() -> { throw error; });
final String name = randomAsciiOfLength(10);
thread.setName(name);
final AtomicBoolean halt = new AtomicBoolean();
final AtomicInteger observedStatus = new AtomicInteger();
final AtomicReference<String> threadNameReference = new AtomicReference<>();
final AtomicReference<Throwable> throwableReference = new AtomicReference<>();
thread.setUncaughtExceptionHandler(new ElasticsearchUncaughtExceptionHandler(() -> "testUncaughtError") {

@Override
void halt(int status) {
halt.set(true);
observedStatus.set(status);
}

@Override
void onFatalUncaught(String threadName, Throwable t) {
threadNameReference.set(threadName);
throwableReference.set(t);
}

@Override
void onNonFatalUncaught(String threadName, Throwable t) {
fail();
}

});
thread.start();
thread.join();
assertTrue(halt.get());
final int status;
if (expectedStatus.containsKey(error.getClass())) {
status = expectedStatus.get(error.getClass());
} else {
status = 1;
}
assertThat(observedStatus.get(), equalTo(status));
assertThat(threadNameReference.get(), equalTo(name));
assertThat(throwableReference.get(), equalTo(error));
}

public void testUncaughtException() throws InterruptedException {
final RuntimeException e = new RuntimeException("boom");
final Thread thread = new Thread(() -> { throw e; });
final String name = randomAsciiOfLength(10);
thread.setName(name);
final AtomicReference<String> threadNameReference = new AtomicReference<>();
final AtomicReference<Throwable> throwableReference = new AtomicReference<>();
thread.setUncaughtExceptionHandler(new ElasticsearchUncaughtExceptionHandler(() -> "testUncaughtException") {
@Override
void halt(int status) {
fail();
}

@Override
void onFatalUncaught(String threadName, Throwable t) {
fail();
}

@Override
void onNonFatalUncaught(String threadName, Throwable t) {
threadNameReference.set(threadName);
throwableReference.set(t);
}
});
thread.start();
thread.join();
assertThat(threadNameReference.get(), equalTo(name));
assertThat(throwableReference.get(), equalTo(e));
}

public void testIsFatalCause() {
assertFatal(new MergePolicy.MergeException(new OutOfMemoryError(), null));
assertFatal(new OutOfMemoryError());
assertFatal(new StackOverflowError());
assertFatal(new InternalError());
assertFatal(new UnknownError());
assertFatal(new IOError(new IOException()));
assertNonFatal(new RuntimeException());
assertNonFatal(new UncheckedIOException(new IOException()));
}

private void assertFatal(Throwable cause) {
assertTrue(ElasticsearchUncaughtExceptionHandler.isFatalUncaught(cause));
}

private void assertNonFatal(Throwable cause) {
assertFalse(ElasticsearchUncaughtExceptionHandler.isFatalUncaught(cause));
}

}
1 change: 0 additions & 1 deletion distribution/licenses/securesm-1.0.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions distribution/licenses/securesm-1.1.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1e423447d020041534be94c0f31a49fbdc1f2950
8 changes: 8 additions & 0 deletions docs/reference/migration/migrate_5_0/packaging.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,11 @@ from Elasticsearch.
Additionally, it was previously possible to set any setting in
Elasticsearch via JVM system properties. This has been removed from
Elasticsearch.

==== Dying on fatal errors

Previous versions of Elasticsearch would not halt the JVM if out of memory errors or other fatal
errors were encountered during the life of the Elasticsearch instance. Because such errors leave
the JVM in a questionable state, the best course of action is to halt the JVM when this occurs.
Starting in Elasticsearch 5.x, this is now the case. Operators should consider configuring their
Elasticsearch services so that they respawn automatically in the case of such a fatal crash.
2 changes: 2 additions & 0 deletions docs/reference/setup.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,5 @@ include::setup/bootstrap-checks.asciidoc[]
include::setup/sysconfig.asciidoc[]

include::setup/upgrade.asciidoc[]

include::setup/stopping.asciidoc[]
58 changes: 58 additions & 0 deletions docs/reference/setup/stopping.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
[[stopping-elasticsearch]]
=== Stopping Elasticsearch

An orderly shutdown of Elasticsearch ensures that Elasticsearch has a chance to clean up and close
outstanding resources. For example, a node that is shut down in an orderly fashion will remove itself
from the cluster, sync translogs to disk, and perform other related cleanup activities. You can help
ensure an orderly shutdown by properly stopping Elasticsearch.

If you're running Elasticsearch as a service, you can stop Elasticsearch via the service management
functionality provided by your installation.

If you're running Elasticsearch directly, you can stop Elasticsearch by sending control-C if you're
running Elasticsearch in the console, or by sending `SIGTERM` to the Elasticsearch process on a
POSIX system. You can obtain the PID to send the signal to via various tools (e.g., `ps` or `jps`):

[source,sh]
--------------------------------------------------
$ jps | grep Elasticsearch
14542 Elasticsearch
--------------------------------------------------

From the Elasticsearch startup logs:

[source,sh]
--------------------------------------------------
[2016-07-07 12:26:18,908][INFO ][node ] [Reaper] version[5.0.0-alpha4], pid[15399], build[3f5b994/2016-06-27T16:23:46.861Z], OS[Mac OS X/10.11.5/x86_64], JVM[Oracle Corporation/Java HotSpot(TM) 64-Bit Server VM/1.8.0_92/25.92-b14]
--------------------------------------------------

Or by specifying a location to write a PID file to on startup (`-p <path>`):

[source,sh]
--------------------------------------------------
$ ./bin/elasticsearch -p /tmp/elasticsearch-pid -d
$ cat /tmp/elasticsearch-pid && echo
15516
$ kill -SIGTERM 15516
--------------------------------------------------

[[fatal-errors]]
[float]
=== Stopping on Fatal Errors

During the life of the Elasticsearch virtual machine, certain fatal errors could arise that put the
virtual machine in a questionable state. Such fatal errors include out of memory errors, internal
errors in the virtual machine, and serious I/O errors.

When Elasticsearch detects that the virtual machine has encountered such a fatal error, Elasticsearch
will attempt to log the error and then will halt the virtual machine. When Elasticsearch initiates
such a shutdown, it does not go through an orderly shutdown as described above. The Elasticsearch
process will also return with a special status code indicating the nature of the error.

[horizontal]
JVM internal error:: 128
Out of memory error:: 127
Stack overflow error:: 126
Unknown virtual machine error:: 125
Serious I/O error:: 124
Unknown fatal error:: 1
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ public boolean implies(ProtectionDomain domain, Permission permission) {
return esPolicy.implies(domain, permission) || testFramework.implies(domain, permission);
}
});
System.setSecurityManager(new SecureSM(true));
System.setSecurityManager(SecureSM.createTestSecureSM());
Security.selfTest();

// guarantee plugin classes are initialized first, in case they have one-time hacks.
Expand Down