elastic · nik9000 · Aug 1, 2016 · Aug 2, 2016 · nik9000 · Aug 2, 2016
diff --git a/core/src/main/java/org/elasticsearch/bootstrap/Bootstrap.java b/core/src/main/java/org/elasticsearch/bootstrap/Bootstrap.java
@@ -175,7 +175,7 @@ public void run() {
         node = new Node(environment) {
             @Override
             protected void validateNodeBeforeAcceptingRequests(Settings settings, BoundTransportAddress boundTransportAddress) {
-                BootstrapCheck.check(settings, boundTransportAddress);
+                BootstrapCheck.check(settings, getNodeEnvironment().getNodeLockId(), boundTransportAddress);
             }
         };
     }

diff --git a/core/src/main/java/org/elasticsearch/bootstrap/BootstrapCheck.java b/core/src/main/java/org/elasticsearch/bootstrap/BootstrapCheck.java
@@ -28,6 +28,7 @@
 import org.elasticsearch.common.transport.BoundTransportAddress;
 import org.elasticsearch.common.transport.TransportAddress;
 import org.elasticsearch.discovery.zen.elect.ElectMasterService;
+import org.elasticsearch.env.NodeEnvironment;
 import org.elasticsearch.monitor.jvm.JvmInfo;
 import org.elasticsearch.monitor.process.ProcessProbe;
 import org.elasticsearch.node.Node;
@@ -59,13 +60,14 @@ private BootstrapCheck() {
      * checks
      *
      * @param settings              the current node settings
+     * @param dataDirectoryIndex   the index of the directories who's locks under the data directories we were able to take
      * @param boundTransportAddress the node network bindings
      */
-    static void check(final Settings settings, final BoundTransportAddress boundTransportAddress) {
+    static void check(final Settings settings, final int dataDirectoryIndex, final BoundTransportAddress boundTransportAddress) {
         check(
                 enforceLimits(boundTransportAddress),
                 BootstrapSettings.IGNORE_SYSTEM_BOOTSTRAP_CHECKS.get(settings),
-                checks(settings),
+                checks(settings, dataDirectoryIndex),
                 Node.NODE_NAME_SETTING.get(settings));
     }
 
@@ -153,7 +155,7 @@ static boolean enforceLimits(BoundTransportAddress boundTransportAddress) {
     }
 
     // the list of checks to execute
-    static List<Check> checks(final Settings settings) {
+    static List<Check> checks(final Settings settings, final int dataDirectoryIndex) {
         final List<Check> checks = new ArrayList<>();
         checks.add(new HeapSizeCheck());
         final FileDescriptorCheck fileDescriptorCheck
@@ -173,6 +175,12 @@ static List<Check> checks(final Settings settings) {
         checks.add(new ClientJvmCheck());
         checks.add(new OnErrorCheck());
         checks.add(new OnOutOfMemoryErrorCheck());
+        /*
+         * true in the line below means that we always default to the number of nodes for production configurations. This means that the
+         * check will produce consistent results even if Elasticsearch isn't being run in production mode. We rely on the production mode
+         * detection that BootstrapCheck already does decide whether or not to throw an error.
+         */
+        checks.add(new DataDirectoryLockNumberCheck(dataDirectoryIndex, NodeEnvironment.getMaxLocalStorageNodes(settings, true)));
         return Collections.unmodifiableList(checks);
     }
 
@@ -592,4 +600,32 @@ public String errorMessage() {
 
     }
 
+    /**
+     * Checks that the data directory that the node is using is within the production limit defined by
+     * {@link NodeEnvironment#MAX_LOCAL_STORAGE_NODES_SETTING}.
+     */
+    static class DataDirectoryLockNumberCheck implements BootstrapCheck.Check {
+        private final int dataDirectoryIndex;
+        private final int maxLocalStorageNodes;
+
+        DataDirectoryLockNumberCheck(int dataDirectoryIndex, int maxLocalStorageNodes) {
+            this.dataDirectoryIndex = dataDirectoryIndex;
+            this.maxLocalStorageNodes = maxLocalStorageNodes;
+        }
+
+        @Override
+        public boolean check() {
+            return dataDirectoryIndex < maxLocalStorageNodes;
+        }
+
+        @Override
+        public String errorMessage() {
+            return "there are already [" + dataDirectoryIndex + "] Elasticsearch processes running with this data directory";
+        }
+
+        @Override
+        public boolean isSystemCheck() {
+            return false;
+        }
+    }
 }
diff --git a/core/src/main/java/org/elasticsearch/env/NodeEnvironment.java b/core/src/main/java/org/elasticsearch/env/NodeEnvironment.java
@@ -81,9 +81,6 @@
  * A component that holds all data paths for a single node.
  */
 public final class NodeEnvironment  implements Closeable {
-
-    private final ESLogger logger;
-
     public static class NodePath {
         /* ${data.paths}/nodes/{node.id} */
         public final Path path;
@@ -138,21 +135,20 @@ public String toString() {
         }
     }
 
-    private final NodePath[] nodePaths;
-    private final Path sharedDataPath;
-    private final Lock[] locks;
-
-    private final int nodeLockId;
-    private final AtomicBoolean closed = new AtomicBoolean(false);
-    private final Map<ShardId, InternalShardLock> shardLocks = new HashMap<>();
-
-    private final NodeMetaData nodeMetaData;
-
     /**
-     * Maximum number of data nodes that should run in an environment.
+     * Maximum number of data nodes that can run in an environment. {@code 0} is a special value for this setting meaning "use the default
+     * based on the number of allowed local storage nodes".
+     */
+    public static final Setting<Integer> MAX_LOCAL_STORAGE_NODES_SETTING = Setting.intSetting("node.max_local_storage_nodes", 0, 0,
+            Property.NodeScope);
+    /**
+     * Default for {@link MAX_LOCAL_STORAGE_NODES_SETTING} to use when not in production mode.
      */
-    public static final Setting<Integer> MAX_LOCAL_STORAGE_NODES_SETTING = Setting.intSetting("node.max_local_storage_nodes", 50, 1,
-        Property.NodeScope);
+    public static final int MAX_LOCAL_STORAGE_NODES_NON_PRODUCTION_DEFAULT = 50;
+    /**
+     * Default for {@link MAX_LOCAL_STORAGE_NODES_SETTING} to use when in production mode.
+     */
+    public static final int MAX_LOCAL_STORAGE_NODES_PRODUCTION_DEFAULT = 1;
 
     /**
      * If true automatically append node lock id to custom data paths.
@@ -179,6 +175,21 @@ public String toString() {
     public static final String INDICES_FOLDER = "indices";
     public static final String NODE_LOCK_FILENAME = "node.lock";
 
+    private final ESLogger logger;
+    private final NodePath[] nodePaths;
+    private final Path sharedDataPath;
+    private final Lock[] locks;
+
+    /**
+     * The lock number we were able to take. This is <strong>usually</strong> the number of Elasticsearch nodes sharing data directories
+     * with this node.
+     */
+    private final int nodeLockId;
+    private final AtomicBoolean closed = new AtomicBoolean(false);
+    private final Map<ShardId, InternalShardLock> shardLocks = new HashMap<>();
+
+    private final NodeMetaData nodeMetaData;
+
     public NodeEnvironment(Settings settings, Environment environment) throws IOException {
 
         if (!DiscoveryNode.nodeRequiresLocalStorage(settings)) {
@@ -201,7 +212,11 @@ public NodeEnvironment(Settings settings, Environment environment) throws IOExce
             sharedDataPath = environment.sharedDataFile();
             int nodeLockId = -1;
             IOException lastException = null;
-            int maxLocalStorageNodes = MAX_LOCAL_STORAGE_NODES_SETTING.get(settings);
+            /*
+             * Try and acquire a lock as though we were in non-production mode regardless of what we will bind. If we find out later we are
+             * in production mode we'll fail to startup fully and if we aren't in production mode we'll log a nice little warning.
+             */
+            int maxLocalStorageNodes = getMaxLocalStorageNodes(settings, false);
             for (int possibleLockId = 0; possibleLockId < maxLocalStorageNodes; possibleLockId++) {
                 for (int dirIndex = 0; dirIndex < environment.dataFiles().length; dirIndex++) {
                     Path dataDirWithClusterName = environment.dataWithClusterFiles()[dirIndex];
@@ -419,6 +434,28 @@ private static String toString(Collection<String> items) {
         return b.toString();
     }
 
+    /**
+     * The lock number we were able to take. This is <strong>usually</strong> the number of Elasticsearch nodes sharing data directories
+     * with this node.
+     */
+    public int getNodeLockId() {
+        return nodeLockId;
+    }
+
+    /**
+     * Get the maximum number of local storage nodes allowed.
+     * @param settings the settings object from which to read the settings
+     * @param productionMode whether or not we are running in production mode
+     */
+    public static int getMaxLocalStorageNodes(Settings settings, boolean productionMode) {
+        int maxLocalStorageNodes = MAX_LOCAL_STORAGE_NODES_SETTING.get(settings);
+        if (maxLocalStorageNodes == 0) {
+            maxLocalStorageNodes = productionMode ? MAX_LOCAL_STORAGE_NODES_PRODUCTION_DEFAULT
+                    : MAX_LOCAL_STORAGE_NODES_NON_PRODUCTION_DEFAULT;
+        }
+        return maxLocalStorageNodes;
+    }
+
     /**
      * Deletes a shard data directory iff the shards locks were successfully acquired.
      *

diff --git a/core/src/test/java/org/elasticsearch/bootstrap/BootstrapCheckTests.java b/core/src/test/java/org/elasticsearch/bootstrap/BootstrapCheckTests.java
@@ -20,10 +20,12 @@
 package org.elasticsearch.bootstrap;
 
 import org.apache.lucene.util.Constants;
+import org.elasticsearch.bootstrap.BootstrapCheck.DataDirectoryLockNumberCheck;
 import org.elasticsearch.common.logging.ESLogger;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.transport.BoundTransportAddress;
 import org.elasticsearch.common.transport.TransportAddress;
+import org.elasticsearch.env.NodeEnvironment;
 import org.elasticsearch.test.ESTestCase;
 
 import java.util.ArrayList;
@@ -63,7 +65,7 @@ public void testNonProductionMode() {
         BoundTransportAddress boundTransportAddress = mock(BoundTransportAddress.class);
         when(boundTransportAddress.boundAddresses()).thenReturn(transportAddresses.toArray(new TransportAddress[0]));
         when(boundTransportAddress.publishAddress()).thenReturn(publishAddress);
-        BootstrapCheck.check(Settings.EMPTY, boundTransportAddress);
+        BootstrapCheck.check(Settings.EMPTY, between(0, Integer.MAX_VALUE), boundTransportAddress);
     }
 
     public void testNoLogMessageInNonProductionMode() {
@@ -389,7 +391,7 @@ public void testMinMasterNodes() {
         boolean isSet = randomBoolean();
         BootstrapCheck.Check check = new BootstrapCheck.MinMasterNodesCheck(isSet);
         assertThat(check.check(), not(equalTo(isSet)));
-        List<BootstrapCheck.Check> defaultChecks = BootstrapCheck.checks(Settings.EMPTY);
+        List<BootstrapCheck.Check> defaultChecks = BootstrapCheck.checks(Settings.EMPTY, 0);
 
         expectThrows(RuntimeException.class, () -> BootstrapCheck.check(true, false, defaultChecks, "testMinMasterNodes"));
     }
@@ -604,4 +606,21 @@ public boolean alwaysEnforce() {
         assertThat(alwaysEnforced, hasToString(containsString("error")));
     }
 
+    public void testDataDirectoryNumberCheck() {
+        // In production mode this defaults to 1:
+        DataDirectoryLockNumberCheck check = new DataDirectoryLockNumberCheck(0, 1);
+        assertTrue(check.check());
+        check = new DataDirectoryLockNumberCheck(1, 1);
+        assertFalse(check.check());
+        assertEquals("there are already [1] Elasticsearch processes running with this data directory", check.errorMessage());
+
+        // But in development mode it defaults to a whopping 50
+        check = new DataDirectoryLockNumberCheck(0, 50);
+        assertTrue(check.check());
+        check = new DataDirectoryLockNumberCheck(49, 50);
+        assertTrue(check.check());
+        check = new DataDirectoryLockNumberCheck(50, 50);
+        assertFalse(check.check());
+        assertEquals("there are already [50] Elasticsearch processes running with this data directory", check.errorMessage());
+    }
 }
diff --git a/core/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java b/core/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java
@@ -57,20 +57,25 @@
 public class NodeEnvironmentTests extends ESTestCase {
     private final IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("foo", Settings.EMPTY);
 
-    public void testNodeLockSillySettings() {
-        try {
-            NodeEnvironment.MAX_LOCAL_STORAGE_NODES_SETTING.get(Settings.builder()
-                    .put(NodeEnvironment.MAX_LOCAL_STORAGE_NODES_SETTING.getKey(), between(Integer.MIN_VALUE, 0)).build());
-            fail("expected failure");
-        } catch (IllegalArgumentException e) {
-            assertThat(e.getMessage(), containsString("must be >= 1"));
+    public void testLocalStorageNodesSetting() {
+        {
+            final Settings settings = Settings.builder()
+                    .put(NodeEnvironment.MAX_LOCAL_STORAGE_NODES_SETTING.getKey(), between(Integer.MIN_VALUE, 0)).build();
+            IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
+                    () -> NodeEnvironment.getMaxLocalStorageNodes(settings, randomBoolean()));
+            assertThat(e.getMessage(), containsString("must be >= 0"));
+        }
+        {
+            int value = between(1, Integer.MAX_VALUE); // MAXINT is silly but ok.
+            Settings settings = Settings.builder().put(NodeEnvironment.MAX_LOCAL_STORAGE_NODES_SETTING.getKey(), value).build();
+            assertEquals(value,NodeEnvironment.getMaxLocalStorageNodes(settings, randomBoolean()));
         }
 
-        // Even though its silly MAXINT nodes is a-ok!
-        int value = between(1, Integer.MAX_VALUE);
-        int max = NodeEnvironment.MAX_LOCAL_STORAGE_NODES_SETTING.get(
-                Settings.builder().put(NodeEnvironment.MAX_LOCAL_STORAGE_NODES_SETTING.getKey(), value).build());
-        assertEquals(value, max);
+        // Defaults for production and non-production mode
+        assertEquals(NodeEnvironment.MAX_LOCAL_STORAGE_NODES_NON_PRODUCTION_DEFAULT,
+                NodeEnvironment.getMaxLocalStorageNodes(Settings.EMPTY, false));
+        assertEquals(NodeEnvironment.MAX_LOCAL_STORAGE_NODES_PRODUCTION_DEFAULT,
+                NodeEnvironment.getMaxLocalStorageNodes(Settings.EMPTY, true));
     }
 
     public void testNodeLockSingleEnvironment() throws IOException {

diff --git a/docs/reference/migration/migrate_5_0/settings.asciidoc b/docs/reference/migration/migrate_5_0/settings.asciidoc
@@ -73,7 +73,7 @@ scope (e.g. `_local_,_site_` for all loopback and private network addresses)
 or by explicit interface names, hostnames, or addresses.
 
 The `netty.epollBugWorkaround` settings is removed. This settings allow people to enable
-a netty work around for https://github.com/netty/netty/issues/327[a high CPU usage issue] with early JVM versions. 
+a netty work around for https://github.com/netty/netty/issues/327[a high CPU usage issue] with early JVM versions.
 This bug was http://bugs.java.com/view_bug.do?bug_id=6403933[fixed in Java 7]. Since Elasticsearch 5.0 requires Java 8 the settings is removed. Note that if the workaround needs to be reintroduced you can still set the `org.jboss.netty.epollBugWorkaround` system property to control Netty directly.
 
 ==== Forbid changing of thread pool types
@@ -259,6 +259,15 @@ a fallback realtime setting for the get and mget APIs when realtime
 wasn't specified. Now if the parameter isn't specified we always
 default to true.
 
+==== `node.max_local_storage_nodes` defaults to 1 in production mode
+
+If you don't explicitly set `node.max_local_storage_nodes` and you start
+multiple Elasticsearch processes in the same source directories then
+Elasticsearch will log warning if it hasn't bound to any non-local addresses.
+If it *has* bound to any non-local addresses then it'll refuse to start,
+with the message
+`there are already [1] Elasticsearch processes running with this data directory`.
+
 === Script settings
 
 ==== Indexed script settings

diff --git a/docs/reference/modules/node.asciidoc b/docs/reference/modules/node.asciidoc
@@ -82,7 +82,7 @@ cluster health to have a stable master node.
 Any master-eligible node (all nodes by default) may be elected to become the
 master node by the <<modules-discovery-zen,master election process>>.
 
-IMPORTANT: Master nodes must have access to the `data/` directory (just like 
+IMPORTANT: Master nodes must have access to the `data/` directory (just like
 `data` nodes) as this is where the cluster state is persisted between node restarts.
 
 Indexing and searching your data is CPU-, memory-, and I/O-intensive work
@@ -284,14 +284,19 @@ your data! The RPM and Debian distributions do this for you already.
 The <<data-path,data path>> can be shared by multiple nodes, even by nodes
 from different clusters.  This is very useful for testing failover and
 different configurations on your development machine.  In production, however,
-it is recommended to run only one node of Elasticsearch per server.
+it is recommended to run only one node of Elasticsearch per server. If you
+don't explicitly set `node.max_local_storage_nodes` then Elasticsearch will
+log a warning if multiple nodes share the same data directory and it hasn't
+bound to any non-local addresses. If it has bound to any non-local addresses
+then it'll refuse to start.
 
-To prevent more than one node from sharing the same data path, add this
-setting to the `elasticsearch.yml` config file:
+You can always override the setting to *allow* more than one node to share the
+same data directory in production mode by setting it in the `elasticsearch.yml`
+config file:
 
 [source,yaml]
 ------------------------------
-node.max_local_storage_nodes: 1
+node.max_local_storage_nodes: 2
 ------------------------------
 
 WARNING: Never run different node types (i.e. master, data) from the

diff --git a/docs/reference/setup/important-settings.asciidoc b/docs/reference/setup/important-settings.asciidoc
@@ -12,7 +12,6 @@ configured before going into production.
 * <<network.host,`network.host`>>
 * <<unicast.hosts,`discovery.zen.ping.unicast.hosts`>>
 * <<minimum_master_nodes,`discovery.zen.minimum_master_nodes`>>
-* <<node.max_local_storage_nodes,`node.max_local_storage_nodes`>>
 
 [float]
 [[path-settings]]
@@ -185,32 +184,3 @@ discovery.zen.minimum_master_nodes: 2
 IMPORTANT: If `discovery.zen.minimum_master_nodes` is not set when
 Elasticsearch is running in <<dev-vs-prod,production mode>>, an exception will
 be thrown which will prevent the node from starting.
-
-[float]
-[[node.max_local_storage_nodes]]
-=== `node.max_local_storage_nodes`
-
-It is possible to start more than one node on the same server from the same
-`$ES_HOME`, just by doing the following:
-
-[source,sh]
---------------------------------------------------
-./bin/elasticsearch -d
-./bin/elasticsearch -d
---------------------------------------------------
-
-This works just fine: the data directory structure is designed to let multiple
-nodes coexist.  However, a single instance of Elasticsearch is able to use all
-of the resources of a single server and it seldom makes sense to run multiple
-nodes on the same server in production.
-
-It is, however, possible to start more than one node on the same server by
-mistake and to be completely unaware that this problem exists. To prevent more
-than one node from sharing the same data directory, it is advisable to add the
-following setting:
-
-[source,yaml]
---------------------------------------------------
-node.max_local_storage_nodes: 1
---------------------------------------------------
-