From 426fad3af93009bf169af67c8f22075d6caed0d5 Mon Sep 17 00:00:00 2001 From: Henning Andersen Date: Wed, 28 Aug 2019 10:35:21 +0200 Subject: [PATCH 1/2] Search with failing nodes test Add a test case that verifies that searches running while nodes are failing behave correctly, in particular with allow_partial_search_results=false. Fixed MockSearchService concurrency, assertNoInFlightContext could have false negative result (rarely). Split out from #45739 --- .../search/SearchWithFailingNodesIT.java | 135 ++++++++++++++++++ .../search/MockSearchService.java | 2 +- 2 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 server/src/test/java/org/elasticsearch/search/SearchWithFailingNodesIT.java diff --git a/server/src/test/java/org/elasticsearch/search/SearchWithFailingNodesIT.java b/server/src/test/java/org/elasticsearch/search/SearchWithFailingNodesIT.java new file mode 100644 index 0000000000000..3fc23ecf51f2d --- /dev/null +++ b/server/src/test/java/org/elasticsearch/search/SearchWithFailingNodesIT.java @@ -0,0 +1,135 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search; + + +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.index.query.RangeQueryBuilder; +import org.elasticsearch.test.ESIntegTestCase; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Supplier; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.hamcrest.Matchers.equalTo; + + +/** + * This test is a disruption style test that restarts data nodes to see if search behaves well under extreme conditions. + */ +@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, minNumDataNodes = 2) +public class SearchWithFailingNodesIT extends ESIntegTestCase { + + public void testDisallowPartialsWithRedStateRecovering() throws Exception { + int docCount = scaledRandomIntBetween(10, 1000); + logger.info("Using docCount [{}]", docCount); + assertAcked(prepareCreate("test").setSettings(Settings.builder() + .put("index.number_of_shards", cluster().numDataNodes() + 2).put("index.number_of_replicas", 1))); + ensureGreen(); + for (int i = 0; i < docCount; i++) { + client().prepareIndex("test", "_doc", ""+i).setSource("field1", i).get(); + } + refresh("test"); + + AtomicBoolean stop = new AtomicBoolean(); + List searchThreads = new ArrayList<>(); + // this is a little extreme, but necessary to provoke spectacular timings like hitting a recovery on a replica + for (int i = 0; i < 100; ++i) { + Thread searchThread = new Thread() { + { + setDaemon(true); + } + + @Override + public void run() { + while (stop.get() == false) { + // todo: the timeouts below should not be necessary, but this test sometimes hangs without, to be fixed (or + // explained) + verify(() -> client().prepareSearch("test").setQuery(new RangeQueryBuilder("field1").gte(0)) + .setSize(100).setAllowPartialSearchResults(false).get(TimeValue.timeValueSeconds(10))); + verify(() -> client().prepareSearch("test") + .setSize(100).setAllowPartialSearchResults(false).get(TimeValue.timeValueSeconds(10))); + } + } + + void verify(Supplier call) { + try { + SearchResponse response = call.get(); + assertThat(response.getHits().getHits().length, equalTo(Math.min(100, docCount))); + assertThat(response.getHits().getTotalHits().value, equalTo((long) docCount)); + } catch (Exception e) { + // this is OK. + logger.info("Failed with : " + e); + } + } + }; + searchThreads.add(searchThread); + searchThread.start(); + } + try { + // have two threads do restarts, with a replica of 1, this means we will sometimes have no copies (RED) + Thread restartThread = new Thread() { + { + setDaemon(true); + } + + @Override + public void run() { + try { + for (int i = 0; i < 5; ++i) { + internalCluster().restartRandomDataNode(); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + }; + restartThread.start(); + for (int i = 0; i < 5; ++i) { + internalCluster().restartRandomDataNode(); + } + restartThread.join(30000); + assertFalse(restartThread.isAlive()); + } finally { + stop.set(true); + searchThreads.forEach(thread -> { + try { + thread.join(30000); + if (thread.isAlive()) { + logger.warn("Thread: " + thread + " is still alive"); + // do not continue unless thread terminates to avoid getting other confusing test errors. Please kill me... + thread.join(); + } + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + }); + } + + // hack to ensure all search contexts are removed, seems we risk leaked search contexts when coordinator dies. + client().admin().indices().prepareDelete("test").get(); + } + + +} diff --git a/test/framework/src/main/java/org/elasticsearch/search/MockSearchService.java b/test/framework/src/main/java/org/elasticsearch/search/MockSearchService.java index b9d9ff3cfc9bb..c1ebb1213495b 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/MockSearchService.java +++ b/test/framework/src/main/java/org/elasticsearch/search/MockSearchService.java @@ -74,8 +74,8 @@ public MockSearchService(ClusterService clusterService, @Override protected void putContext(SearchContext context) { - super.putContext(context); addActiveContext(context); + super.putContext(context); } @Override From 1626100053ac1456d09ccdd407bdb0e118841021 Mon Sep 17 00:00:00 2001 From: Henning Andersen Date: Mon, 9 Mar 2020 07:59:12 +0100 Subject: [PATCH 2/2] Fix compilation --- .../java/org/elasticsearch/search/SearchWithFailingNodesIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/search/SearchWithFailingNodesIT.java b/server/src/test/java/org/elasticsearch/search/SearchWithFailingNodesIT.java index 3fc23ecf51f2d..97087467f6d55 100644 --- a/server/src/test/java/org/elasticsearch/search/SearchWithFailingNodesIT.java +++ b/server/src/test/java/org/elasticsearch/search/SearchWithFailingNodesIT.java @@ -48,7 +48,7 @@ public void testDisallowPartialsWithRedStateRecovering() throws Exception { .put("index.number_of_shards", cluster().numDataNodes() + 2).put("index.number_of_replicas", 1))); ensureGreen(); for (int i = 0; i < docCount; i++) { - client().prepareIndex("test", "_doc", ""+i).setSource("field1", i).get(); + client().prepareIndex("test").setId(""+i).setSource("field1", i).get(); } refresh("test");