diff --git a/docs/reference/query-dsl/combined-fields-query.asciidoc b/docs/reference/query-dsl/combined-fields-query.asciidoc new file mode 100644 index 0000000000000..390d71276cf3d --- /dev/null +++ b/docs/reference/query-dsl/combined-fields-query.asciidoc @@ -0,0 +1,185 @@ +[[query-dsl-combined-fields-query]] +=== Combined fields +++++ +Combined fields +++++ + +The `combined_fields` query supports searching multiple text fields as if their +contents had been indexed into one combined field. It takes a term-centric +view of the query: first it analyzes the query string into individual terms, +then looks for each term in any of the fields. This query is particularly +useful when a match could span multiple text fields, for example the `title`, +`abstract` and `body` of an article: + +[source,console] +-------------------------------------------------- +GET /_search +{ + "query": { + "combined_fields" : { + "query": "database systems", + "fields": [ "title", "abstract", "body"], + "operator": "and" + } + } +} +-------------------------------------------------- + +The `combined_fields` query takes a principled approach to scoring based on the +simple BM25F formula described in +http://www.staff.city.ac.uk/~sb317/papers/foundations_bm25_review.pdf[The Probabilistic Relevance Framework: BM25 and Beyond]. +When scoring matches, the query combines term and collection statistics across +fields. This allows it to score each match as if the specified fields had been +indexed into a single combined field. (Note that this is a best attempt -- +`combined_fields` makes some approximations and scores will not obey this +model perfectly.) + +[WARNING] +.Field number limit +=================================================== +There is a limit on the number of fields that can be queried at once. It is +defined by the `indices.query.bool.max_clause_count` <> +which defaults to 1024. 
+=================================================== + +==== Per-field boosting + +Individual fields can be boosted with the caret (`^`) notation: + +[source,console] +-------------------------------------------------- +GET /_search +{ + "query": { + "combined_fields" : { + "query" : "distributed consensus", + "fields" : [ "title^2", "body" ] + } + } +} +-------------------------------------------------- + +Field boosts are interpreted according to the combined field model. For example, +if the `title` field has a boost of 2, the score is calculated as if each term +in the title appeared twice in the synthetic combined field. + +NOTE: The `combined_fields` query requires that field boosts are greater than +or equal to 1.0. Field boosts are allowed to be fractional. + +[[combined-field-top-level-params]] +==== Top-level parameters for `combined_fields` + +`fields`:: +(Required, array of strings) List of fields to search. Field wildcard patterns +are allowed. Only <> fields are supported, and they must all have +the same search <>. + +`query`:: ++ +-- +(Required, string) Text to search for in the provided `<fields>`. + +The `combined_fields` query <> the provided text before +performing a search. +-- + +`auto_generate_synonyms_phrase_query`:: ++ +-- +(Optional, Boolean) If `true`, <> +queries are automatically created for multi-term synonyms. Defaults to `true`. + +See <> for an +example. +-- + +`operator`:: ++ +-- +(Optional, string) Boolean logic used to interpret text in the `query` value. +Valid values are: + +`or` (Default):: +For example, a `query` value of `database systems` is interpreted as `database +OR systems`. + +`and`:: +For example, a `query` value of `database systems` is interpreted as `database +AND systems`. +-- + +`minimum_should_match`:: ++ +-- +(Optional, string) Minimum number of clauses that must match for a document to +be returned. See the <> for valid values and more information. 
+-- + +`zero_terms_query`:: ++ +-- +(Optional, string) Indicates whether no documents are returned if the `analyzer` +removes all tokens, such as when using a `stop` filter. Valid values are: + +`none` (Default):: +No documents are returned if the `analyzer` removes all tokens. + +`all`:: +Returns all documents, similar to a <> +query. + +See <> for an example. +-- + +===== Comparison to `multi_match` query + +The `combined_fields` query provides a principled way of matching and scoring +across multiple <> fields. To support this, it requires that all +fields have the same search <>. + +If you want a single query that handles fields of different types like +keywords or numbers, then the <> +query may be a better fit. It supports both text and non-text fields, and +accepts text fields that do not share the same analyzer. + +The main `multi_match` modes `best_fields` and `most_fields` take a +field-centric view of the query. In contrast, `combined_fields` is +term-centric: `operator` and `minimum_should_match` are applied per-term, +instead of per-field. Concretely, a query like + +[source,console] +-------------------------------------------------- +GET /_search +{ + "query": { + "combined_fields" : { + "query": "database systems", + "fields": [ "title", "abstract"], + "operator": "and" + } + } +} +-------------------------------------------------- + +is executed as + + +(combined("database", fields:["title", "abstract"])) + +(combined("systems", fields:["title", "abstract"])) + +In other words, each term must be present in at least one field for a +document to match. + +The `cross_fields` `multi_match` mode also takes a term-centric approach and +applies `operator` and `minimum_should_match` per-term. The main advantage of +`combined_fields` over `cross_fields` is its robust and interpretable approach +to scoring based on the BM25F algorithm. 
+ +[NOTE] +.Custom similarities +=================================================== +The `combined_fields` query currently only supports the `BM25` similarity +(which is the default unless a <> +is configured). <> are also not allowed. +Using `combined_fields` in either of these cases will result in an error. +=================================================== diff --git a/docs/reference/query-dsl/full-text-queries.asciidoc b/docs/reference/query-dsl/full-text-queries.asciidoc index e649fbae6f270..0a7caa56e2b54 100644 --- a/docs/reference/query-dsl/full-text-queries.asciidoc +++ b/docs/reference/query-dsl/full-text-queries.asciidoc @@ -1,9 +1,9 @@ [[full-text-queries]] == Full text queries -The full text queries enable you to search <> such as the -body of an email. The query string is processed using the same analyzer that was applied to -the field during indexing. +The full text queries enable you to search <> such as the +body of an email. The query string is processed using the same analyzer that was applied to +the field during indexing. The queries in this group are: @@ -21,13 +21,16 @@ the last term, which is matched as a `prefix` query <>:: Like the `match` query but used for matching exact phrases or word proximity matches. - + <>:: Like the `match_phrase` query, but does a wildcard search on the final word. - + <>:: The multi-field version of the `match` query. +<>:: +Matches over multiple fields as if they had been indexed into one combined field. 
+ <>:: Supports the compact Lucene <>, allowing you to specify AND|OR|NOT conditions and multi-field search @@ -48,8 +51,10 @@ include::match-phrase-query.asciidoc[] include::match-phrase-prefix-query.asciidoc[] +include::combined-fields-query.asciidoc[] + include::multi-match-query.asciidoc[] include::query-string-query.asciidoc[] -include::simple-query-string-query.asciidoc[] \ No newline at end of file +include::simple-query-string-query.asciidoc[] diff --git a/docs/reference/query-dsl/multi-match-query.asciidoc b/docs/reference/query-dsl/multi-match-query.asciidoc index f11d3f2140f7b..d155e5a05c1a9 100644 --- a/docs/reference/query-dsl/multi-match-query.asciidoc +++ b/docs/reference/query-dsl/multi-match-query.asciidoc @@ -192,7 +192,10 @@ This query is executed as: In other words, *all terms* must be present *in a single field* for a document to match. -See <> for a better solution. +The <> query offers a +term-centric approach that handles `operator` and `minimum_should_match` on a +per-term basis. The other multi-match mode <> also +addresses this issue. =================================================== @@ -385,6 +388,12 @@ explanation: Also, accepts `analyzer`, `boost`, `operator`, `minimum_should_match`, `lenient` and `zero_terms_query`. +WARNING: The `cross_fields` type blends field statistics in a way that does +not always produce well-formed scores (for example scores can become +negative). As an alternative, you can consider the +<> query, which is also +term-centric but combines field statistics in a more robust way. 
+ [[cross-field-analysis]] ===== `cross_field` and analysis diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/10_unified.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/10_unified.yml index edb1ba1b05934..9e9380cfc2f89 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/10_unified.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/10_unified.yml @@ -24,11 +24,27 @@ setup: indices.refresh: {} --- -"Basic": +"Basic multi_match query": - do: search: - rest_total_hits_as_int: true - body: { "query" : {"multi_match" : { "query" : "quick brown fox", "fields" : [ "text*"] } }, "highlight" : { "type" : "unified", "fields" : { "*" : {} } } } + body: { + "query" : { "multi_match" : { "query" : "quick brown fox", "fields" : [ "text*"] } }, + "highlight" : { "type" : "unified", "fields" : { "*" : {} } } } + + - match: {hits.hits.0.highlight.text.0: "The quick brown fox is brown."} + - match: {hits.hits.0.highlight.text\.fvh.0: "The quick brown fox is brown."} + - match: {hits.hits.0.highlight.text\.postings.0: "The quick brown fox is brown."} + +--- +"Basic combined_fields query": + - skip: + version: " - 7.99.99" + reason: "combined fields query is not yet backported" + - do: + search: + body: { + "query" : { "combined_fields" : { "query" : "quick brown fox", "fields" : [ "text*"] } }, + "highlight" : { "type" : "unified", "fields" : { "*" : {} } } } - match: {hits.hits.0.highlight.text.0: "The quick brown fox is brown."} - match: {hits.hits.0.highlight.text\.fvh.0: "The quick brown fox is brown."} diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/360_combined_fields.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/360_combined_fields.yml new file mode 100644 index 0000000000000..777c72e49923a --- /dev/null +++ 
b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/360_combined_fields.yml @@ -0,0 +1,42 @@ +setup: + - do: + indices.create: + index: test + body: + mappings: + properties: + title: + type: text + abstract: + type: text + body: + type: text + + - do: + index: + index: test + id: 1 + body: + title: "Time, Clocks and the Ordering of Events in a Distributed System" + abstract: "The concept of one event happening before another..." + body: "The concept of time is fundamental to our way of thinking..." + refresh: true + +--- +"Test combined_fields query": + - skip: + version: " - 7.99.99" + reason: "combined fields query is not yet backported" + - do: + search: + index: test + body: + query: + combined_fields: + query: "time event" + fields: ["abstract", "body"] + operator: "and" + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "1" } + diff --git a/server/src/internalClusterTest/java/org/elasticsearch/index/search/MatchPhraseQueryIT.java b/server/src/internalClusterTest/java/org/elasticsearch/index/search/MatchPhraseQueryIT.java index 9fa0047f79fd4..6eb399d934711 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/index/search/MatchPhraseQueryIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/index/search/MatchPhraseQueryIT.java @@ -13,7 +13,7 @@ import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.query.MatchPhraseQueryBuilder; -import org.elasticsearch.index.search.MatchQueryParser.ZeroTermsQuery; +import org.elasticsearch.index.query.ZeroTermsQueryOption; import org.elasticsearch.test.ESIntegTestCase; import org.junit.Before; @@ -47,11 +47,11 @@ public void testZeroTermsQuery() throws ExecutionException, InterruptedException MatchPhraseQueryBuilder baseQuery = matchPhraseQuery("name", "the who") .analyzer("standard_stopwords"); - MatchPhraseQueryBuilder matchNoneQuery = baseQuery.zeroTermsQuery(ZeroTermsQuery.NONE); 
+ MatchPhraseQueryBuilder matchNoneQuery = baseQuery.zeroTermsQuery(ZeroTermsQueryOption.NONE); SearchResponse matchNoneResponse = client().prepareSearch(INDEX).setQuery(matchNoneQuery).get(); assertHitCount(matchNoneResponse, 0L); - MatchPhraseQueryBuilder matchAllQuery = baseQuery.zeroTermsQuery(ZeroTermsQuery.ALL); + MatchPhraseQueryBuilder matchAllQuery = baseQuery.zeroTermsQuery(ZeroTermsQueryOption.ALL); SearchResponse matchAllResponse = client().prepareSearch(INDEX).setQuery(matchAllQuery).get(); assertHitCount(matchAllResponse, 2L); } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java index ec4ebf3a8d019..49a70d94aae33 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java @@ -30,6 +30,7 @@ import org.elasticsearch.index.analysis.AnalyzerProvider; import org.elasticsearch.index.analysis.PreConfiguredTokenFilter; import org.elasticsearch.index.query.AbstractQueryBuilder; +import org.elasticsearch.index.query.CombinedFieldsQueryBuilder; import org.elasticsearch.index.query.IdsQueryBuilder; import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.MultiMatchQueryBuilder; @@ -69,6 +70,7 @@ import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; import static org.elasticsearch.index.query.QueryBuilders.boolQuery; import static org.elasticsearch.index.query.QueryBuilders.boostingQuery; +import static org.elasticsearch.index.query.QueryBuilders.combinedFieldsQuery; import static org.elasticsearch.index.query.QueryBuilders.constantScoreQuery; import static org.elasticsearch.index.query.QueryBuilders.existsQuery; import static 
org.elasticsearch.index.query.QueryBuilders.fuzzyQuery; @@ -2113,6 +2115,44 @@ public void testMultiMatchQueryHighlight() throws IOException { } } + public void testCombinedFieldsQueryHighlight() throws IOException { + XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties") + .startObject("field1") + .field("type", "text") + .field("index_options", "offsets") + .field("term_vector", "with_positions_offsets") + .endObject() + .startObject("field2") + .field("type", "text") + .field("index_options", "offsets") + .field("term_vector", "with_positions_offsets") + .endObject() + .endObject() + .endObject().endObject(); + assertAcked(prepareCreate("test").setMapping(mapping)); + ensureGreen(); + + client().prepareIndex("test") + .setSource("field1", "The quick brown fox jumps over", "field2", "The quick brown fox jumps over") + .get(); + refresh(); + + for (String highlighterType : ALL_TYPES) { + CombinedFieldsQueryBuilder multiMatchQueryBuilder = combinedFieldsQuery("the quick brown fox", "field1", "field2"); + SearchSourceBuilder source = searchSource() + .query(multiMatchQueryBuilder) + .highlighter(highlight() + .highlighterType(highlighterType) + .field(new Field("field1").requireFieldMatch(true).preTags("").postTags(""))); + + SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + assertHitCount(searchResponse, 1L); + assertHighlight(searchResponse, 0, "field1", 0, + equalTo("The quick brown fox jumps over")); + } + } + public void testPostingsHighlighterOrderByScore() throws Exception { assertAcked(prepareCreate("test").setMapping(type1PostingsffsetsMapping())); ensureGreen(); diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/query/SearchQueryIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/query/SearchQueryIT.java index 08703c8e41042..6216cf25f5543 100644 --- 
a/server/src/internalClusterTest/java/org/elasticsearch/search/query/SearchQueryIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/query/SearchQueryIT.java @@ -22,8 +22,8 @@ import org.elasticsearch.action.search.ShardSearchFailure; import org.elasticsearch.bootstrap.JavaVersion; import org.elasticsearch.common.document.DocumentField; -import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.lucene.search.SpanBooleanQueryRewriteWithMaxClause; +import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.time.DateFormatter; import org.elasticsearch.common.unit.Fuzziness; @@ -43,8 +43,8 @@ import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.index.query.WildcardQueryBuilder; import org.elasticsearch.index.query.WrapperQueryBuilder; +import org.elasticsearch.index.query.ZeroTermsQueryOption; import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders; -import org.elasticsearch.index.search.MatchQueryParser; import org.elasticsearch.indices.IndicesService; import org.elasticsearch.indices.TermsLookup; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; @@ -669,18 +669,18 @@ public void testMatchQueryZeroTermsQuery() { refresh(); BoolQueryBuilder boolQuery = boolQuery() - .must(matchQuery("field1", "a").zeroTermsQuery(MatchQueryParser.ZeroTermsQuery.NONE)) - .must(matchQuery("field1", "value1").zeroTermsQuery(MatchQueryParser.ZeroTermsQuery.NONE)); + .must(matchQuery("field1", "a").zeroTermsQuery(ZeroTermsQueryOption.NONE)) + .must(matchQuery("field1", "value1").zeroTermsQuery(ZeroTermsQueryOption.NONE)); SearchResponse searchResponse = client().prepareSearch().setQuery(boolQuery).get(); assertHitCount(searchResponse, 0L); boolQuery = boolQuery() - .must(matchQuery("field1", "a").zeroTermsQuery(MatchQueryParser.ZeroTermsQuery.ALL)) - .must(matchQuery("field1", 
"value1").zeroTermsQuery(MatchQueryParser.ZeroTermsQuery.ALL)); + .must(matchQuery("field1", "a").zeroTermsQuery(ZeroTermsQueryOption.ALL)) + .must(matchQuery("field1", "value1").zeroTermsQuery(ZeroTermsQueryOption.ALL)); searchResponse = client().prepareSearch().setQuery(boolQuery).get(); assertHitCount(searchResponse, 1L); - boolQuery = boolQuery().must(matchQuery("field1", "a").zeroTermsQuery(MatchQueryParser.ZeroTermsQuery.ALL)); + boolQuery = boolQuery().must(matchQuery("field1", "a").zeroTermsQuery(ZeroTermsQueryOption.ALL)); searchResponse = client().prepareSearch().setQuery(boolQuery).get(); assertHitCount(searchResponse, 2L); } @@ -694,19 +694,19 @@ public void testMultiMatchQueryZeroTermsQuery() { BoolQueryBuilder boolQuery = boolQuery() - .must(multiMatchQuery("a", "field1", "field2").zeroTermsQuery(MatchQueryParser.ZeroTermsQuery.NONE)) + .must(multiMatchQuery("a", "field1", "field2").zeroTermsQuery(ZeroTermsQueryOption.NONE)) // Fields are ORed together - .must(multiMatchQuery("value1", "field1", "field2").zeroTermsQuery(MatchQueryParser.ZeroTermsQuery.NONE)); + .must(multiMatchQuery("value1", "field1", "field2").zeroTermsQuery(ZeroTermsQueryOption.NONE)); SearchResponse searchResponse = client().prepareSearch().setQuery(boolQuery).get(); assertHitCount(searchResponse, 0L); boolQuery = boolQuery() - .must(multiMatchQuery("a", "field1", "field2").zeroTermsQuery(MatchQueryParser.ZeroTermsQuery.ALL)) - .must(multiMatchQuery("value4", "field1", "field2").zeroTermsQuery(MatchQueryParser.ZeroTermsQuery.ALL)); + .must(multiMatchQuery("a", "field1", "field2").zeroTermsQuery(ZeroTermsQueryOption.ALL)) + .must(multiMatchQuery("value4", "field1", "field2").zeroTermsQuery(ZeroTermsQueryOption.ALL)); searchResponse = client().prepareSearch().setQuery(boolQuery).get(); assertHitCount(searchResponse, 1L); - boolQuery = boolQuery().must(multiMatchQuery("a", "field1").zeroTermsQuery(MatchQueryParser.ZeroTermsQuery.ALL)); + boolQuery = 
boolQuery().must(multiMatchQuery("a", "field1").zeroTermsQuery(ZeroTermsQueryOption.ALL)); searchResponse = client().prepareSearch().setQuery(boolQuery).get(); assertHitCount(searchResponse, 2L); } diff --git a/server/src/main/java/org/apache/lucene/search/XCombinedFieldQuery.java b/server/src/main/java/org/apache/lucene/search/XCombinedFieldQuery.java new file mode 100644 index 0000000000000..03daa65223950 --- /dev/null +++ b/server/src/main/java/org/apache/lucene/search/XCombinedFieldQuery.java @@ -0,0 +1,466 @@ +/* @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.TermStates; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.similarities.BM25Similarity; +import org.apache.lucene.search.similarities.DFRSimilarity; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.similarities.SimilarityBase; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.SmallFloat; + +/** + * A {@link Query} that treats multiple fields as a single stream and scores terms as if you had + * indexed them as a single term in a single field. + * + *

The query works as follows: + * + *

    + *
  1. Given a list of fields and weights, it pretends there is a synthetic combined field where + * all terms have been indexed. It computes new term and collection statistics for this + * combined field. + *
  2. It uses a disjunction iterator and {@link IndexSearcher#getSimilarity} to score documents. + *
+ * + *

In order for a similarity to be compatible, {@link Similarity#computeNorm} must be additive: + * the norm of the combined field is the sum of norms for each individual field. The norms must also + * be encoded using {@link SmallFloat#intToByte4}. These requirements hold for all similarities that + * compute norms the same way as {@link SimilarityBase#computeNorm}, which includes {@link + * BM25Similarity} and {@link DFRSimilarity}. Per-field similarities are not supported. + * + *

The scoring is based on BM25F's simple formula described in: + * http://www.staff.city.ac.uk/~sb317/papers/foundations_bm25_review.pdf. This query implements the + * same approach but allows other similarities besides {@link + * org.apache.lucene.search.similarities.BM25Similarity}. + * + * TODO: this is temporarily copied from Lucene, remove once we update to Lucene 8.9. + */ +public final class XCombinedFieldQuery extends Query implements Accountable { + private static final long BASE_RAM_BYTES = + RamUsageEstimator.shallowSizeOfInstance(XCombinedFieldQuery.class); + + /** A builder for {@link XCombinedFieldQuery}. */ + public static class Builder { + private final Map fieldAndWeights = new HashMap<>(); + private final Set termsSet = new HashSet<>(); + + /** + * Adds a field to this builder. + * + * @param field The field name. + */ + public Builder addField(String field) { + return addField(field, 1f); + } + + /** + * Adds a field to this builder. + * + * @param field The field name. + * @param weight The weight associated to this field. + */ + public Builder addField(String field, float weight) { + if (weight < 1) { + throw new IllegalArgumentException("weight must be greater or equal to 1"); + } + fieldAndWeights.put(field, new FieldAndWeight(field, weight)); + return this; + } + + /** Adds a term to this builder. */ + public Builder addTerm(BytesRef term) { + if (termsSet.size() > BooleanQuery.getMaxClauseCount()) { + throw new BooleanQuery.TooManyClauses(); + } + termsSet.add(term); + return this; + } + + /** Builds the {@link XCombinedFieldQuery}. 
*/ + public XCombinedFieldQuery build() { + int size = fieldAndWeights.size() * termsSet.size(); + if (size > BooleanQuery.getMaxClauseCount()) { + throw new BooleanQuery.TooManyClauses(); + } + BytesRef[] terms = termsSet.toArray(new BytesRef[0]); + return new XCombinedFieldQuery(new TreeMap<>(fieldAndWeights), terms); + } + } + + static class FieldAndWeight { + final String field; + final float weight; + + FieldAndWeight(String field, float weight) { + this.field = field; + this.weight = weight; + } + } + + // sorted map for fields. + private final TreeMap fieldAndWeights; + // array of terms, sorted. + private final BytesRef terms[]; + // array of terms per field, sorted + private final Term fieldTerms[]; + + private final long ramBytesUsed; + + private XCombinedFieldQuery(TreeMap fieldAndWeights, BytesRef[] terms) { + this.fieldAndWeights = fieldAndWeights; + this.terms = terms; + int numFieldTerms = fieldAndWeights.size() * terms.length; + if (numFieldTerms > BooleanQuery.getMaxClauseCount()) { + throw new BooleanQuery.TooManyClauses(); + } + this.fieldTerms = new Term[numFieldTerms]; + Arrays.sort(terms); + int pos = 0; + for (String field : fieldAndWeights.keySet()) { + for (BytesRef term : terms) { + fieldTerms[pos++] = new Term(field, term); + } + } + + this.ramBytesUsed = + BASE_RAM_BYTES + + RamUsageEstimator.sizeOfObject(fieldAndWeights) + + RamUsageEstimator.sizeOfObject(fieldTerms) + + RamUsageEstimator.sizeOfObject(terms); + } + + public List getTerms() { + return Collections.unmodifiableList(Arrays.asList(fieldTerms)); + } + + @Override + public String toString(String field) { + StringBuilder builder = new StringBuilder("CombinedFieldQuery(("); + int pos = 0; + for (FieldAndWeight fieldWeight : fieldAndWeights.values()) { + if (pos++ != 0) { + builder.append(" "); + } + builder.append(fieldWeight.field); + if (fieldWeight.weight != 1f) { + builder.append("^"); + builder.append(fieldWeight.weight); + } + } + builder.append(")("); + pos = 0; + for 
(BytesRef term : terms) { + if (pos++ != 0) { + builder.append(" "); + } + builder.append(term.utf8ToString()); + } + builder.append("))"); + return builder.toString(); + } + + @Override + public int hashCode() { + return 31 * classHash() + Arrays.hashCode(terms); + } + + @Override + public boolean equals(Object other) { + return sameClassAs(other) && Arrays.equals(terms, ((XCombinedFieldQuery) other).terms); + } + + @Override + public long ramBytesUsed() { + return ramBytesUsed; + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + // optimize zero and single field cases + if (terms.length == 0) { + return new BooleanQuery.Builder().build(); + } + // single field and one term + if (fieldTerms.length == 1) { + return new TermQuery(fieldTerms[0]); + } + // single field and multiple terms + if (fieldAndWeights.size() == 1) { + SynonymQuery.Builder builder = new SynonymQuery.Builder(fieldTerms[0].field()); + for (Term term : fieldTerms) { + builder.addTerm(term); + } + return builder.build(); + } + return this; + } + + @Override + public void visit(QueryVisitor visitor) { + Term[] selectedTerms = + Arrays.stream(fieldTerms).filter(t -> visitor.acceptField(t.field())).toArray(Term[]::new); + if (selectedTerms.length > 0) { + QueryVisitor v = visitor.getSubVisitor(BooleanClause.Occur.SHOULD, this); + v.consumeTerms(this, selectedTerms); + } + } + + private BooleanQuery rewriteToBoolean() { + // rewrite to a simple disjunction if the score is not needed. + BooleanQuery.Builder bq = new BooleanQuery.Builder(); + for (Term term : fieldTerms) { + bq.add(new TermQuery(term), BooleanClause.Occur.SHOULD); + } + return bq.build(); + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) + throws IOException { + if (scoreMode.needsScores()) { + return new CombinedFieldWeight(this, searcher, scoreMode, boost); + } else { + // rewrite to a simple disjunction if the score is not needed. 
+ Query bq = rewriteToBoolean(); + return searcher.rewrite(bq).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost); + } + } + + class CombinedFieldWeight extends Weight { + private final IndexSearcher searcher; + private final TermStates termStates[]; + private final Similarity.SimScorer simWeight; + + CombinedFieldWeight(Query query, IndexSearcher searcher, ScoreMode scoreMode, float boost) + throws IOException { + super(query); + assert scoreMode.needsScores(); + this.searcher = searcher; + long docFreq = 0; + long totalTermFreq = 0; + termStates = new TermStates[fieldTerms.length]; + for (int i = 0; i < termStates.length; i++) { + FieldAndWeight field = fieldAndWeights.get(fieldTerms[i].field()); + TermStates ts = TermStates.build(searcher.getTopReaderContext(), fieldTerms[i], true); + termStates[i] = ts; + if (ts.docFreq() > 0) { + TermStatistics termStats = + searcher.termStatistics(fieldTerms[i], ts.docFreq(), ts.totalTermFreq()); + docFreq = Math.max(termStats.docFreq(), docFreq); + totalTermFreq += (double) field.weight * termStats.totalTermFreq(); + } + } + if (docFreq > 0) { + CollectionStatistics pseudoCollectionStats = mergeCollectionStatistics(searcher); + TermStatistics pseudoTermStatistics = + new TermStatistics(new BytesRef("pseudo_term"), docFreq, Math.max(1, totalTermFreq)); + this.simWeight = + searcher.getSimilarity().scorer(boost, pseudoCollectionStats, pseudoTermStatistics); + } else { + this.simWeight = null; + } + } + + private CollectionStatistics mergeCollectionStatistics(IndexSearcher searcher) + throws IOException { + long maxDoc = searcher.getIndexReader().maxDoc(); + long docCount = 0; + long sumTotalTermFreq = 0; + long sumDocFreq = 0; + for (FieldAndWeight fieldWeight : fieldAndWeights.values()) { + CollectionStatistics collectionStats = searcher.collectionStatistics(fieldWeight.field); + if (collectionStats != null) { + docCount = Math.max(collectionStats.docCount(), docCount); + sumDocFreq = 
Math.max(collectionStats.sumDocFreq(), sumDocFreq); + sumTotalTermFreq += (double) fieldWeight.weight * collectionStats.sumTotalTermFreq(); + } + } + + return new CollectionStatistics( + "pseudo_field", maxDoc, docCount, sumTotalTermFreq, sumDocFreq); + } + + @Override + public void extractTerms(Set termSet) { + termSet.addAll(Arrays.asList(fieldTerms)); + } + + @Override + public Matches matches(LeafReaderContext context, int doc) throws IOException { + Weight weight = + searcher.rewrite(rewriteToBoolean()).createWeight(searcher, ScoreMode.COMPLETE, 1f); + return weight.matches(context, doc); + } + + @Override + public Explanation explain(LeafReaderContext context, int doc) throws IOException { + Scorer scorer = scorer(context); + if (scorer != null) { + int newDoc = scorer.iterator().advance(doc); + if (newDoc == doc) { + final float freq; + if (scorer instanceof CombinedFieldScorer) { + freq = ((CombinedFieldScorer) scorer).freq(); + } else { + assert scorer instanceof TermScorer; + freq = ((TermScorer) scorer).freq(); + } + final XMultiNormsLeafSimScorer docScorer = + new XMultiNormsLeafSimScorer( + simWeight, context.reader(), fieldAndWeights.values(), true); + Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq); + Explanation scoreExplanation = docScorer.explain(doc, freqExplanation); + return Explanation.match( + scoreExplanation.getValue(), + "weight(" + getQuery() + " in " + doc + "), result of:", + scoreExplanation); + } + } + return Explanation.noMatch("no matching term"); + } + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + List iterators = new ArrayList<>(); + List fields = new ArrayList<>(); + for (int i = 0; i < fieldTerms.length; i++) { + TermState state = termStates[i].get(context); + if (state != null) { + TermsEnum termsEnum = context.reader().terms(fieldTerms[i].field()).iterator(); + termsEnum.seekExact(fieldTerms[i].bytes(), state); + PostingsEnum postingsEnum = 
termsEnum.postings(null, PostingsEnum.FREQS); + iterators.add(postingsEnum); + fields.add(fieldAndWeights.get(fieldTerms[i].field())); + } + } + + if (iterators.isEmpty()) { + return null; + } + + // we must optimize this case (term not in segment), disjunctions require >= 2 subs + if (iterators.size() == 1) { + final LeafSimScorer scoringSimScorer = + new LeafSimScorer(simWeight, context.reader(), fields.get(0).field, true); + return new TermScorer(this, iterators.get(0), scoringSimScorer); + } + final XMultiNormsLeafSimScorer scoringSimScorer = + new XMultiNormsLeafSimScorer(simWeight, context.reader(), fields, true); + LeafSimScorer nonScoringSimScorer = + new LeafSimScorer(simWeight, context.reader(), "pseudo_field", false); + // we use termscorers + disjunction as an impl detail + DisiPriorityQueue queue = new DisiPriorityQueue(iterators.size()); + for (int i = 0; i < iterators.size(); i++) { + float weight = fields.get(i).weight; + queue.add( + new WeightedDisiWrapper( + new TermScorer(this, iterators.get(i), nonScoringSimScorer), weight)); + } + // Even though it is called approximation, it is accurate since none of + // the sub iterators are two-phase iterators. 
+ DocIdSetIterator iterator = new DisjunctionDISIApproximation(queue); + return new CombinedFieldScorer(this, queue, iterator, scoringSimScorer); + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + return false; + } + } + + private static class WeightedDisiWrapper extends DisiWrapper { + final float weight; + + WeightedDisiWrapper(Scorer scorer, float weight) { + super(scorer); + this.weight = weight; + } + + float freq() throws IOException { + return weight * ((PostingsEnum) iterator).freq(); + } + } + + private static class CombinedFieldScorer extends Scorer { + private final DisiPriorityQueue queue; + private final DocIdSetIterator iterator; + private final XMultiNormsLeafSimScorer simScorer; + + CombinedFieldScorer( + Weight weight, + DisiPriorityQueue queue, + DocIdSetIterator iterator, + XMultiNormsLeafSimScorer simScorer) { + super(weight); + this.queue = queue; + this.iterator = iterator; + this.simScorer = simScorer; + } + + @Override + public int docID() { + return iterator.docID(); + } + + float freq() throws IOException { + DisiWrapper w = queue.topList(); + float freq = ((WeightedDisiWrapper) w).freq(); + for (w = w.next; w != null; w = w.next) { + freq += ((WeightedDisiWrapper) w).freq(); + if (freq < 0) { // overflow + return Integer.MAX_VALUE; + } + } + return freq; + } + + @Override + public float score() throws IOException { + return simScorer.score(iterator.docID(), freq()); + } + + @Override + public DocIdSetIterator iterator() { + return iterator; + } + + @Override + public float getMaxScore(int upTo) throws IOException { + return Float.POSITIVE_INFINITY; + } + } +} diff --git a/server/src/main/java/org/apache/lucene/search/XMultiNormsLeafSimScorer.java b/server/src/main/java/org/apache/lucene/search/XMultiNormsLeafSimScorer.java new file mode 100644 index 0000000000000..bb3e34b4e410b --- /dev/null +++ b/server/src/main/java/org/apache/lucene/search/XMultiNormsLeafSimScorer.java @@ -0,0 +1,161 @@ +/* @notice + * Licensed 
to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.search.similarities.Similarity.SimScorer; +import org.apache.lucene.util.SmallFloat; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Objects; + +import static org.apache.lucene.search.XCombinedFieldQuery.FieldAndWeight; + +/** + * Copy of {@link LeafSimScorer} that sums document's norms from multiple fields. + * + * TODO: this is temporarily copied from Lucene, remove once we update to Lucene 8.9. + */ +final class XMultiNormsLeafSimScorer { + /** + * Cache of decoded norms. + */ + private static final float[] LENGTH_TABLE = new float[256]; + + static { + for (int i = 0; i < 256; i++) { + LENGTH_TABLE[i] = SmallFloat.byte4ToInt((byte) i); + } + } + + private final SimScorer scorer; + private final NumericDocValues norms; + + /** + * Sole constructor: Score documents of {@code reader} with {@code scorer}. 
+ * + */ + XMultiNormsLeafSimScorer(SimScorer scorer, + LeafReader reader, + Collection normFields, + boolean needsScores) throws IOException { + this.scorer = Objects.requireNonNull(scorer); + if (needsScores) { + final List normsList = new ArrayList<>(); + final List weightList = new ArrayList<>(); + for (FieldAndWeight field : normFields) { + NumericDocValues norms = reader.getNormValues(field.field); + if (norms != null) { + normsList.add(norms); + weightList.add(field.weight); + } + } + if (normsList.isEmpty()) { + norms = null; + } else if (normsList.size() == 1) { + norms = normsList.get(0); + } else { + final NumericDocValues[] normsArr = normsList.toArray(new NumericDocValues[0]); + final float[] weightArr = new float[normsList.size()]; + for (int i = 0; i < weightList.size(); i++) { + weightArr[i] = weightList.get(i); + } + norms = new XMultiNormsLeafSimScorer.MultiFieldNormValues(normsArr, weightArr); + } + } else { + norms = null; + } + } + + private long getNormValue(int doc) throws IOException { + if (norms != null) { + boolean found = norms.advanceExact(doc); + assert found; + return norms.longValue(); + } else { + return 1L; // default norm + } + } + + /** Score the provided document assuming the given term document frequency. + * This method must be called on non-decreasing sequences of doc ids. + * @see SimScorer#score(float, long) */ + public float score(int doc, float freq) throws IOException { + return scorer.score(freq, getNormValue(doc)); + } + + /** Explain the score for the provided document assuming the given term document frequency. + * This method must be called on non-decreasing sequences of doc ids. 
+ * @see SimScorer#explain(Explanation, long) */ + public Explanation explain(int doc, Explanation freqExpl) throws IOException { + return scorer.explain(freqExpl, getNormValue(doc)); + } + + private static class MultiFieldNormValues extends NumericDocValues { + private final NumericDocValues[] normsArr; + private final float[] weightArr; + private long current; + private int docID = -1; + + MultiFieldNormValues(NumericDocValues[] normsArr, float[] weightArr) { + this.normsArr = normsArr; + this.weightArr = weightArr; + } + + @Override + public long longValue() { + return current; + } + + @Override + public boolean advanceExact(int target) throws IOException { + float normValue = 0; + for (int i = 0; i < normsArr.length; i++) { + boolean found = normsArr[i].advanceExact(target); + assert found; + normValue += weightArr[i] * LENGTH_TABLE[Byte.toUnsignedInt((byte) normsArr[i].longValue())]; + } + current = SmallFloat.intToByte4(Math.round(normValue)); + return true; + } + + @Override + public int docID() { + return docID; + } + + @Override + public int nextDoc() { + throw new UnsupportedOperationException(); + } + + @Override + public int advance(int target) { + throw new UnsupportedOperationException(); + } + + @Override + public long cost() { + throw new UnsupportedOperationException(); + } + } +} diff --git a/server/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java b/server/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java index 3833eec8a93ff..dd12837fce8c0 100644 --- a/server/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java +++ b/server/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java @@ -18,6 +18,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.XCombinedFieldQuery; import org.apache.lucene.search.spans.SpanTermQuery; import 
org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery; @@ -74,6 +75,11 @@ protected void flatten(Query sourceQuery, IndexReader reader, Collection for (Term term : synQuery.getTerms()) { flatten(new TermQuery(term), reader, flatQueries, boost); } + } else if (sourceQuery instanceof XCombinedFieldQuery) { + XCombinedFieldQuery combinedFieldQuery = (XCombinedFieldQuery) sourceQuery; + for (Term term : combinedFieldQuery.getTerms()) { + flatten(new TermQuery(term), reader, flatQueries, boost); + } } else if (sourceQuery instanceof ESToParentBlockJoinQuery) { Query childQuery = ((ESToParentBlockJoinQuery) sourceQuery).getChildQuery(); if (childQuery != null) { diff --git a/server/src/main/java/org/elasticsearch/index/query/CombinedFieldsQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/CombinedFieldsQueryBuilder.java new file mode 100644 index 0000000000000..5bfd24761a595 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/query/CombinedFieldsQueryBuilder.java @@ -0,0 +1,436 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.BoostAttribute; +import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.XCombinedFieldQuery; +import org.apache.lucene.search.similarities.BM25Similarity; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.similarity.LegacyBM25Similarity; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.QueryBuilder; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.TextFieldMapper; +import org.elasticsearch.index.mapper.TextSearchInfo; +import org.elasticsearch.index.search.QueryParserHelper; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.TreeMap; + +/** + * A query that matches on multiple text fields, as if the field contents had been indexed + * into a single combined field. 
+ */ +public class CombinedFieldsQueryBuilder extends AbstractQueryBuilder { + public static final String NAME = "combined_fields"; + + private static final ParseField QUERY_FIELD = new ParseField("query"); + private static final ParseField FIELDS_FIELD = new ParseField("fields"); + private static final ParseField OPERATOR_FIELD = new ParseField("operator"); + private static final ParseField MINIMUM_SHOULD_MATCH_FIELD = new ParseField("minimum_should_match"); + private static final ParseField GENERATE_SYNONYMS_PHRASE_QUERY = new ParseField("auto_generate_synonyms_phrase_query"); + private static final ParseField ZERO_TERMS_QUERY_FIELD = new ParseField("zero_terms_query"); + + private final Object value; + private final Map fieldsAndBoosts; + private Operator operator = Operator.OR; + private String minimumShouldMatch; + private ZeroTermsQueryOption zeroTermsQuery = ZeroTermsQueryOption.NONE; + private boolean autoGenerateSynonymsPhraseQuery = true; + + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(NAME, + a -> new CombinedFieldsQueryBuilder(a[0])); + + static { + PARSER.declareString(ConstructingObjectParser.constructorArg(), QUERY_FIELD); + PARSER.declareStringArray((builder, values) -> { + Map fieldsAndBoosts = QueryParserHelper.parseFieldsAndWeights(values); + builder.fields(fieldsAndBoosts); + }, FIELDS_FIELD); + + PARSER.declareString(CombinedFieldsQueryBuilder::operator, Operator::fromString, OPERATOR_FIELD); + PARSER.declareString(CombinedFieldsQueryBuilder::minimumShouldMatch, MINIMUM_SHOULD_MATCH_FIELD); + PARSER.declareBoolean(CombinedFieldsQueryBuilder::autoGenerateSynonymsPhraseQuery, GENERATE_SYNONYMS_PHRASE_QUERY); + PARSER.declareString(CombinedFieldsQueryBuilder::zeroTermsQuery, value -> { + if ("none".equalsIgnoreCase(value)) { + return ZeroTermsQueryOption.NONE; + } else if ("all".equalsIgnoreCase(value)) { + return ZeroTermsQueryOption.ALL; + } else { + throw new IllegalArgumentException("Unsupported [" + 
ZERO_TERMS_QUERY_FIELD.getPreferredName() + "] value [" + value + "]"); + } + }, ZERO_TERMS_QUERY_FIELD); + + PARSER.declareFloat(CombinedFieldsQueryBuilder::boost, BOOST_FIELD); + PARSER.declareString(CombinedFieldsQueryBuilder::queryName, NAME_FIELD); + } + + /** + * Constructs a new text query. + */ + public CombinedFieldsQueryBuilder(Object value, String... fields) { + if (value == null) { + throw new IllegalArgumentException("[" + NAME + "] requires query value"); + } + if (fields == null) { + throw new IllegalArgumentException("[" + NAME + "] requires field list"); + } + this.value = value; + this.fieldsAndBoosts = new TreeMap<>(); + for (String field : fields) { + field(field); + } + } + + /** + * Read from a stream. + */ + public CombinedFieldsQueryBuilder(StreamInput in) throws IOException { + super(in); + value = in.readGenericValue(); + int size = in.readVInt(); + fieldsAndBoosts = new TreeMap<>(); + for (int i = 0; i < size; i++) { + String field = in.readString(); + float boost = in.readFloat(); + fieldsAndBoosts.put(field, boost); + } + operator = Operator.readFromStream(in); + minimumShouldMatch = in.readOptionalString(); + zeroTermsQuery = ZeroTermsQueryOption.readFromStream(in); + autoGenerateSynonymsPhraseQuery = in.readBoolean(); + } + + @Override + protected void doWriteTo(StreamOutput out) throws IOException { + out.writeGenericValue(value); + out.writeVInt(fieldsAndBoosts.size()); + for (Map.Entry fieldsEntry : fieldsAndBoosts.entrySet()) { + out.writeString(fieldsEntry.getKey()); + out.writeFloat(fieldsEntry.getValue()); + } + operator.writeTo(out); + out.writeOptionalString(minimumShouldMatch); + zeroTermsQuery.writeTo(out); + out.writeBoolean(autoGenerateSynonymsPhraseQuery); + } + + public Object value() { + return value; + } + + /** + * Adds a field to run the query against. 
+ */ + public CombinedFieldsQueryBuilder field(String field) { + if (Strings.isEmpty(field)) { + throw new IllegalArgumentException("supplied field is null or empty."); + } + this.fieldsAndBoosts.put(field, AbstractQueryBuilder.DEFAULT_BOOST); + return this; + } + + /** + * Adds a field to run the query against with a specific boost. + */ + public CombinedFieldsQueryBuilder field(String field, float boost) { + if (Strings.isEmpty(field)) { + throw new IllegalArgumentException("supplied field is null or empty."); + } + validateFieldBoost(boost); + this.fieldsAndBoosts.put(field, boost); + return this; + } + + /** + * Add several fields to run the query against with a specific boost. + */ + public CombinedFieldsQueryBuilder fields(Map fields) { + for (float fieldBoost : fields.values()) { + validateFieldBoost(fieldBoost); + } + this.fieldsAndBoosts.putAll(fields); + return this; + } + + public Map fields() { + return fieldsAndBoosts; + } + + /** + * Sets the operator to use for the top-level boolean query. Defaults to {@code OR}. 
+ */ + public CombinedFieldsQueryBuilder operator(Operator operator) { + if (operator == null) { + throw new IllegalArgumentException("[" + NAME + "] requires operator to be non-null"); + } + this.operator = operator; + return this; + } + + public Operator operator() { + return operator; + } + + public CombinedFieldsQueryBuilder minimumShouldMatch(String minimumShouldMatch) { + this.minimumShouldMatch = minimumShouldMatch; + return this; + } + + public String minimumShouldMatch() { + return minimumShouldMatch; + } + + public CombinedFieldsQueryBuilder zeroTermsQuery(ZeroTermsQueryOption zeroTermsQuery) { + if (zeroTermsQuery == null) { + throw new IllegalArgumentException("[" + NAME + "] requires zero terms query to be non-null"); + } + this.zeroTermsQuery = zeroTermsQuery; + return this; + } + + public ZeroTermsQueryOption zeroTermsQuery() { + return zeroTermsQuery; + } + + public CombinedFieldsQueryBuilder autoGenerateSynonymsPhraseQuery(boolean enable) { + this.autoGenerateSynonymsPhraseQuery = enable; + return this; + } + + /** + * Whether phrase queries should be automatically generated for multi terms synonyms. + * Defaults to {@code true}. 
+ */ + public boolean autoGenerateSynonymsPhraseQuery() { + return autoGenerateSynonymsPhraseQuery; + } + + private void validateFieldBoost(float boost) { + if (boost < 1.0f) { + throw new IllegalArgumentException("[" + NAME + "] requires field boosts to be >= 1.0"); + } + } + + @Override + public void doXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAME); + builder.field(QUERY_FIELD.getPreferredName(), value); + builder.startArray(FIELDS_FIELD.getPreferredName()); + for (Map.Entry fieldEntry : this.fieldsAndBoosts.entrySet()) { + builder.value(fieldEntry.getKey() + "^" + fieldEntry.getValue()); + } + builder.endArray(); + builder.field(OPERATOR_FIELD.getPreferredName(), operator.toString()); + if (minimumShouldMatch != null) { + builder.field(MINIMUM_SHOULD_MATCH_FIELD.getPreferredName(), minimumShouldMatch); + } + builder.field(ZERO_TERMS_QUERY_FIELD.getPreferredName(), zeroTermsQuery.toString()); + builder.field(GENERATE_SYNONYMS_PHRASE_QUERY.getPreferredName(), autoGenerateSynonymsPhraseQuery); + printBoostAndQueryName(builder); + builder.endObject(); + } + + public static CombinedFieldsQueryBuilder fromXContent(XContentParser parser) throws IOException { + return PARSER.parse(parser, null); + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + protected Query doToQuery(SearchExecutionContext context) throws IOException { + if (fieldsAndBoosts.isEmpty()) { + throw new IllegalArgumentException("In [" + NAME + "] query, at least one field must be provided"); + } + + Map fields = QueryParserHelper.resolveMappingFields(context, fieldsAndBoosts); + // If all fields are unmapped, then return an 'unmapped field query'. 
+ boolean hasMappedField = fields.keySet().stream() + .anyMatch(k -> context.getFieldType(k) != null); + if (hasMappedField == false) { + return Queries.newUnmappedFieldsQuery(fields.keySet()); + } + + validateSimilarity(context, fields); + + Analyzer sharedAnalyzer = null; + List fieldsAndBoosts = new ArrayList<>(); + for (Map.Entry entry : fields.entrySet()) { + String name = entry.getKey(); + MappedFieldType fieldType = context.getFieldType(name); + if (fieldType == null) { + continue; + } + + if (fieldType.familyTypeName().equals(TextFieldMapper.CONTENT_TYPE) == false) { + throw new IllegalArgumentException("Field [" + fieldType.name() + "] of type [" + + fieldType.typeName() + "] does not support [" + NAME + "] queries"); + } + + float boost = entry.getValue() == null ? 1.0f : entry.getValue(); + fieldsAndBoosts.add(new FieldAndBoost(fieldType, boost)); + + Analyzer analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer(); + if (sharedAnalyzer != null && analyzer.equals(sharedAnalyzer) == false) { + throw new IllegalArgumentException("All fields in [" + NAME + "] query must have the same search analyzer"); + } + sharedAnalyzer = analyzer; + } + + assert fieldsAndBoosts.isEmpty() == false; + String placeholderFieldName = fieldsAndBoosts.get(0).fieldType.name(); + boolean canGenerateSynonymsPhraseQuery = autoGenerateSynonymsPhraseQuery; + for (FieldAndBoost fieldAndBoost : fieldsAndBoosts) { + TextSearchInfo textSearchInfo = fieldAndBoost.fieldType.getTextSearchInfo(); + canGenerateSynonymsPhraseQuery &= textSearchInfo.hasPositions(); + } + + CombinedFieldsBuilder builder = new CombinedFieldsBuilder(fieldsAndBoosts, + sharedAnalyzer, canGenerateSynonymsPhraseQuery); + Query query = builder.createBooleanQuery(placeholderFieldName, value.toString(), operator.toBooleanClauseOccur()); + + query = Queries.maybeApplyMinimumShouldMatch(query, minimumShouldMatch); + if (query == null) { + query = zeroTermsQuery.asQuery(); + } + return query; + } + + private void 
validateSimilarity(SearchExecutionContext context, Map fields) { + for (Map.Entry entry : fields.entrySet()) { + String name = entry.getKey(); + MappedFieldType fieldType = context.getFieldType(name); + if (fieldType != null && fieldType.getTextSearchInfo().getSimilarity() != null) { + throw new IllegalArgumentException("["+ NAME + "] queries cannot be used with per-field similarities"); + } + } + + Similarity defaultSimilarity = context.getDefaultSimilarity(); + if ((defaultSimilarity instanceof LegacyBM25Similarity + || defaultSimilarity instanceof BM25Similarity) == false) { + throw new IllegalArgumentException("["+ NAME + "] queries can only be used with the [BM25] similarity"); + } + } + + private static final class FieldAndBoost { + final MappedFieldType fieldType; + final float boost; + + FieldAndBoost(MappedFieldType fieldType, float boost) { + this.fieldType = Objects.requireNonNull(fieldType); + this.boost = boost; + } + } + + private static class CombinedFieldsBuilder extends QueryBuilder { + private final List fields; + + CombinedFieldsBuilder(List fields, + Analyzer analyzer, + boolean autoGenerateSynonymsPhraseQuery) { + super(analyzer); + this.fields = fields; + setAutoGenerateMultiTermSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery); + } + + @Override + protected Query createFieldQuery(TokenStream source, BooleanClause.Occur operator, String field, boolean quoted, int phraseSlop) { + if (source.hasAttribute(DisableGraphAttribute.class)) { + /* + * A {@link TokenFilter} in this {@link TokenStream} disabled the graph analysis to avoid + * paths explosion. See {@link org.elasticsearch.index.analysis.ShingleTokenFilterFactory} for details. 
+ */ + setEnableGraphQueries(false); + } + try { + return super.createFieldQuery(source, operator, field, quoted, phraseSlop); + } finally { + setEnableGraphQueries(true); + } + } + + @Override + public Query createPhraseQuery(String field, String queryText, int phraseSlop) { + throw new IllegalArgumentException("[combined_fields] queries don't support phrases"); + } + + @Override + protected Query newSynonymQuery(TermAndBoost[] terms) { + XCombinedFieldQuery.Builder query = new XCombinedFieldQuery.Builder(); + for (TermAndBoost termAndBoost : terms) { + assert termAndBoost.boost == BoostAttribute.DEFAULT_BOOST; + BytesRef bytes = termAndBoost.term.bytes(); + query.addTerm(bytes); + } + for (FieldAndBoost fieldAndBoost : fields) { + MappedFieldType fieldType = fieldAndBoost.fieldType; + float fieldBoost = fieldAndBoost.boost; + query.addField(fieldType.name(), fieldBoost); + } + return query.build(); + } + + @Override + protected Query newTermQuery(Term term, float boost) { + TermAndBoost termAndBoost = new TermAndBoost(term, boost); + return newSynonymQuery(new TermAndBoost[]{termAndBoost}); + } + + @Override + protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (FieldAndBoost fieldAndBoost : fields) { + Query query = fieldAndBoost.fieldType.phraseQuery(stream, slop, enablePositionIncrements); + if (fieldAndBoost.boost != 1f) { + query = new BoostQuery(query, fieldAndBoost.boost); + } + builder.add(query, BooleanClause.Occur.SHOULD); + } + return builder.build(); + } + } + + @Override + protected int doHashCode() { + return Objects.hash(value, fieldsAndBoosts, operator, minimumShouldMatch, zeroTermsQuery, autoGenerateSynonymsPhraseQuery); + } + + @Override + protected boolean doEquals(CombinedFieldsQueryBuilder other) { + return Objects.equals(value, other.value) && + Objects.equals(fieldsAndBoosts, other.fieldsAndBoosts) && + Objects.equals(operator, 
other.operator) && + Objects.equals(minimumShouldMatch, other.minimumShouldMatch) && + Objects.equals(zeroTermsQuery, other.zeroTermsQuery) && + Objects.equals(autoGenerateSynonymsPhraseQuery, other.autoGenerateSynonymsPhraseQuery); + } +} diff --git a/server/src/main/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilder.java index 13c7896bd730e..1ae7b5450484e 100644 --- a/server/src/main/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilder.java @@ -18,7 +18,6 @@ import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.search.MatchQueryParser; -import org.elasticsearch.index.search.MatchQueryParser.ZeroTermsQuery; import java.io.IOException; import java.util.Objects; @@ -42,7 +41,7 @@ public class MatchPhrasePrefixQueryBuilder extends AbstractQueryBuilder { private boolean lenient = MatchQueryParser.DEFAULT_LENIENCY; - private MatchQueryParser.ZeroTermsQuery zeroTermsQuery = MatchQueryParser.DEFAULT_ZERO_TERMS_QUERY; + private ZeroTermsQueryOption zeroTermsQuery = MatchQueryParser.DEFAULT_ZERO_TERMS_QUERY; private boolean autoGenerateSynonymsPhraseQuery = true; @@ -103,7 +102,7 @@ public MatchQueryBuilder(StreamInput in) throws IOException { maxExpansions = in.readVInt(); fuzzyTranspositions = in.readBoolean(); lenient = in.readBoolean(); - zeroTermsQuery = MatchQueryParser.ZeroTermsQuery.readFromStream(in); + zeroTermsQuery = ZeroTermsQueryOption.readFromStream(in); // optional fields analyzer = in.readOptionalString(); minimumShouldMatch = in.readOptionalString(); @@ -284,10 +283,10 @@ public boolean lenient() { /** * Sets query to use in case no query terms are available, e.g. after analysis removed them. 
- * Defaults to {@link MatchQueryParser.ZeroTermsQuery#NONE}, but can be set to - * {@link MatchQueryParser.ZeroTermsQuery#ALL} instead. + * Defaults to {@link ZeroTermsQueryOption#NONE}, but can be set to + * {@link ZeroTermsQueryOption#ALL} instead. */ - public MatchQueryBuilder zeroTermsQuery(MatchQueryParser.ZeroTermsQuery zeroTermsQuery) { + public MatchQueryBuilder zeroTermsQuery(ZeroTermsQueryOption zeroTermsQuery) { if (zeroTermsQuery == null) { throw new IllegalArgumentException("[" + NAME + "] requires zeroTermsQuery to be non-null"); } @@ -298,7 +297,7 @@ public MatchQueryBuilder zeroTermsQuery(MatchQueryParser.ZeroTermsQuery zeroTerm /** * Returns the setting for handling zero terms queries. */ - public MatchQueryParser.ZeroTermsQuery zeroTermsQuery() { + public ZeroTermsQueryOption zeroTermsQuery() { return this.zeroTermsQuery; } @@ -414,7 +413,7 @@ public static MatchQueryBuilder fromXContent(XContentParser parser) throws IOExc boolean fuzzyTranspositions = FuzzyQuery.defaultTranspositions; String fuzzyRewrite = null; boolean lenient = MatchQueryParser.DEFAULT_LENIENCY; - ZeroTermsQuery zeroTermsQuery = MatchQueryParser.DEFAULT_ZERO_TERMS_QUERY; + ZeroTermsQueryOption zeroTermsQuery = MatchQueryParser.DEFAULT_ZERO_TERMS_QUERY; boolean autoGenerateSynonymsPhraseQuery = true; String queryName = null; String currentFieldName = null; @@ -454,9 +453,9 @@ public static MatchQueryBuilder fromXContent(XContentParser parser) throws IOExc } else if (ZERO_TERMS_QUERY_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { String zeroTermsValue = parser.text(); if ("none".equalsIgnoreCase(zeroTermsValue)) { - zeroTermsQuery = MatchQueryParser.ZeroTermsQuery.NONE; + zeroTermsQuery = ZeroTermsQueryOption.NONE; } else if ("all".equalsIgnoreCase(zeroTermsValue)) { - zeroTermsQuery = MatchQueryParser.ZeroTermsQuery.ALL; + zeroTermsQuery = ZeroTermsQueryOption.ALL; } else { throw new ParsingException(parser.getTokenLocation(), "Unsupported zero_terms_query 
value [" + zeroTermsValue + "]"); diff --git a/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java index 146d383195a41..45595ad7d5cb5 100644 --- a/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java @@ -48,7 +48,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder defaultFields() { return indexSettings.getDefaultFields(); } diff --git a/server/src/main/java/org/elasticsearch/index/query/ZeroTermsQueryOption.java b/server/src/main/java/org/elasticsearch/index/query/ZeroTermsQueryOption.java new file mode 100644 index 0000000000000..4cc6c31f49a94 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/query/ZeroTermsQueryOption.java @@ -0,0 +1,61 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.search.Query; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.lucene.search.Queries; + +import java.io.IOException; + +public enum ZeroTermsQueryOption implements Writeable { + NONE(0) { + public Query asQuery() { + return Queries.newMatchNoDocsQuery("Matching no documents because no terms present"); + } + }, + ALL(1) { + public Query asQuery() { + return Queries.newMatchAllQuery(); + } + }, + // this is used internally to make sure that query_string and simple_query_string + // ignores query part that removes all tokens. + NULL(2) { + public Query asQuery() { + return null; + } + }; + + private final int ordinal; + + ZeroTermsQueryOption(int ordinal) { + this.ordinal = ordinal; + } + + public abstract Query asQuery(); + + public static ZeroTermsQueryOption readFromStream(StreamInput in) throws IOException { + int ord = in.readVInt(); + for (ZeroTermsQueryOption zeroTermsQuery : ZeroTermsQueryOption.values()) { + if (zeroTermsQuery.ordinal == ord) { + return zeroTermsQuery; + } + } + throw new ElasticsearchException("unknown serialized type [" + ord + "]"); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVInt(this.ordinal); + } +} diff --git a/server/src/main/java/org/elasticsearch/index/search/MatchQueryParser.java b/server/src/main/java/org/elasticsearch/index/search/MatchQueryParser.java index 7a3b060f6485c..8a5d676c01aae 100644 --- a/server/src/main/java/org/elasticsearch/index/search/MatchQueryParser.java +++ b/server/src/main/java/org/elasticsearch/index/search/MatchQueryParser.java @@ -35,7 +35,6 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; import 
org.elasticsearch.common.lucene.Lucene; -import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.lucene.search.SpanBooleanQueryRewriteWithMaxClause; import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.mapper.KeywordFieldMapper; @@ -43,6 +42,7 @@ import org.elasticsearch.index.mapper.TextFieldMapper; import org.elasticsearch.index.mapper.TextSearchInfo; import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.index.query.ZeroTermsQueryOption; import org.elasticsearch.index.query.support.QueryParsers; import java.io.IOException; @@ -97,40 +97,11 @@ public void writeTo(StreamOutput out) throws IOException { } } - public enum ZeroTermsQuery implements Writeable { - NONE(0), - ALL(1), - // this is used internally to make sure that query_string and simple_query_string - // ignores query part that removes all tokens. - NULL(2); - - private final int ordinal; - - ZeroTermsQuery(int ordinal) { - this.ordinal = ordinal; - } - - public static ZeroTermsQuery readFromStream(StreamInput in) throws IOException { - int ord = in.readVInt(); - for (ZeroTermsQuery zeroTermsQuery : ZeroTermsQuery.values()) { - if (zeroTermsQuery.ordinal == ord) { - return zeroTermsQuery; - } - } - throw new ElasticsearchException("unknown serialized type [" + ord + "]"); - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeVInt(this.ordinal); - } - } - public static final int DEFAULT_PHRASE_SLOP = 0; public static final boolean DEFAULT_LENIENCY = false; - public static final ZeroTermsQuery DEFAULT_ZERO_TERMS_QUERY = ZeroTermsQuery.NONE; + public static final ZeroTermsQueryOption DEFAULT_ZERO_TERMS_QUERY = ZeroTermsQueryOption.NONE; protected final SearchExecutionContext context; @@ -157,7 +128,7 @@ public void writeTo(StreamOutput out) throws IOException { protected boolean lenient = DEFAULT_LENIENCY; - protected ZeroTermsQuery zeroTermsQuery = DEFAULT_ZERO_TERMS_QUERY; + 
protected ZeroTermsQueryOption zeroTermsQuery = DEFAULT_ZERO_TERMS_QUERY; protected boolean autoGenerateSynonymsPhraseQuery = true; @@ -213,7 +184,7 @@ public void setLenient(boolean lenient) { this.lenient = lenient; } - public void setZeroTermsQuery(ZeroTermsQuery zeroTermsQuery) { + public void setZeroTermsQuery(ZeroTermsQueryOption zeroTermsQuery) { this.zeroTermsQuery = zeroTermsQuery; } @@ -277,7 +248,7 @@ public Query parse(Type type, String fieldName, Object value) throws IOException default: throw new IllegalStateException("No type found for [" + type + "]"); } - return query == null ? zeroTermsQuery() : query; + return query == null ? zeroTermsQuery.asQuery() : query; } protected Analyzer getAnalyzer(MappedFieldType fieldType, boolean quoted) { @@ -290,19 +261,6 @@ protected Analyzer getAnalyzer(MappedFieldType fieldType, boolean quoted) { } } - protected Query zeroTermsQuery() { - switch (zeroTermsQuery) { - case NULL: - return null; - case NONE: - return Queries.newMatchNoDocsQuery("Matching no documents because no terms present"); - case ALL: - return Queries.newMatchAllQuery(); - default: - throw new IllegalStateException("unknown zeroTermsQuery " + zeroTermsQuery); - } - } - class MatchQueryBuilder extends QueryBuilder { private final MappedFieldType fieldType; diff --git a/server/src/main/java/org/elasticsearch/index/search/MultiMatchQueryParser.java b/server/src/main/java/org/elasticsearch/index/search/MultiMatchQueryParser.java index ff255c4eca278..c53a64294ba55 100644 --- a/server/src/main/java/org/elasticsearch/index/search/MultiMatchQueryParser.java +++ b/server/src/main/java/org/elasticsearch/index/search/MultiMatchQueryParser.java @@ -78,7 +78,7 @@ public Query parse(MultiMatchQueryBuilder.Type type, Map fieldNam private Query combineGrouped(List groupQuery, float tieBreaker) { if (groupQuery.isEmpty()) { - return zeroTermsQuery(); + return zeroTermsQuery.asQuery(); } if (groupQuery.size() == 1) { return groupQuery.get(0); @@ -144,7 +144,7 @@ 
private List buildCrossFieldQuery(Map fieldNames, String representativeField = group.getValue().get(0).fieldType.name(); Query query = builder.createBooleanQuery(representativeField, value.toString(), occur); if (query == null) { - query = zeroTermsQuery(); + query = zeroTermsQuery.asQuery(); } query = Queries.maybeApplyMinimumShouldMatch(query, minimumShouldMatch); diff --git a/server/src/main/java/org/elasticsearch/index/search/QueryStringQueryParser.java b/server/src/main/java/org/elasticsearch/index/search/QueryStringQueryParser.java index 5d6ed686963fb..c08069b4a8749 100644 --- a/server/src/main/java/org/elasticsearch/index/search/QueryStringQueryParser.java +++ b/server/src/main/java/org/elasticsearch/index/search/QueryStringQueryParser.java @@ -46,6 +46,7 @@ import org.elasticsearch.index.query.ExistsQueryBuilder; import org.elasticsearch.index.query.MultiMatchQueryBuilder; import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.index.query.ZeroTermsQueryOption; import org.elasticsearch.index.query.support.QueryParsers; import java.io.IOException; @@ -141,7 +142,7 @@ private QueryStringQueryParser(SearchExecutionContext context, String defaultFie this.context = context; this.fieldsAndWeights = Collections.unmodifiableMap(fieldsAndWeights); this.queryBuilder = new MultiMatchQueryParser(context); - queryBuilder.setZeroTermsQuery(MatchQueryParser.ZeroTermsQuery.NULL); + queryBuilder.setZeroTermsQuery(ZeroTermsQueryOption.NULL); queryBuilder.setLenient(lenient); this.lenient = lenient; } diff --git a/server/src/main/java/org/elasticsearch/index/search/SimpleQueryStringQueryParser.java b/server/src/main/java/org/elasticsearch/index/search/SimpleQueryStringQueryParser.java index dd7601b648ffc..da844a4e63a3c 100644 --- a/server/src/main/java/org/elasticsearch/index/search/SimpleQueryStringQueryParser.java +++ b/server/src/main/java/org/elasticsearch/index/search/SimpleQueryStringQueryParser.java @@ -30,6 +30,7 @@ import 
org.elasticsearch.index.query.MultiMatchQueryBuilder; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.query.SimpleQueryStringBuilder; +import org.elasticsearch.index.query.ZeroTermsQueryOption; import java.io.IOException; import java.util.ArrayList; @@ -64,7 +65,7 @@ public SimpleQueryStringQueryParser(Analyzer analyzer, Map weight this.queryBuilder = new MultiMatchQueryParser(context); this.queryBuilder.setAutoGenerateSynonymsPhraseQuery(settings.autoGenerateSynonymsPhraseQuery()); this.queryBuilder.setLenient(settings.lenient()); - this.queryBuilder.setZeroTermsQuery(MatchQueryParser.ZeroTermsQuery.NULL); + this.queryBuilder.setZeroTermsQuery(ZeroTermsQueryOption.NULL); if (analyzer != null) { this.queryBuilder.setAnalyzer(analyzer); } diff --git a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java index 63c85bbcb708c..4020169ed668f 100644 --- a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java +++ b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java @@ -118,12 +118,14 @@ public SimilarityService(IndexSettings indexSettings, ScriptService scriptServic } } + /** + * The similarity to use in searches, which takes into account per-field configuration. + */ public Similarity similarity(@Nullable Function fieldTypeLookup) { return (fieldTypeLookup != null) ? new PerFieldSimilarity(defaultSimilarity, fieldTypeLookup) : defaultSimilarity; } - public SimilarityProvider getSimilarity(String name) { Supplier sim = similarities.get(name); if (sim == null) { @@ -132,8 +134,10 @@ public SimilarityProvider getSimilarity(String name) { return new SimilarityProvider(name, sim.get()); } - // for testing - Similarity getDefaultSimilarity() { + /** + * The default similarity configured in the index settings. 
+ */ + public Similarity getDefaultSimilarity() { return defaultSimilarity; } diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index 06b229009f0f4..533c2c0005857 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -22,6 +22,7 @@ import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.BoostingQueryBuilder; +import org.elasticsearch.index.query.CombinedFieldsQueryBuilder; import org.elasticsearch.index.query.ConstantScoreQueryBuilder; import org.elasticsearch.index.query.DisMaxQueryBuilder; import org.elasticsearch.index.query.DistanceFeatureQueryBuilder; @@ -764,6 +765,8 @@ private void registerQueryParsers(List plugins) { registerQuery(new QuerySpec<>(MatchPhrasePrefixQueryBuilder.NAME, MatchPhrasePrefixQueryBuilder::new, MatchPhrasePrefixQueryBuilder::fromXContent)); registerQuery(new QuerySpec<>(MultiMatchQueryBuilder.NAME, MultiMatchQueryBuilder::new, MultiMatchQueryBuilder::fromXContent)); + registerQuery(new QuerySpec<>(CombinedFieldsQueryBuilder.NAME, CombinedFieldsQueryBuilder::new, + CombinedFieldsQueryBuilder::fromXContent)); registerQuery(new QuerySpec<>(NestedQueryBuilder.NAME, NestedQueryBuilder::new, NestedQueryBuilder::fromXContent)); registerQuery(new QuerySpec<>(DisMaxQueryBuilder.NAME, DisMaxQueryBuilder::new, DisMaxQueryBuilder::fromXContent)); registerQuery(new QuerySpec<>(IdsQueryBuilder.NAME, IdsQueryBuilder::new, IdsQueryBuilder::fromXContent)); diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/CustomQueryScorer.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/CustomQueryScorer.java index ce67a16d13c8e..7b7491e77374f 100644 --- 
a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/CustomQueryScorer.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/CustomQueryScorer.java @@ -65,9 +65,7 @@ private static class CustomWeightedSpanTermExtractor extends WeightedSpanTermExt @Override protected void extractUnknownQuery(Query query, Map terms) throws IOException { - if (terms.isEmpty()) { - extractWeightedTerms(terms, query, 1F); - } + extractWeightedTerms(terms, query, 1F); } protected void extract(Query query, float boost, Map terms) throws IOException { diff --git a/server/src/test/java/org/apache/lucene/search/XCombinedFieldQueryTests.java b/server/src/test/java/org/apache/lucene/search/XCombinedFieldQueryTests.java new file mode 100644 index 0000000000000..4f79b51b21022 --- /dev/null +++ b/server/src/test/java/org/apache/lucene/search/XCombinedFieldQueryTests.java @@ -0,0 +1,343 @@ +/* @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.search; + +import com.carrotsearch.randomizedtesting.generators.RandomPicks; +import java.io.IOException; +import java.util.Arrays; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.FieldInvertState; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.similarities.BM25Similarity; +import org.apache.lucene.search.similarities.BooleanSimilarity; +import org.apache.lucene.search.similarities.ClassicSimilarity; +import org.apache.lucene.search.similarities.LMDirichletSimilarity; +import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; + +/** + * TODO: this is temporarily copied from Lucene, remove once we update to Lucene 8.9. 
+ */ +public class XCombinedFieldQueryTests extends LuceneTestCase { + /** A field weight below 1 must be rejected by the builder. */ + public void testInvalid() { + XCombinedFieldQuery.Builder builder = new XCombinedFieldQuery.Builder(); + IllegalArgumentException exc = + expectThrows(IllegalArgumentException.class, () -> builder.addField("foo", 0.5f)); + // expected value first, so a failure reports the two values the right way round + assertEquals("weight must be greater or equal to 1", exc.getMessage()); + } + + /** + * Checks what the query rewrites to as fields and terms are added to the builder. + */ + public void testRewrite() throws IOException { + XCombinedFieldQuery.Builder builder = new XCombinedFieldQuery.Builder(); + IndexReader reader = new MultiReader(); + IndexSearcher searcher = new IndexSearcher(reader); + // no fields, no terms + Query actual = searcher.rewrite(builder.build()); + assertEquals(new MatchNoDocsQuery(), actual); + // one field, no terms + builder.addField("field", 1f); + actual = searcher.rewrite(builder.build()); + assertEquals(new MatchNoDocsQuery(), actual); + // one field, one term + builder.addTerm(new BytesRef("foo")); + actual = searcher.rewrite(builder.build()); + assertEquals(new TermQuery(new Term("field", "foo")), actual); + // one field, two terms + builder.addTerm(new BytesRef("bar")); + actual = searcher.rewrite(builder.build()); + assertEquals( + new SynonymQuery.Builder("field") + .addTerm(new Term("field", "foo")) + .addTerm(new Term("field", "bar")) + .build(), + actual); + // two fields: the query rewrites to itself + builder.addField("another_field", 1f); + Query query = builder.build(); + actual = searcher.rewrite(query); + assertEquals(query, actual); + } + + public void testToString() { + assertEquals("CombinedFieldQuery(()())", new XCombinedFieldQuery.Builder().build().toString()); + XCombinedFieldQuery.Builder builder = new XCombinedFieldQuery.Builder(); + builder.addField("foo", 1f); + assertEquals("CombinedFieldQuery((foo)())", builder.build().toString()); + builder.addTerm(new BytesRef("bar")); + assertEquals("CombinedFieldQuery((foo)(bar))", builder.build().toString()); + builder.addField("title", 3f); + assertEquals("CombinedFieldQuery((foo title^3.0)(bar))", builder.build().toString()); + builder.addTerm(new BytesRef("baz")); + assertEquals("CombinedFieldQuery((foo title^3.0)(bar
baz))", builder.build().toString()); + } + + /** + * Indexes one document with term "a" in field "f" and ten with term "a" in field "g", + * then checks that all eleven hits of the combined query get the same score. + */ + public void testSameScore() throws IOException { + Directory dir = newDirectory(); + Similarity similarity = randomCompatibleSimilarity(); + + IndexWriterConfig iwc = new IndexWriterConfig(); + iwc.setSimilarity(similarity); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + + Document doc = new Document(); + doc.add(new StringField("f", "a", Store.NO)); + w.addDocument(doc); + + doc = new Document(); + doc.add(new StringField("g", "a", Store.NO)); + for (int i = 0; i < 10; ++i) { + w.addDocument(doc); + } + + IndexReader reader = w.getReader(); + IndexSearcher searcher = newSearcher(reader); + searcher.setSimilarity(similarity); + XCombinedFieldQuery query = + new XCombinedFieldQuery.Builder() + .addField("f", 1f) + .addField("g", 1f) + .addTerm(new BytesRef("a")) + .build(); + // numDocs() is an int, so clamping it to Integer.MAX_VALUE was a no-op + TopScoreDocCollector collector = + TopScoreDocCollector.create(reader.numDocs(), null, Integer.MAX_VALUE); + searcher.search(query, collector); + TopDocs topDocs = collector.topDocs(); + assertEquals(new TotalHits(11, TotalHits.Relation.EQUAL_TO), topDocs.totalHits); + // All docs must have the same score + for (int i = 0; i < topDocs.scoreDocs.length; ++i) { + assertEquals(topDocs.scoreDocs[0].score, topDocs.scoreDocs[i].score, 0.0f); + } + + reader.close(); + w.close(); + dir.close(); + } + + public void testCopyField() throws IOException { + Directory dir = newDirectory(); + Similarity similarity = randomCompatibleSimilarity(); + + IndexWriterConfig iwc = new IndexWriterConfig(); + iwc.setSimilarity(similarity); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + + int numMatch = atLeast(10); + int boost1 = Math.max(1, random().nextInt(5)); + int boost2 = Math.max(1, random().nextInt(5)); + for (int i = 0; i < numMatch; i++) { + Document doc = new Document(); + if (random().nextBoolean()) { + doc.add(new TextField("a", "baz", Store.NO)); + doc.add(new TextField("b", "baz",
Store.NO)); + for (int k = 0; k < boost1 + boost2; k++) { + doc.add(new TextField("ab", "baz", Store.NO)); + } + w.addDocument(doc); + doc.clear(); + } + int freqA = random().nextInt(5) + 1; + for (int j = 0; j < freqA; j++) { + doc.add(new TextField("a", "foo", Store.NO)); + } + int freqB = random().nextInt(5) + 1; + for (int j = 0; j < freqB; j++) { + doc.add(new TextField("b", "foo", Store.NO)); + } + int freqAB = freqA * boost1 + freqB * boost2; + for (int j = 0; j < freqAB; j++) { + doc.add(new TextField("ab", "foo", Store.NO)); + } + w.addDocument(doc); + } + IndexReader reader = w.getReader(); + IndexSearcher searcher = newSearcher(reader); + + searcher.setSimilarity(similarity); + XCombinedFieldQuery query = + new XCombinedFieldQuery.Builder() + .addField("a", (float) boost1) + .addField("b", (float) boost2) + .addTerm(new BytesRef("foo")) + .build(); + + checkExpectedHits(searcher, numMatch, query, new TermQuery(new Term("ab", "foo"))); + + reader.close(); + w.close(); + dir.close(); + } + + public void testCopyFieldWithMultipleTerms() throws IOException { + Directory dir = newDirectory(); + Similarity similarity = randomCompatibleSimilarity(); + + IndexWriterConfig iwc = new IndexWriterConfig(); + iwc.setSimilarity(similarity); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + + int numMatch = atLeast(10); + int boost1 = Math.max(1, random().nextInt(5)); + int boost2 = Math.max(1, random().nextInt(5)); + for (int i = 0; i < numMatch; i++) { + Document doc = new Document(); + + int freqA = random().nextInt(5) + 1; + for (int j = 0; j < freqA; j++) { + doc.add(new TextField("a", "foo", Store.NO)); + } + int freqB = random().nextInt(5) + 1; + for (int j = 0; j < freqB; j++) { + doc.add(new TextField("b", "bar", Store.NO)); + } + int freqAB = freqA * boost1 + freqB * boost2; + for (int j = 0; j < freqAB; j++) { + doc.add(new TextField("ab", "foo", Store.NO)); + } + w.addDocument(doc); + } + IndexReader reader = w.getReader(); + 
IndexSearcher searcher = newSearcher(reader); + + searcher.setSimilarity(similarity); + XCombinedFieldQuery query = + new XCombinedFieldQuery.Builder() + .addField("a", (float) boost1) + .addField("b", (float) boost2) + .addTerm(new BytesRef("foo")) + .addTerm(new BytesRef("bar")) + .build(); + + checkExpectedHits(searcher, numMatch, query, new TermQuery(new Term("ab", "foo"))); + + reader.close(); + w.close(); + dir.close(); + } + + private static Similarity randomCompatibleSimilarity() { + return RandomPicks.randomFrom( + random(), + Arrays.asList( + new BM25Similarity(), + new BooleanSimilarity(), + new ClassicSimilarity(), + new LMDirichletSimilarity(), + new LMJelinekMercerSimilarity(0.1f))); + } + + private void checkExpectedHits( + IndexSearcher searcher, int numHits, Query firstQuery, Query secondQuery) throws IOException { + TopScoreDocCollector firstCollector = + TopScoreDocCollector.create(numHits, null, Integer.MAX_VALUE); + searcher.search(firstQuery, firstCollector); + TopDocs firstTopDocs = firstCollector.topDocs(); + assertEquals(numHits, firstTopDocs.totalHits.value); + + TopScoreDocCollector secondCollector = + TopScoreDocCollector.create(numHits, null, Integer.MAX_VALUE); + searcher.search(secondQuery, secondCollector); + TopDocs secondTopDocs = secondCollector.topDocs(); + CheckHits.checkEqual(firstQuery, secondTopDocs.scoreDocs, firstTopDocs.scoreDocs); + } + + public void testDocWithNegativeNorms() throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(); + iwc.setSimilarity(new NegativeNormSimilarity()); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + + String queryString = "foo"; + + Document doc = new Document(); + // both fields must contain tokens that match the query string "foo" + doc.add(new TextField("f", "foo", Store.NO)); + doc.add(new TextField("g", "foo baz", Store.NO)); + w.addDocument(doc); + + IndexReader reader = w.getReader(); + IndexSearcher searcher = 
newSearcher(reader); + searcher.setSimilarity(new BM25Similarity()); + XCombinedFieldQuery query = + new XCombinedFieldQuery.Builder() + .addField("f") + .addField("g") + .addTerm(new BytesRef(queryString)) + .build(); + TopDocs topDocs = searcher.search(query, 10); + CheckHits.checkDocIds("queried docs do not match", new int[] {0}, topDocs.scoreDocs); + + reader.close(); + w.close(); + dir.close(); + } + + public void testMultipleDocsNegativeNorms() throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(); + iwc.setSimilarity(new NegativeNormSimilarity()); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + + String queryString = "foo"; + + Document doc0 = new Document(); + doc0.add(new TextField("f", "foo", Store.NO)); + doc0.add(new TextField("g", "foo baz", Store.NO)); + w.addDocument(doc0); + + Document doc1 = new Document(); + // add another match on the query string to the second doc + doc1.add(new TextField("f", "foo is foo", Store.NO)); + doc1.add(new TextField("g", "foo baz", Store.NO)); + w.addDocument(doc1); + + IndexReader reader = w.getReader(); + IndexSearcher searcher = newSearcher(reader); + searcher.setSimilarity(new BM25Similarity()); + XCombinedFieldQuery query = + new XCombinedFieldQuery.Builder() + .addField("f") + .addField("g") + .addTerm(new BytesRef(queryString)) + .build(); + TopDocs topDocs = searcher.search(query, 10); + // Return doc1 ahead of doc0 since its tf is higher + CheckHits.checkDocIds("queried docs do not match", new int[] {1, 0}, topDocs.scoreDocs); + + reader.close(); + w.close(); + dir.close(); + } + + private static final class NegativeNormSimilarity extends Similarity { + @Override + public long computeNorm(FieldInvertState state) { + return -128; + } + + @Override + public SimScorer scorer( + float boost, CollectionStatistics collectionStats, TermStatistics... 
termStats) { + return new BM25Similarity().scorer(boost, collectionStats, termStats); + } + } +} diff --git a/server/src/test/java/org/elasticsearch/index/query/CombinedFieldsQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/CombinedFieldsQueryBuilderTests.java new file mode 100644 index 0000000000000..120474622bb57 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/query/CombinedFieldsQueryBuilderTests.java @@ -0,0 +1,91 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.XCombinedFieldQuery; +import org.elasticsearch.test.AbstractQueryTestCase; + +import java.io.IOException; +import java.util.Arrays; + +import static org.elasticsearch.index.query.QueryBuilders.combinedFieldsQuery; +import static org.hamcrest.CoreMatchers.anyOf; +import static org.hamcrest.CoreMatchers.instanceOf; + +public class CombinedFieldsQueryBuilderTests extends AbstractQueryTestCase { + private static final String MISSING_WILDCARD_FIELD_NAME = "missing_*"; + private static final String MISSING_FIELD_NAME = "missing"; + + @Override + protected CombinedFieldsQueryBuilder doCreateTestQueryBuilder() { + Object value = getRandomQueryText(); + String field = randomFrom(TEXT_FIELD_NAME, TEXT_ALIAS_FIELD_NAME, MISSING_FIELD_NAME, MISSING_WILDCARD_FIELD_NAME); + CombinedFieldsQueryBuilder query = combinedFieldsQuery(value, field); + + if 
(randomBoolean()) { + query.field(field); + } else { + query.field(field, 1.0f + randomFloat()); + } + + if (randomBoolean()) { + query.operator(randomFrom(Operator.values())); + } + if (randomBoolean()) { + query.minimumShouldMatch(randomMinimumShouldMatch()); + } + if (randomBoolean()) { + query.zeroTermsQuery(randomFrom(ZeroTermsQueryOption.NONE, ZeroTermsQueryOption.ALL)); + } + if (randomBoolean()) { + query.autoGenerateSynonymsPhraseQuery(randomBoolean()); + } + return query; + } + + /** + * This check is very light, instead the parsing is tested in detail in {@link CombinedFieldsQueryParsingTests}. + */ + @Override + protected void doAssertLuceneQuery(CombinedFieldsQueryBuilder queryBuilder, Query query, SearchExecutionContext context) { + assertThat(query, anyOf(Arrays.asList( + instanceOf(BooleanQuery.class), + instanceOf(TermQuery.class), + instanceOf(MatchAllDocsQuery.class), + instanceOf(MatchNoDocsQuery.class), + instanceOf(XCombinedFieldQuery.class) + ))); + } + + public void testValuesFromXContent() throws IOException { + String json = "{\n" + + " \"combined_fields\" : {\n" + + " \"query\" : \"quick brown fox\",\n" + + " \"fields\" : [ \"abstract^1.0\", \"body^1.0\", \"title^1.0\" ],\n" + + " \"operator\" : \"OR\",\n" + + " \"zero_terms_query\" : \"NONE\",\n" + + " \"auto_generate_synonyms_phrase_query\" : true,\n" + + " \"boost\" : 2.0\n" + + " }\n" + + "}"; + + CombinedFieldsQueryBuilder parsed = (CombinedFieldsQueryBuilder) parseQuery(json); + checkGeneratedJson(json, parsed); + + assertEquals(json, "quick brown fox", parsed.value()); + assertEquals(json, 3, parsed.fields().size()); + assertEquals(json, Operator.OR, parsed.operator()); + assertEquals(json, 2.0, parsed.boost, 1e-6); + } +} diff --git a/server/src/test/java/org/elasticsearch/index/query/CombinedFieldsQueryParsingTests.java b/server/src/test/java/org/elasticsearch/index/query/CombinedFieldsQueryParsingTests.java new file mode 100644 index 0000000000000..8161b9917368d --- /dev/null 
+++ b/server/src/test/java/org/elasticsearch/index/query/CombinedFieldsQueryParsingTests.java @@ -0,0 +1,368 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.analysis.MockSynonymAnalyzer; +import org.apache.lucene.analysis.core.StopAnalyzer; +import org.apache.lucene.analysis.en.EnglishAnalyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.XCombinedFieldQuery; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AnalyzerScope; +import org.elasticsearch.index.analysis.IndexAnalyzers; +import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.MapperServiceTestCase; +import org.hamcrest.CoreMatchers; +import org.junit.Before; + +import java.io.IOException; +import java.util.Map; + +import static org.elasticsearch.index.query.QueryBuilders.combinedFieldsQuery; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; + +public class CombinedFieldsQueryParsingTests extends 
MapperServiceTestCase { + private SearchExecutionContext context; + + @Before + public void createSearchExecutionContext() throws IOException { + MapperService mapperService = createMapperService( + XContentFactory.jsonBuilder().startObject().startObject(MapperService.SINGLE_MAPPING_NAME) + .startObject("properties") + .startObject("field1").field("type", "text").endObject() + .startObject("field2").field("type", "text").endObject() + .startObject("synonym1").field("type", "text").field("analyzer", "mock_synonym").endObject() + .startObject("synonym2").field("type", "text").field("analyzer", "mock_synonym").endObject() + .startObject("stopwords1").field("type", "text").field("analyzer", "stop").endObject() + .startObject("stopwords2").field("type", "text").field("analyzer", "stop").endObject() + .endObject() + .endObject().endObject()); + context = createSearchExecutionContext(mapperService); + } + + @Override + protected IndexAnalyzers createIndexAnalyzers(IndexSettings indexSettings) { + return new IndexAnalyzers( + Map.of("default", new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer()), + "mock_synonym", new NamedAnalyzer("mock_synonym", AnalyzerScope.INDEX, new MockSynonymAnalyzer()), + "stop", new NamedAnalyzer("stop", AnalyzerScope.INDEX, new StopAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET))), + Map.of(), + Map.of()); + } + + public void testEmptyArguments() { + expectThrows(IllegalArgumentException.class, () -> combinedFieldsQuery(null, "field")); + expectThrows(IllegalArgumentException.class, () -> combinedFieldsQuery("value", (String[]) null)); + expectThrows(IllegalArgumentException.class, () -> combinedFieldsQuery("value", new String[]{""})); + + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> combinedFieldsQuery("value").toQuery(context)); + assertThat(e.getMessage(), equalTo("In [combined_fields] query, at least one field must be provided")); + } + + public void testInvalidFieldBoosts() { + 
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> combinedFieldsQuery("the quick fox") + .field("field1", -1.0f) + .field("field2") + .toQuery(context)); + assertThat(e.getMessage(), containsString("[combined_fields] requires field boosts to be >= 1.0")); + + e = expectThrows(IllegalArgumentException.class, + () -> combinedFieldsQuery("the quick fox") + .field("field1", 0.42f) + .field("field2") + .toQuery(context)); + assertThat(e.getMessage(), containsString("[combined_fields] requires field boosts to be >= 1.0")); + + e = expectThrows(IllegalArgumentException.class, + () -> combinedFieldsQuery("the quick fox") + .fields(Map.of("field1", 2.0f, "field2", 0.3f)) + .toQuery(context)); + assertThat(e.getMessage(), containsString("[combined_fields] requires field boosts to be >= 1.0")); + } + + public void testMissingFields() throws Exception { + assertThat(combinedFieldsQuery("test").field("missing").toQuery(context), + instanceOf(MatchNoDocsQuery.class)); + assertThat(combinedFieldsQuery("test").field("missing*").toQuery(context), + instanceOf(MatchNoDocsQuery.class)); + } + + public void testWildcardFieldPattern() throws Exception { + Query query = combinedFieldsQuery("quick fox") + .field("field*") + .toQuery(context); + assertThat(query, instanceOf(BooleanQuery.class)); + + BooleanQuery booleanQuery = (BooleanQuery) query; + assertThat(booleanQuery.clauses().size(), equalTo(2)); + assertThat(booleanQuery.clauses().get(0).getQuery(), instanceOf(XCombinedFieldQuery.class)); + assertThat(booleanQuery.clauses().get(1).getQuery(), instanceOf(XCombinedFieldQuery.class)); + } + + public void testOperator() throws Exception { + Operator operator = randomFrom(Operator.values()); + BooleanClause.Occur occur = operator.toBooleanClauseOccur(); + int minimumShouldMatch = randomIntBetween(0, 2); + + Query query = combinedFieldsQuery("quick fox") + .field("field1") + .field("field2") + .operator(operator) + 
.minimumShouldMatch(String.valueOf(minimumShouldMatch)) + .toQuery(context); + assertThat(query, instanceOf(BooleanQuery.class)); + + BooleanQuery booleanQuery = (BooleanQuery) query; + assertThat(booleanQuery.getMinimumNumberShouldMatch(), equalTo(minimumShouldMatch)); + + assertThat(booleanQuery.clauses().size(), equalTo(2)); + assertThat(booleanQuery.clauses().get(0).getOccur(), equalTo(occur)); + assertThat(booleanQuery.clauses().get(1).getOccur(), equalTo(occur)); + } + + public void testQueryBoost() throws IOException { + CombinedFieldsQueryBuilder builder = combinedFieldsQuery("test") + .field("field1", 5.0f) + .boost(2.0f); + Query query = builder.toQuery(context); + assertThat(query, instanceOf(BoostQuery.class)); + + BoostQuery boostQuery = (BoostQuery) query; + assertThat(boostQuery.getBoost(), equalTo(2.0f)); + assertThat(boostQuery.getQuery(), instanceOf(XCombinedFieldQuery.class)); + } + + public void testInconsistentAnalyzers() { + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> combinedFieldsQuery("the quick fox") + .field("field1", 1.2f) + .field("stopwords1") + .toQuery(context)); + assertThat(e.getMessage(), CoreMatchers.equalTo("All fields in [combined_fields] query must have the same search analyzer")); + } + + public void testInvalidDefaultSimilarity() throws IOException { + Settings settings = Settings.builder() + .put("index.similarity.default.type", "boolean") + .build(); + + MapperService mapperService = createMapperService(settings, + XContentFactory.jsonBuilder().startObject().startObject(MapperService.SINGLE_MAPPING_NAME) + .startObject("properties") + .startObject("field").field("type", "text").endObject() + .endObject() + .endObject().endObject()); + SearchExecutionContext context = createSearchExecutionContext(mapperService); + + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> + combinedFieldsQuery("value", "field") + .toQuery(context)); + assertThat(e.getMessage(), 
equalTo( + "[combined_fields] queries can only be used with the [BM25] similarity")); + } + + public void testPerFieldSimilarity() throws IOException { + Settings settings = Settings.builder() + .put("index.similarity.tuned_bm25.type", "BM25") + .put("index.similarity.tuned_bm25.k1", "1.4") + .put("index.similarity.tuned_bm25.b", "0.8") + .build(); + + MapperService mapperService = createMapperService(settings, + XContentFactory.jsonBuilder().startObject().startObject(MapperService.SINGLE_MAPPING_NAME) + .startObject("properties") + .startObject("field") + .field("type", "text") + .field("similarity", "tuned_bm25") + .endObject() + .endObject() + .endObject().endObject()); + SearchExecutionContext context = createSearchExecutionContext(mapperService); + + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> + combinedFieldsQuery("value", "field") + .operator(Operator.AND) + .toQuery(context)); + assertThat(e.getMessage(), equalTo( + "[combined_fields] queries cannot be used with per-field similarities")); + } + + public void testCombinedFieldsWithSynonyms() throws IOException { + Query actual = combinedFieldsQuery("dogs cats", "synonym1", "synonym2") + .operator(Operator.AND) + .toQuery(context); + + Query expected = new BooleanQuery.Builder() + .add(new XCombinedFieldQuery.Builder() + .addField("synonym1") + .addField("synonym2") + .addTerm(new BytesRef("dog")) + .addTerm(new BytesRef("dogs")) + .build(), BooleanClause.Occur.MUST) + .add(new XCombinedFieldQuery.Builder() + .addField("synonym1") + .addField("synonym2") + .addTerm(new BytesRef("cats")) + .build(), BooleanClause.Occur.MUST) + .build(); + + assertThat(actual, equalTo(expected)); + } + + public void testSynonymsPhrase() throws IOException { + Query actual = combinedFieldsQuery("guinea pig cats", "synonym1", "synonym2") + .operator(Operator.AND) + .toQuery(context); + + Query expected = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(new 
BooleanQuery.Builder() + .add(new PhraseQuery.Builder() + .add(new Term("synonym1", "guinea")) + .add(new Term("synonym1", "pig")) + .build(), BooleanClause.Occur.SHOULD) + .add(new PhraseQuery.Builder() + .add(new Term("synonym2", "guinea")) + .add(new Term("synonym2", "pig")) + .build(), BooleanClause.Occur.SHOULD) + .build(), BooleanClause.Occur.SHOULD) + .add(new XCombinedFieldQuery.Builder() + .addField("synonym1") + .addField("synonym2") + .addTerm(new BytesRef("cavy")) + .build(), BooleanClause.Occur.SHOULD) + .build(), BooleanClause.Occur.MUST) + .add(new XCombinedFieldQuery.Builder() + .addField("synonym1") + .addField("synonym2") + .addTerm(new BytesRef("cats")) + .build(), BooleanClause.Occur.MUST) + .build(); + + assertEquals(expected, actual); + } + + public void testDisabledSynonymsPhrase() throws IOException { + Query actual = combinedFieldsQuery("guinea pig cats", "synonym1", "synonym2") + .operator(Operator.AND) + .autoGenerateSynonymsPhraseQuery(false) + .toQuery(context); + + Query expected = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(new XCombinedFieldQuery.Builder() + .addField("synonym1") + .addField("synonym2") + .addTerm(new BytesRef("guinea")) + .build(), BooleanClause.Occur.MUST) + .add(new XCombinedFieldQuery.Builder() + .addField("synonym1") + .addField("synonym2") + .addTerm(new BytesRef("pig")) + .build(), BooleanClause.Occur.MUST) + .build(), BooleanClause.Occur.SHOULD) + .add(new XCombinedFieldQuery.Builder() + .addField("synonym1") + .addField("synonym2") + .addTerm(new BytesRef("cavy")) + .build(), BooleanClause.Occur.SHOULD) + .build(), BooleanClause.Occur.MUST) + .add(new XCombinedFieldQuery.Builder() + .addField("synonym1") + .addField("synonym2") + .addTerm(new BytesRef("cats")) + .build(), BooleanClause.Occur.MUST) + .build(); + + assertEquals(expected, actual); + } + + public void testStopwords() throws Exception { + ZeroTermsQueryOption zeroTermsQuery = 
randomFrom(ZeroTermsQueryOption.ALL, + ZeroTermsQueryOption.NONE); + Query expectedEmptyQuery = zeroTermsQuery.asQuery(); + + BytesRef quickTerm = new BytesRef("quick"); + BytesRef foxTerm = new BytesRef("fox"); + + Query query = combinedFieldsQuery("the quick fox") + .field("stopwords1") + .zeroTermsQuery(zeroTermsQuery) + .toQuery(context); + Query expected = new BooleanQuery.Builder() + .add(new XCombinedFieldQuery.Builder().addField("stopwords1").addTerm(quickTerm).build(), BooleanClause.Occur.SHOULD) + .add(new XCombinedFieldQuery.Builder().addField("stopwords1").addTerm(foxTerm).build(), BooleanClause.Occur.SHOULD) + .build(); + assertEquals(expected, query); + + query = combinedFieldsQuery("the quick fox") + .field("stopwords1") + .field("stopwords2") + .zeroTermsQuery(zeroTermsQuery) + .toQuery(context); + expected = new BooleanQuery.Builder() + .add(new XCombinedFieldQuery.Builder() + .addField("stopwords1") + .addField("stopwords2") + .addTerm(quickTerm) + .build(), BooleanClause.Occur.SHOULD) + .add(new XCombinedFieldQuery.Builder() + .addField("stopwords1") + .addField("stopwords2") + .addTerm(foxTerm) + .build(), BooleanClause.Occur.SHOULD) + .build(); + assertEquals(expected, query); + + query = combinedFieldsQuery("the") + .field("stopwords1") + .field("stopwords2") + .zeroTermsQuery(zeroTermsQuery) + .toQuery(context); + assertEquals(expectedEmptyQuery, query); + + query = new BoolQueryBuilder() + .should(combinedFieldsQuery("the") + .field("stopwords1") + .zeroTermsQuery(zeroTermsQuery)) + .toQuery(context); + expected = new BooleanQuery.Builder() + .add(expectedEmptyQuery, BooleanClause.Occur.SHOULD) + .build(); + assertEquals(expected, query); + + query = new BoolQueryBuilder() + .should(combinedFieldsQuery("the") + .field("stopwords1") + .field("stopwords2") + .zeroTermsQuery(zeroTermsQuery)) + .toQuery(context); + expected = new BooleanQuery.Builder() + .add(expectedEmptyQuery, BooleanClause.Occur.SHOULD) + .build(); + assertEquals(expected, 
query); + } + +} diff --git a/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java index abda10a0286b6..7f3a328b98503 100644 --- a/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java @@ -14,7 +14,6 @@ import org.apache.lucene.search.SynonymQuery; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; -import org.elasticsearch.index.search.MatchQueryParser.ZeroTermsQuery; import org.elasticsearch.test.AbstractQueryTestCase; import java.io.IOException; @@ -57,7 +56,7 @@ protected MatchPhrasePrefixQueryBuilder doCreateTestQueryBuilder() { matchQuery.maxExpansions(randomIntBetween(1, 10000)); } if (randomBoolean()) { - matchQuery.zeroTermsQuery(randomFrom(ZeroTermsQuery.ALL, ZeroTermsQuery.NONE)); + matchQuery.zeroTermsQuery(randomFrom(ZeroTermsQueryOption.ALL, ZeroTermsQueryOption.NONE)); } return matchQuery; } @@ -82,7 +81,7 @@ protected void doAssertLuceneQuery(MatchPhrasePrefixQueryBuilder queryBuilder, Q assertThat(query, notNullValue()); if (query instanceof MatchAllDocsQuery) { - assertThat(queryBuilder.zeroTermsQuery(), equalTo(ZeroTermsQuery.ALL)); + assertThat(queryBuilder.zeroTermsQuery(), equalTo(ZeroTermsQueryOption.ALL)); return; } @@ -118,11 +117,11 @@ public void testPhraseOnFieldWithNoTerms() { public void testPhrasePrefixZeroTermsQuery() throws IOException { MatchPhrasePrefixQueryBuilder matchQuery = new MatchPhrasePrefixQueryBuilder(TEXT_FIELD_NAME, ""); - matchQuery.zeroTermsQuery(ZeroTermsQuery.NONE); + matchQuery.zeroTermsQuery(ZeroTermsQueryOption.NONE); assertEquals(new MatchNoDocsQuery(), matchQuery.doToQuery(createSearchExecutionContext())); matchQuery = new MatchPhrasePrefixQueryBuilder(TEXT_FIELD_NAME, ""); - 
matchQuery.zeroTermsQuery(ZeroTermsQuery.ALL); + matchQuery.zeroTermsQuery(ZeroTermsQueryOption.ALL); assertEquals(new MatchAllDocsQuery(), matchQuery.doToQuery(createSearchExecutionContext())); } diff --git a/server/src/test/java/org/elasticsearch/index/query/MatchPhraseQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MatchPhraseQueryBuilderTests.java index cd373c1cc15c4..3e83472f8d915 100644 --- a/server/src/test/java/org/elasticsearch/index/query/MatchPhraseQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MatchPhraseQueryBuilderTests.java @@ -17,7 +17,6 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.elasticsearch.common.ParsingException; -import org.elasticsearch.index.search.MatchQueryParser.ZeroTermsQuery; import org.elasticsearch.test.AbstractQueryTestCase; import java.io.IOException; @@ -58,7 +57,7 @@ protected MatchPhraseQueryBuilder doCreateTestQueryBuilder() { } if (randomBoolean()) { - matchQuery.zeroTermsQuery(randomFrom(ZeroTermsQuery.ALL, ZeroTermsQuery.NONE)); + matchQuery.zeroTermsQuery(randomFrom(ZeroTermsQueryOption.ALL, ZeroTermsQueryOption.NONE)); } return matchQuery; @@ -84,7 +83,7 @@ protected void doAssertLuceneQuery(MatchPhraseQueryBuilder queryBuilder, Query q assertThat(query, notNullValue()); if (query instanceof MatchAllDocsQuery) { - assertThat(queryBuilder.zeroTermsQuery(), equalTo(ZeroTermsQuery.ALL)); + assertThat(queryBuilder.zeroTermsQuery(), equalTo(ZeroTermsQueryOption.ALL)); return; } @@ -146,7 +145,7 @@ public void testFromJson() throws IOException { assertEquals(json, "this is a test", parsed.value()); assertEquals(json, 2, parsed.slop()); - assertEquals(json, ZeroTermsQuery.ALL, parsed.zeroTermsQuery()); + assertEquals(json, ZeroTermsQueryOption.ALL, parsed.zeroTermsQuery()); } public void testParseFailsWithMultipleFields() throws IOException { diff --git 
a/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java index fd0af30367990..6fbfa88a61b35 100644 --- a/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java @@ -37,7 +37,6 @@ import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.search.MatchQueryParser; import org.elasticsearch.index.search.MatchQueryParser.Type; -import org.elasticsearch.index.search.MatchQueryParser.ZeroTermsQuery; import org.elasticsearch.test.AbstractQueryTestCase; import org.hamcrest.Matcher; import org.hamcrest.Matchers; @@ -109,7 +108,7 @@ protected MatchQueryBuilder doCreateTestQueryBuilder() { } if (randomBoolean()) { - matchQuery.zeroTermsQuery(randomFrom(ZeroTermsQuery.ALL, ZeroTermsQuery.NONE)); + matchQuery.zeroTermsQuery(randomFrom(ZeroTermsQueryOption.ALL, ZeroTermsQueryOption.NONE)); } if (randomBoolean()) { @@ -136,7 +135,7 @@ protected void doAssertLuceneQuery(MatchQueryBuilder queryBuilder, Query query, assertThat(query, notNullValue()); if (query instanceof MatchAllDocsQuery) { - assertThat(queryBuilder.zeroTermsQuery(), equalTo(ZeroTermsQuery.ALL)); + assertThat(queryBuilder.zeroTermsQuery(), equalTo(ZeroTermsQueryOption.ALL)); return; } diff --git a/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java index 8ba9623f13d84..67b1b002d8982 100644 --- a/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java @@ -28,7 +28,6 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.query.MultiMatchQueryBuilder.Type; -import 
org.elasticsearch.index.search.MatchQueryParser; import org.elasticsearch.test.AbstractQueryTestCase; import org.hamcrest.Matchers; @@ -118,7 +117,7 @@ protected MultiMatchQueryBuilder doCreateTestQueryBuilder() { query.tieBreaker(randomFloat()); } if (randomBoolean()) { - query.zeroTermsQuery(randomFrom(MatchQueryParser.ZeroTermsQuery.NONE, MatchQueryParser.ZeroTermsQuery.ALL)); + query.zeroTermsQuery(randomFrom(ZeroTermsQueryOption.NONE, ZeroTermsQueryOption.ALL)); } if (randomBoolean()) { query.autoGenerateSynonymsPhraseQuery(randomBoolean()); diff --git a/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java b/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java index fc3dd9b3a87af..cb9c1283a1fc6 100644 --- a/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java +++ b/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java @@ -300,6 +300,7 @@ public List> getRescorers() { "bool", "boosting", "constant_score", + "combined_fields", "dis_max", "exists", "field_masking_span", diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java index 50b6132738d4e..3f09f37aae8c6 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java @@ -520,6 +520,10 @@ protected SearchExecutionContext createSearchExecutionContext(MapperService mapp when(searchExecutionContext.lookup()).thenReturn(new SearchLookup(mapperService::fieldType, (ft, s) -> { throw new UnsupportedOperationException("search lookup not available"); })); + + SimilarityService similarityService = new SimilarityService(mapperService.getIndexSettings(), null, Map.of()); + when(searchExecutionContext.getDefaultSimilarity()).thenReturn(similarityService.getDefaultSimilarity()); + return 
searchExecutionContext; } }