Skip to content

Commit c378432

Browse files
authored
Refactor field expansion for match, multi_match and query_string query (#25726)
This commit changes the way we handle field expansion in the `match`, `multi_match` and `query_string` queries. The main changes are:
- For an exact field name, the new behavior is to rewrite to a match-no-docs query when the field name is not found in the mapping.
- For partial field names (with a `*` suffix), the expansion is done only on `keyword`, `text`, `date`, `ip` and `number` field types. Other field types are simply ignored.
- For all fields (`*`), the expansion is done on accepted field types only (see above) and metadata fields are also filtered out.
- The `*` notation can also be used to set the `default_field` option on the `query_string` query. This should replace the need for the extra option `use_all_fields`, which is deprecated in this change.

This commit also rewrites a simple `*` query to a match-all-docs query when all fields are requested (Fixes #25556). The same change should be done on `simple_query_string` for completeness. The `use_all_fields` option in `query_string` is also deprecated in this change; `default_field` should be set to `*` instead. Relates #25551
1 parent 47f92d7 commit c378432

File tree

16 files changed

+406
-438
lines changed

16 files changed

+406
-438
lines changed

core/src/main/java/org/elasticsearch/common/lucene/search/Queries.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import org.apache.lucene.search.PrefixQuery;
3131
import org.apache.lucene.search.Query;
3232
import org.apache.lucene.util.BytesRef;
33+
import org.elasticsearch.ElasticsearchException;
3334
import org.elasticsearch.common.Nullable;
3435
import org.elasticsearch.index.mapper.TypeFieldMapper;
3536

@@ -47,6 +48,16 @@ public static Query newMatchNoDocsQuery(String reason) {
4748
return new MatchNoDocsQuery(reason);
4849
}
4950

51+
52+
public static Query newUnmappedFieldQuery(String field) {
53+
return Queries.newMatchNoDocsQuery("unmapped field [" + (field != null ? field : "null") + "]");
54+
}
55+
56+
public static Query newLenientFieldQuery(String field, RuntimeException e) {
57+
String message = ElasticsearchException.getExceptionName(e) + ":[" + e.getMessage() + "]";
58+
return Queries.newMatchNoDocsQuery("failed [" + field + "] query, caused by " + message);
59+
}
60+
5061
public static Query newNestedFilter() {
5162
return new PrefixQuery(new Term(TypeFieldMapper.NAME, new BytesRef("__")));
5263
}

core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.elasticsearch.index.query.support.QueryParsers;
3737
import org.elasticsearch.index.search.MatchQuery;
3838
import org.elasticsearch.index.search.MultiMatchQuery;
39+
import org.elasticsearch.index.search.QueryStringQueryParser;
3940

4041
import java.io.IOException;
4142
import java.util.HashMap;
@@ -739,27 +740,10 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
739740
}
740741
}
741742

742-
Map<String, Float> newFieldsBoosts = handleFieldsMatchPattern(context.getMapperService(), fieldsBoosts);
743-
743+
Map<String, Float> newFieldsBoosts = QueryStringQueryParser.resolveMappingFields(context, fieldsBoosts);
744744
return multiMatchQuery.parse(type, newFieldsBoosts, value, minimumShouldMatch);
745745
}
746746

747-
private static Map<String, Float> handleFieldsMatchPattern(MapperService mapperService, Map<String, Float> fieldsBoosts) {
748-
Map<String, Float> newFieldsBoosts = new TreeMap<>();
749-
for (Map.Entry<String, Float> fieldBoost : fieldsBoosts.entrySet()) {
750-
String fField = fieldBoost.getKey();
751-
Float fBoost = fieldBoost.getValue();
752-
if (Regex.isSimpleMatchPattern(fField)) {
753-
for (String field : mapperService.simpleMatchToIndexNames(fField)) {
754-
newFieldsBoosts.put(field, fBoost);
755-
}
756-
} else {
757-
newFieldsBoosts.put(fField, fBoost);
758-
}
759-
}
760-
return newFieldsBoosts;
761-
}
762-
763747
@Override
764748
protected int doHashCode() {
765749
return Objects.hash(value, fieldsBoosts, type, operator, analyzer, slop, fuzziness,

core/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java

Lines changed: 32 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -34,28 +34,17 @@
3434
import org.elasticsearch.common.xcontent.XContentBuilder;
3535
import org.elasticsearch.common.xcontent.XContentParser;
3636
import org.elasticsearch.index.analysis.NamedAnalyzer;
37-
import org.elasticsearch.index.mapper.DateFieldMapper;
38-
import org.elasticsearch.index.mapper.IpFieldMapper;
39-
import org.elasticsearch.index.mapper.KeywordFieldMapper;
40-
import org.elasticsearch.index.mapper.MappedFieldType;
41-
import org.elasticsearch.index.mapper.MapperService;
42-
import org.elasticsearch.index.mapper.NumberFieldMapper;
43-
import org.elasticsearch.index.mapper.ScaledFloatFieldMapper;
44-
import org.elasticsearch.index.mapper.TextFieldMapper;
4537
import org.elasticsearch.index.query.support.QueryParsers;
4638
import org.elasticsearch.index.search.QueryStringQueryParser;
4739
import org.joda.time.DateTimeZone;
4840

4941
import java.io.IOException;
5042
import java.util.ArrayList;
51-
import java.util.Collection;
5243
import java.util.HashMap;
53-
import java.util.HashSet;
5444
import java.util.List;
5545
import java.util.Locale;
5646
import java.util.Map;
5747
import java.util.Objects;
58-
import java.util.Set;
5948
import java.util.TreeMap;
6049

6150
/**
@@ -110,24 +99,10 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
11099
private static final ParseField TIME_ZONE_FIELD = new ParseField("time_zone");
111100
private static final ParseField SPLIT_ON_WHITESPACE = new ParseField("split_on_whitespace")
112101
.withAllDeprecated("This setting is ignored, the parser always splits on logical operator");
113-
private static final ParseField ALL_FIELDS_FIELD = new ParseField("all_fields");
102+
private static final ParseField ALL_FIELDS_FIELD = new ParseField("all_fields")
103+
.withAllDeprecated("Set [default_field] to `*` instead");
114104
private static final ParseField TYPE_FIELD = new ParseField("type");
115105

116-
// Mapping types the "all-ish" query can be executed against
117-
public static final Set<String> ALLOWED_QUERY_MAPPER_TYPES;
118-
119-
static {
120-
ALLOWED_QUERY_MAPPER_TYPES = new HashSet<>();
121-
ALLOWED_QUERY_MAPPER_TYPES.add(DateFieldMapper.CONTENT_TYPE);
122-
ALLOWED_QUERY_MAPPER_TYPES.add(IpFieldMapper.CONTENT_TYPE);
123-
ALLOWED_QUERY_MAPPER_TYPES.add(KeywordFieldMapper.CONTENT_TYPE);
124-
for (NumberFieldMapper.NumberType nt : NumberFieldMapper.NumberType.values()) {
125-
ALLOWED_QUERY_MAPPER_TYPES.add(nt.typeName());
126-
}
127-
ALLOWED_QUERY_MAPPER_TYPES.add(ScaledFloatFieldMapper.CONTENT_TYPE);
128-
ALLOWED_QUERY_MAPPER_TYPES.add(TextFieldMapper.CONTENT_TYPE);
129-
}
130-
131106
private final String queryString;
132107

133108
private String defaultField;
@@ -179,8 +154,6 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
179154

180155
private DateTimeZone timeZone;
181156

182-
private Boolean useAllFields;
183-
184157
/** To limit effort spent determinizing regexp queries. */
185158
private int maxDeterminizedStates = DEFAULT_MAX_DETERMINED_STATES;
186159

@@ -240,8 +213,11 @@ public QueryStringQueryBuilder(StreamInput in) throws IOException {
240213
if (in.getVersion().onOrAfter(Version.V_5_1_1)) {
241214
if (in.getVersion().before(Version.V_6_0_0_beta1)) {
242215
in.readBoolean(); // split_on_whitespace
216+
Boolean useAllField = in.readOptionalBoolean();
217+
if (useAllField != null && useAllField) {
218+
defaultField = "*";
219+
}
243220
}
244-
useAllFields = in.readOptionalBoolean();
245221
}
246222
}
247223

@@ -291,8 +267,9 @@ protected void doWriteTo(StreamOutput out) throws IOException {
291267
if (out.getVersion().onOrAfter(Version.V_5_1_1)) {
292268
if (out.getVersion().before(Version.V_6_0_0_beta1)) {
293269
out.writeBoolean(false); // split_on_whitespace
270+
Boolean useAllFields = defaultField == null ? null : Regex.isMatchAllPattern(defaultField);
271+
out.writeOptionalBoolean(useAllFields);
294272
}
295-
out.writeOptionalBoolean(this.useAllFields);
296273
}
297274
}
298275

@@ -314,17 +291,19 @@ public String defaultField() {
314291
}
315292

316293
/**
317-
* Tell the query_string query to use all fields explicitly, even if _all is
318-
* enabled. If the "default_field" parameter or "fields" are specified, they
319-
* will be ignored.
294+
* This setting is deprecated, set {@link #defaultField(String)} to "*" instead.
320295
*/
296+
@Deprecated
321297
public QueryStringQueryBuilder useAllFields(Boolean useAllFields) {
322-
this.useAllFields = useAllFields;
298+
if (useAllFields) {
299+
this.defaultField = "*";
300+
}
323301
return this;
324302
}
325303

304+
@Deprecated
326305
public Boolean useAllFields() {
327-
return this.useAllFields;
306+
return defaultField == null ? null : Regex.isMatchAllPattern(defaultField);
328307
}
329308

330309
/**
@@ -703,9 +682,6 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep
703682
builder.field(TIME_ZONE_FIELD.getPreferredName(), this.timeZone.getID());
704683
}
705684
builder.field(ESCAPE_FIELD.getPreferredName(), this.escape);
706-
if (this.useAllFields != null) {
707-
builder.field(ALL_FIELDS_FIELD.getPreferredName(), this.useAllFields);
708-
}
709685
printBoostAndQueryName(builder);
710686
builder.endObject();
711687
}
@@ -737,7 +713,6 @@ public static QueryStringQueryBuilder fromXContent(XContentParser parser) throws
737713
Fuzziness fuzziness = QueryStringQueryBuilder.DEFAULT_FUZZINESS;
738714
String fuzzyRewrite = null;
739715
String rewrite = null;
740-
Boolean useAllFields = null;
741716
Map<String, Float> fieldsAndWeights = new HashMap<>();
742717
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
743718
if (token == XContentParser.Token.FIELD_NAME) {
@@ -812,7 +787,7 @@ public static QueryStringQueryBuilder fromXContent(XContentParser parser) throws
812787
} else if (LENIENT_FIELD.match(currentFieldName)) {
813788
lenient = parser.booleanValue();
814789
} else if (ALL_FIELDS_FIELD.match(currentFieldName)) {
815-
useAllFields = parser.booleanValue();
790+
defaultField = "*";
816791
} else if (MAX_DETERMINIZED_STATES_FIELD.match(currentFieldName)) {
817792
maxDeterminizedStates = parser.intValue();
818793
} else if (TIME_ZONE_FIELD.match(currentFieldName)) {
@@ -847,12 +822,6 @@ public static QueryStringQueryBuilder fromXContent(XContentParser parser) throws
847822
throw new ParsingException(parser.getTokenLocation(), "[" + QueryStringQueryBuilder.NAME + "] must be provided with a [query]");
848823
}
849824

850-
if ((useAllFields != null && useAllFields) &&
851-
(defaultField != null || fieldsAndWeights.size() != 0)) {
852-
throw new ParsingException(parser.getTokenLocation(),
853-
"cannot use [all_fields] parameter in conjunction with [default_field] or [fields]");
854-
}
855-
856825
QueryStringQueryBuilder queryStringQuery = new QueryStringQueryBuilder(queryString);
857826
queryStringQuery.fields(fieldsAndWeights);
858827
queryStringQuery.defaultField(defaultField);
@@ -880,7 +849,6 @@ public static QueryStringQueryBuilder fromXContent(XContentParser parser) throws
880849
queryStringQuery.timeZone(timeZone);
881850
queryStringQuery.boost(boost);
882851
queryStringQuery.queryName(queryName);
883-
queryStringQuery.useAllFields(useAllFields);
884852
return queryStringQuery;
885853
}
886854

@@ -914,8 +882,7 @@ protected boolean doEquals(QueryStringQueryBuilder other) {
914882
timeZone == null ? other.timeZone == null : other.timeZone != null &&
915883
Objects.equals(timeZone.getID(), other.timeZone.getID()) &&
916884
Objects.equals(escape, other.escape) &&
917-
Objects.equals(maxDeterminizedStates, other.maxDeterminizedStates) &&
918-
Objects.equals(useAllFields, other.useAllFields);
885+
Objects.equals(maxDeterminizedStates, other.maxDeterminizedStates);
919886
}
920887

921888
@Override
@@ -924,72 +891,37 @@ protected int doHashCode() {
924891
quoteFieldSuffix, allowLeadingWildcard, analyzeWildcard,
925892
enablePositionIncrements, fuzziness, fuzzyPrefixLength,
926893
fuzzyMaxExpansions, fuzzyRewrite, phraseSlop, type, tieBreaker, rewrite, minimumShouldMatch, lenient,
927-
timeZone == null ? 0 : timeZone.getID(), escape, maxDeterminizedStates, useAllFields);
928-
}
929-
930-
/**
931-
* Given a shard context, return a map of all fields in the mappings that
932-
* can be queried. The map will be field name to a float of 1.0f.
933-
*/
934-
public static Map<String, Float> allQueryableDefaultFields(QueryShardContext context) {
935-
Collection<String> allFields = context.simpleMatchToIndexNames("*");
936-
Map<String, Float> fields = new HashMap<>();
937-
for (String fieldName : allFields) {
938-
if (MapperService.isMetadataField(fieldName)) {
939-
// Ignore our metadata fields
940-
continue;
941-
}
942-
MappedFieldType mft = context.fieldMapper(fieldName);
943-
assert mft != null : "should never have a null mapper for an existing field";
944-
945-
// Ignore fields that are not in the allowed mapper types. Some
946-
// types do not support term queries, and thus we cannot generate
947-
// a special query for them.
948-
String mappingType = mft.typeName();
949-
if (ALLOWED_QUERY_MAPPER_TYPES.contains(mappingType)) {
950-
fields.put(fieldName, 1.0f);
951-
}
952-
}
953-
return fields;
894+
timeZone == null ? 0 : timeZone.getID(), escape, maxDeterminizedStates);
954895
}
955896

956897
@Override
957898
protected Query doToQuery(QueryShardContext context) throws IOException {
958899
String rewrittenQueryString = escape ? org.apache.lucene.queryparser.classic.QueryParser.escape(this.queryString) : queryString;
959-
if ((useAllFields != null && useAllFields) && (fieldsAndWeights.size() != 0 || this.defaultField != null)) {
960-
throw addValidationError("cannot use [all_fields] parameter in conjunction with [default_field] or [fields]", null);
900+
if (fieldsAndWeights.size() > 0 && this.defaultField != null) {
901+
throw addValidationError("cannot use [fields] parameter in conjunction with [default_field]", null);
961902
}
962903

963904
QueryStringQueryParser queryParser;
964905
boolean isLenient = lenient == null ? context.queryStringLenient() : lenient;
965906
if (defaultField != null) {
966-
queryParser = new QueryStringQueryParser(context, defaultField, isLenient);
967-
} else if (fieldsAndWeights.size() > 0) {
968-
final Map<String, Float> resolvedFields = new TreeMap<>();
969-
for (Map.Entry<String, Float> fieldsEntry : fieldsAndWeights.entrySet()) {
970-
String fieldName = fieldsEntry.getKey();
971-
Float weight = fieldsEntry.getValue();
972-
if (Regex.isSimpleMatchPattern(fieldName)) {
973-
for (String resolvedFieldName : context.getMapperService().simpleMatchToIndexNames(fieldName)) {
974-
resolvedFields.put(resolvedFieldName, weight);
975-
}
976-
} else {
977-
resolvedFields.put(fieldName, weight);
978-
}
907+
if (Regex.isMatchAllPattern(defaultField)) {
908+
queryParser = new QueryStringQueryParser(context, lenient == null ? true : lenient);
909+
} else {
910+
queryParser = new QueryStringQueryParser(context, defaultField, isLenient);
979911
}
912+
} else if (fieldsAndWeights.size() > 0) {
913+
final Map<String, Float> resolvedFields = QueryStringQueryParser.resolveMappingFields(context, fieldsAndWeights);
980914
queryParser = new QueryStringQueryParser(context, resolvedFields, isLenient);
981915
} else {
982-
// If explicitly required to use all fields, use all fields, OR:
983-
// Automatically determine the fields (to replace the _all field) if all of the following are true:
984-
// - The _all field is disabled,
985-
// - and the default_field has not been changed in the settings
986-
// - and default_field is not specified in the request
987-
// - and no fields are specified in the request
988-
if ((useAllFields != null && useAllFields) ||
916+
// Expand to all fields if:
917+
// - The index default search field is "*"
918+
// - The index default search field is "_all" and _all is disabled
919+
// TODO the index default search field should be "*" for new indices.
920+
if (Regex.isMatchAllPattern(context.defaultField()) ||
989921
(context.getMapperService().allEnabled() == false && "_all".equals(context.defaultField()))) {
990922
// Automatically determine the fields from the index mapping.
991923
// Automatically set leniency to "true" if unset so mismatched fields don't cause exceptions;
992-
queryParser = new QueryStringQueryParser(context, allQueryableDefaultFields(context), lenient == null ? true : lenient);
924+
queryParser = new QueryStringQueryParser(context, lenient == null ? true : lenient);
993925
} else {
994926
queryParser = new QueryStringQueryParser(context, context.defaultField(), isLenient);
995927
}

core/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import org.elasticsearch.common.xcontent.XContentParser;
3434
import org.elasticsearch.index.mapper.MappedFieldType;
3535
import org.elasticsearch.index.query.SimpleQueryParser.Settings;
36+
import org.elasticsearch.index.search.QueryStringQueryParser;
3637

3738
import java.io.IOException;
3839
import java.util.HashMap;
@@ -376,7 +377,8 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
376377
(context.getMapperService().allEnabled() == false &&
377378
"_all".equals(context.defaultField()) &&
378379
this.fieldsAndWeights.isEmpty())) {
379-
resolvedFieldsAndWeights = QueryStringQueryBuilder.allQueryableDefaultFields(context);
380+
resolvedFieldsAndWeights = QueryStringQueryParser.resolveMappingField(context, "*", 1.0f,
381+
false, false);
380382
// Need to use lenient mode when using "all-mode" so exceptions aren't thrown due to mismatched types
381383
newSettings.lenient(lenientSet ? settings.lenient() : true);
382384
} else {

core/src/main/java/org/elasticsearch/index/search/ExistsFieldQueryExtension.java

Lines changed: 0 additions & 44 deletions
This file was deleted.

0 commit comments

Comments
 (0)