Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ protected KeywordFieldType(KeywordFieldType ref) {
this.splitQueriesOnWhitespace = ref.splitQueriesOnWhitespace;
}

@Override
public KeywordFieldType clone() {
return new KeywordFieldType(this);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@

import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
Expand All @@ -31,13 +29,16 @@
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.query.support.QueryParsers;

import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.elasticsearch.search.SearchService.ALLOW_EXPENSIVE_QUERIES;

Expand All @@ -47,6 +48,8 @@
* can be implemented. */
public abstract class StringFieldType extends TermBasedFieldType {

private static final Pattern WILDCARD_PATTERN = Pattern.compile("(\\\\.)|([?*]+)");

public StringFieldType() {}

protected StringFieldType(MappedFieldType ref) {
Expand Down Expand Up @@ -92,16 +95,41 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, Quer

@Override
public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
Query termQuery = termQuery(value, context);
if (termQuery instanceof MatchNoDocsQuery || termQuery instanceof MatchAllDocsQuery) {
return termQuery;
}

failIfNotIndexed();
if (context.allowExpensiveQueries() == false) {
throw new ElasticsearchException("[wildcard] queries cannot be executed when '" +
ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to false.");
}
Term term = MappedFieldType.extractTerm(termQuery);

Term term;
if (searchAnalyzer() != null) {
// we want to normalize everything except wildcard characters, e.g. F?o Ba* to f?o ba*, even if e.g there
// is a char_filter that would otherwise remove them
Matcher wildcardMatcher = WILDCARD_PATTERN.matcher(value);
BytesRefBuilder sb = new BytesRefBuilder();
int last = 0;

while (wildcardMatcher.find()) {
if (wildcardMatcher.start() > 0) {
String chunk = value.substring(last, wildcardMatcher.start());

BytesRef normalized = searchAnalyzer().normalize(name(), chunk);
sb.append(normalized);
}
// append the matched group - without normalizing
sb.append(new BytesRef(wildcardMatcher.group()));

last = wildcardMatcher.end();
}
if (last < value.length()) {
String chunk = value.substring(last);
BytesRef normalized = searchAnalyzer().normalize(name(), chunk);
sb.append(normalized);
}
term = new Term(name(), sb.toBytesRef());
} else {
term = new Term(name(), indexedValueForSearch(value));
}

WildcardQuery query = new WildcardQuery(term);
QueryParsers.setRewriteMethod(query, method);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,21 @@
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.plain.ConstantIndexFieldData;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.query.support.QueryParsers;

import java.io.IOException;
import java.util.Arrays;
Expand All @@ -51,6 +55,8 @@
import java.util.Set;
import java.util.function.Function;

import static org.elasticsearch.search.SearchService.ALLOW_EXPENSIVE_QUERIES;

public class TypeFieldMapper extends MetadataFieldMapper {

public static final String NAME = "_type";
Expand Down Expand Up @@ -170,6 +176,25 @@ public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower
}
return result;
}

@Override
public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
Query termQuery = termQuery(value, context);
if (termQuery instanceof MatchNoDocsQuery || termQuery instanceof MatchAllDocsQuery) {
return termQuery;
}

if (context.allowExpensiveQueries() == false) {
throw new ElasticsearchException("[wildcard] queries cannot be executed when '" +
ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to false.");
}
Term term = MappedFieldType.extractTerm(termQuery);

WildcardQuery query = new WildcardQuery(term);
QueryParsers.setRewriteMethod(query, method);
return query;
}

}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -560,13 +560,6 @@ public void testParseRelation() {
assertEquals(ShapeRelation.INTERSECTS, builder.relation());
}

public void testTypeField() throws IOException {
RangeQueryBuilder builder = QueryBuilders.rangeQuery("_type")
.from("value1");
builder.doToQuery(createShardContext());
assertWarnings(QueryShardContext.TYPES_DEPRECATION_MESSAGE);
}

/**
* Range queries should generally be cacheable, at least the ones we create randomly.
* This test makes sure we also test the non-cacheable cases regularly.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

import java.io.IOException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import static org.hamcrest.Matchers.equalTo;
Expand Down Expand Up @@ -75,7 +76,9 @@ protected void doAssertLuceneQuery(WildcardQueryBuilder queryBuilder, Query quer

assertThat(wildcardQuery.getField(), equalTo(expectedFieldName));
assertThat(wildcardQuery.getTerm().field(), equalTo(expectedFieldName));
assertThat(wildcardQuery.getTerm().text(), equalTo(queryBuilder.value()));
// wildcard queries get normalized
String text = wildcardQuery.getTerm().text().toLowerCase(Locale.ROOT);
assertThat(text, equalTo(text));
} else {
Query expected = new MatchNoDocsQuery("unknown field [" + expectedFieldName + "]");
assertEquals(expected, query);
Expand Down Expand Up @@ -138,14 +141,14 @@ public void testTypeField() throws IOException {
builder.doToQuery(createShardContext());
assertWarnings(QueryShardContext.TYPES_DEPRECATION_MESSAGE);
}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed this test for wildcard queries on "_type" fields. These don't return anything useful at the moment already, however before the change in this PR we still got an empty response while now we get an error because "_type" is not indexed. I don't know if we can tolerate this, otherwise I need to add a very simple wildcard implementation to TypeFieldType again.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not following, _type should still be indexed in 7x? It's only in 8x that it will be gone as a field.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_type should still be indexed in 7x

Thats true, the test above only checks for the deprecation message though. Currently if you do a wildcard query on a "_type" field, it returns no hits (even for exact values like "value": "_doc"). Thats why I think nobody should really do that kind of query on a "_type" field. With the changes in this PR we would error in those cases though. If you want I can try to work around that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Erroring is better than silent weirdness, let's leave it as is.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I meant to write "except for exact values like "_doc" above. So that works on 7.x, but I hardly believe anybody relies on that? If you change your mind after this clarification let me know.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

… starting to think better safe than sorry, I think its a small addition that will keep the existing (weird) behaviour
I’ll look into that for a bit, leave it if it gets too ugly


public void testRewriteIndexQueryToMatchNone() throws IOException {
WildcardQueryBuilder query = new WildcardQueryBuilder("_index", "does_not_exist");
QueryShardContext queryShardContext = createShardContext();
QueryBuilder rewritten = query.rewrite(queryShardContext);
assertThat(rewritten, instanceOf(MatchNoneQueryBuilder.class));
}
}

public void testRewriteIndexQueryNotMatchNone() throws IOException {
String fullIndexName = getIndex().getName();
String firstHalfOfIndexName = fullIndexName.substring(0,fullIndexName.length()/2);
Expand Down
118 changes: 116 additions & 2 deletions server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

package org.elasticsearch.search.query;

import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.pattern.PatternReplaceCharFilter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.join.ScoreMode;
Expand All @@ -32,11 +34,15 @@
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.document.DocumentField;
import org.elasticsearch.common.lucene.search.SpanBooleanQueryRewriteWithMaxClause;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.time.DateFormatter;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.NormalizingCharFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
Expand All @@ -45,11 +51,14 @@
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.index.query.WildcardQueryBuilder;
import org.elasticsearch.index.query.WrapperQueryBuilder;
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
import org.elasticsearch.index.search.MatchQuery;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.indices.TermsLookup;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.SearchHit;
Expand All @@ -60,16 +69,20 @@
import org.elasticsearch.test.junit.annotations.TestIssueLogging;

import java.io.IOException;
import java.io.Reader;
import java.time.Instant;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ExecutionException;
import java.util.regex.Pattern;

import static java.util.Collections.singletonMap;
import static org.elasticsearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
Expand Down Expand Up @@ -120,7 +133,7 @@ public class SearchQueryIT extends ESIntegTestCase {

@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return Collections.singleton(InternalSettingsPlugin.class);
return Arrays.asList(InternalSettingsPlugin.class, MockAnalysisPlugin.class);
}

@Override
Expand Down Expand Up @@ -1897,6 +1910,107 @@ public void testFieldAliasesForMetaFields() throws Exception {

}

/**
* Test that wildcard queries on keyword fields get normalized
*/
public void testWildcardQueryNormalizationOnKeywordField() {
assertAcked(prepareCreate("test")
.setSettings(Settings.builder()
.put("index.analysis.normalizer.lowercase_normalizer.type", "custom")
.putList("index.analysis.normalizer.lowercase_normalizer.filter", "lowercase")
.build())
.addMapping("_doc", "field1", "type=keyword,normalizer=lowercase_normalizer"));
client().prepareIndex("test", "_doc", "1").setSource("field1", "Bbb Aaa").get();
refresh();

{
WildcardQueryBuilder wildCardQuery = wildcardQuery("field1", "Bb*");
SearchResponse searchResponse = client().prepareSearch().setQuery(wildCardQuery).get();
assertHitCount(searchResponse, 1L);

wildCardQuery = wildcardQuery("field1", "bb*");
searchResponse = client().prepareSearch().setQuery(wildCardQuery).get();
assertHitCount(searchResponse, 1L);
}
}

/**
* Test that wildcard queries on text fields get normalized
*/
public void testWildcardQueryNormalizationOnTextField() {
assertAcked(prepareCreate("test")
.setSettings(Settings.builder()
.put("index.analysis.analyzer.lowercase_analyzer.type", "custom")
.put("index.analysis.analyzer.lowercase_analyzer.tokenizer", "standard")
.putList("index.analysis.analyzer.lowercase_analyzer.filter", "lowercase")
.build())
.addMapping("_doc", "field1", "type=text,analyzer=lowercase_analyzer"));
client().prepareIndex("test", "_doc", "1").setSource("field1", "Bbb Aaa").get();
refresh();

{
WildcardQueryBuilder wildCardQuery = wildcardQuery("field1", "Bb*");
SearchResponse searchResponse = client().prepareSearch().setQuery(wildCardQuery).get();
assertHitCount(searchResponse, 1L);

wildCardQuery = wildcardQuery("field1", "bb*");
searchResponse = client().prepareSearch().setQuery(wildCardQuery).get();
assertHitCount(searchResponse, 1L);
}
}

/**
* Reserved characters should be excluded when the normalization is applied for keyword fields.
* See https://github.com/elastic/elasticsearch/issues/46300 for details.
*/
public void testWildcardQueryNormalizationKeywordSpecialCharacters() {
assertAcked(prepareCreate("test")
.setSettings(Settings.builder().put("index.analysis.char_filter.no_wildcard.type", "mock_pattern_replace")
.put("index.analysis.normalizer.no_wildcard.type", "custom")
.put("index.analysis.normalizer.no_wildcard.char_filter", "no_wildcard").build())
.addMapping("_doc", "field", "type=keyword,normalizer=no_wildcard"));
client().prepareIndex("test", "_doc", "1").setSource("field", "label-1").get();
refresh();

WildcardQueryBuilder wildCardQuery = wildcardQuery("field", "la*");
SearchResponse searchResponse = client().prepareSearch().setQuery(wildCardQuery).get();
assertHitCount(searchResponse, 1L);

wildCardQuery = wildcardQuery("field", "la*el-?");
searchResponse = client().prepareSearch().setQuery(wildCardQuery).get();
assertHitCount(searchResponse, 1L);
}

public static class MockAnalysisPlugin extends Plugin implements AnalysisPlugin {

@Override
public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
return singletonMap("mock_pattern_replace", (indexSettings, env, name, settings) -> {
class Factory implements NormalizingCharFilterFactory {

private final Pattern pattern = Regex.compile("[\\*\\?]", null);

@Override
public String name() {
return name;
}

@Override
public Reader create(Reader reader) {
return new PatternReplaceCharFilter(pattern, "", reader);
}
}
return new Factory();
});
}

@Override
public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
return singletonMap("keyword", (indexSettings, environment, name, settings) -> TokenizerFactory.newFactory(name,
() -> new MockTokenizer(MockTokenizer.KEYWORD, false)));
}
}

/**
* Test correct handling {@link SpanBooleanQueryRewriteWithMaxClause#rewrite(IndexReader, MultiTermQuery)}. That rewrite method is e.g.
* set for fuzzy queries with "constant_score" rewrite nested inside a `span_multi` query and would cause NPEs due to an unset
Expand Down