diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 67bbdaf79ce76..1a80aa80a30b0 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -368,14 +368,14 @@ public Query fuzzyQuery(Object value, Fuzziness fuzziness, int prefixLength, int } public Query prefixQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, QueryShardContext context) { - throw new QueryShardException(context, "Can only use prefix queries on keyword and text fields - not on [" + name + throw new QueryShardException(context, "Can only use prefix queries on keyword, text and wildcard fields - not on [" + name + "] which is of type [" + typeName() + "]"); } public Query wildcardQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, QueryShardContext context) { - throw new QueryShardException(context, "Can only use wildcard queries on keyword and text fields - not on [" + name + throw new QueryShardException(context, "Can only use wildcard queries on keyword, text and wildcard fields - not on [" + name + "] which is of type [" + typeName() + "]"); } diff --git a/server/src/main/java/org/elasticsearch/index/query/QueryBuilders.java b/server/src/main/java/org/elasticsearch/index/query/QueryBuilders.java index 3aa6118936e4d..3933eb26a7415 100644 --- a/server/src/main/java/org/elasticsearch/index/query/QueryBuilders.java +++ b/server/src/main/java/org/elasticsearch/index/query/QueryBuilders.java @@ -239,7 +239,7 @@ public static RangeQueryBuilder rangeQuery(String name) { * which matches any single character. Note this query can be slow, as it * needs to iterate over many terms. In order to prevent extremely slow WildcardQueries, * a Wildcard term should not start with one of the wildcards {@code *} or - * {@code ?}. + * {@code ?}. 
(The new wildcard field type, however, is optimised for leading wildcards.) * * @param name The field name * @param query The wildcard query string diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/wildcard/10_wildcard_basic.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/wildcard/10_wildcard_basic.yml index c67a79c8218da..e827e1bbb8062 100644 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/wildcard/10_wildcard_basic.yml +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/wildcard/10_wildcard_basic.yml @@ -1,8 +1,8 @@ setup: - skip: features: headers - version: " - 7.9.99" - reason: "wildcard fields were added from 8.0" + version: " - 7.6.99" + reason: "wildcard fields were added from 7.7" - do: indices.create: @@ -26,6 +26,12 @@ setup: id: 2 body: my_wildcard: goodbye world + - do: + index: + index: test-index + id: 3 + body: + my_wildcard: cAsE iNsEnSiTiVe World - do: indices.refresh: {} @@ -82,6 +88,19 @@ setup: - match: {hits.total.value: 1} +--- +"Case insensitive query": + - do: + search: + body: + track_total_hits: true + query: + wildcard: + my_wildcard._case_insensitive: {value: "*Worl*" } + + + - match: {hits.total.value: 3} + --- "Short suffix query": - do: @@ -93,7 +112,7 @@ setup: my_wildcard: {value: "*ld" } - - match: {hits.total.value: 2} + - match: {hits.total.value: 3} --- "Long suffix query": @@ -199,10 +218,11 @@ setup: track_total_hits: true sort: [ { "my_wildcard": "desc" } ] - - match: { hits.total.value: 2 } - - length: { hits.hits: 2 } + - match: { hits.total.value: 3 } + - length: { hits.hits: 3 } - match: { hits.hits.0._id: "1" } - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "3" } - do: search: @@ -210,9 +230,10 @@ setup: track_total_hits: true sort: [ { "my_wildcard": "asc" } ] - - match: { hits.total.value: 2 } - - length: { hits.hits: 2 } - - match: { hits.hits.0._id: "2" } - - match: { hits.hits.1._id: "1" } + - match: { hits.total.value: 3 } + - length: { hits.hits: 3 } + - 
match: { hits.hits.0._id: "3" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "1" } diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/AutomatonQueryOnBinaryDv.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/AutomatonQueryOnBinaryDv.java index 648fbc7e0cdc3..53e7e9597f660 100644 --- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/AutomatonQueryOnBinaryDv.java +++ b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/AutomatonQueryOnBinaryDv.java @@ -6,6 +6,7 @@ package org.elasticsearch.xpack.wildcard.mapper; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; @@ -23,6 +24,7 @@ import org.apache.lucene.util.automaton.ByteRunAutomaton; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.Objects; /** @@ -34,11 +36,13 @@ public class AutomatonQueryOnBinaryDv extends Query { private final String field; private final String matchPattern; private final Automaton automaton; + private Analyzer normalizer; - public AutomatonQueryOnBinaryDv(String field, String matchPattern, Automaton automaton) { + public AutomatonQueryOnBinaryDv(String field, String matchPattern, Automaton automaton, Analyzer normalizer) { this.field = field; this.matchPattern = matchPattern; this.automaton = automaton; + this.normalizer = normalizer; } @Override @@ -62,14 +66,24 @@ public boolean matches() throws IOException { int size = badi.readVInt(); for (int i=0; i< size; i++) { int valLength = badi.readVInt(); - if (bytesMatcher.run(arrayOfValues.bytes, badi.getPosition(), valLength)) { + if (valueMatches(arrayOfValues.bytes, badi.getPosition(), valLength)) { return true; - } + } badi.skipBytes(valLength); } return false; } + private boolean valueMatches(byte[] bytes, int position, int 
valLength) { + if (normalizer == null) { + return bytesMatcher.run(bytes, badi.getPosition(), valLength); + } else { + String s = new String(bytes, position, valLength, StandardCharsets.UTF_8); + BytesRef normalized = normalizer.normalize(null, s); + return (bytesMatcher.run(normalized.bytes, normalized.offset, normalized.length)); + } + } + @Override public float matchCost() { // TODO: how can we compute this? diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java index e489d8a35bb9f..367d40549a014 100644 --- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java +++ b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.wildcard.mapper; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.LowerCaseFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.ngram.NGramTokenizer; @@ -28,8 +29,10 @@ import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.automaton.Automaton; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.common.collect.Iterators; import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.settings.Settings; @@ -79,9 +82,18 @@ public class WildcardFieldMapper extends FieldMapper { @Override public TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new NGramTokenizer(NGRAM_SIZE, NGRAM_SIZE); - return new TokenStreamComponents(tokenizer); + // Lower case all ngram content + 
TokenStream tok = new LowerCaseFilter(tokenizer); + return new TokenStreamComponents(r -> { + tokenizer.setReader(r); + },tok); } - }); + + @Override + protected TokenStream normalize(String fieldName, TokenStream in) { + return new LowerCaseFilter(in); + } + }); public static class Defaults { public static final MappedFieldType FIELD_TYPE = new WildcardFieldType(); @@ -167,12 +179,117 @@ public WildcardFieldType fieldType() { @Override public WildcardFieldMapper build(BuilderContext context) { - setupFieldType(context); + setupFieldType(context); + + String fullName = buildFullName(context); + CaseInsensitiveFieldType caseInsensitiveFieldType = + new CaseInsensitiveFieldType(fullName, fullName + "._case_insensitive"); + CaseInsensitiveFieldMapper caseInsensitiveFieldMapper = + new CaseInsensitiveFieldMapper(caseInsensitiveFieldType, context.indexSettings()); + return new WildcardFieldMapper( name, fieldType, defaultFieldType, ignoreAbove, - context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo); + context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo, caseInsensitiveFieldMapper); } } + + + @SuppressWarnings("unchecked") + @Override + public Iterator iterator() { + List subIterators = new ArrayList<>(); + if (caseInsensitiveFieldMapper != null) { + subIterators.add(caseInsensitiveFieldMapper); + } + return Iterators.concat(super.iterator(), subIterators.iterator()); + } + + private static final class CaseInsensitiveFieldMapper extends FieldMapper { + + protected CaseInsensitiveFieldMapper(CaseInsensitiveFieldType fieldType, Settings indexSettings) { + super(fieldType.name(), fieldType, fieldType, indexSettings, MultiFields.empty(), CopyTo.empty()); + } + + @Override + protected void parseCreateField(ParseContext context, List fields) { + throw new UnsupportedOperationException(); + } + + @Override + protected String contentType() { + return "caseInsensitive"; + } + + @Override + public String toString() { + return 
fieldType().toString(); + } + } + + static final class CaseInsensitiveFieldType extends MappedFieldType { + + final String parentField; + + CaseInsensitiveFieldType(String parentField, String name) { + setName(name); + this.parentField = parentField; + } + + void doXContent(XContentBuilder builder) throws IOException { + builder.startObject("index_caseInsensitive"); + builder.endObject(); + } + + private WildcardFieldType getParent(QueryShardContext context) { + return (WildcardFieldType) context.fieldMapper(this.parentField); + } + + + @Override + public Query wildcardQuery(String value, RewriteMethod method, QueryShardContext context) { + // Delegate to parent with case sensitivity turned off. + return getParent(context).wildcardQuery(value, method, context, false); + } + + @Override + public Query termsQuery(List values, QueryShardContext context) { + BooleanQuery.Builder bq = new BooleanQuery.Builder(); + for (Object value : values) { + bq.add(termQuery(value, context), Occur.SHOULD); + } + return new ConstantScoreQuery(bq.build()); + } + + @Override + public Query termQuery(Object value, QueryShardContext context) { + return wildcardQuery(BytesRefs.toString(value), MultiTermQuery.CONSTANT_SCORE_REWRITE, context); + } + + @Override + public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) { + return wildcardQuery(value + "*", method, context); + } + + @Override + public CaseInsensitiveFieldType clone() { + return new CaseInsensitiveFieldType(parentField, name()); + } + + @Override + public String typeName() { + return "caseInsensitive"; + } + + @Override + public String toString() { + return super.toString() + ",caseInsensitive"; + } + + @Override + public Query existsQuery(QueryShardContext context) { + return getParent(context).existsQuery(context); + } + } public static class TypeParser implements Mapper.TypeParser { @Override @@ -320,15 +437,29 @@ public boolean equals(Object obj) { PatternStructure other = 
(PatternStructure) obj; return pattern.equals(other.pattern); } - - } - + + public static BytesRef toLower(BytesRef value) { + return WILDCARD_ANALYZER.normalize(null, value.utf8ToString()); + } + + public static String toLower(String value) { + return WILDCARD_ANALYZER.normalize(null, value).utf8ToString(); + } @Override public Query wildcardQuery(String wildcardPattern, RewriteMethod method, QueryShardContext context) { + return wildcardQuery(wildcardPattern, method, context, true); + } + + public Query wildcardQuery(String wildcardPattern, RewriteMethod method, QueryShardContext context, boolean caseSensitive) { + if (caseSensitive == false) { + wildcardPattern = toLower(wildcardPattern); + } + PatternStructure patternStructure = new PatternStructure(wildcardPattern); ArrayList tokens = new ArrayList<>(); + for (int i = 0; i < patternStructure.fragments.length; i++) { String fragment = patternStructure.fragments[i]; @@ -389,7 +520,9 @@ public Query wildcardQuery(String wildcardPattern, RewriteMethod method, QuerySh BooleanQuery.Builder verifyingBuilder = new BooleanQuery.Builder(); verifyingBuilder.add(new BooleanClause(approximation, Occur.MUST)); Automaton automaton = WildcardQuery.toAutomaton(new Term(name(), wildcardPattern)); - verifyingBuilder.add(new BooleanClause(new AutomatonQueryOnBinaryDv(name(), wildcardPattern, automaton), Occur.MUST)); + Analyzer normalizer = caseSensitive ? 
null: WILDCARD_ANALYZER; + verifyingBuilder.add(new BooleanClause( + new AutomatonQueryOnBinaryDv(name(), wildcardPattern, automaton, normalizer), Occur.MUST)); return verifyingBuilder.build(); } return approximation; @@ -486,9 +619,11 @@ public SortField sortField(Object missingValue, MultiValueMode sortMode, Nested } private int ignoreAbove; + private CaseInsensitiveFieldMapper caseInsensitiveFieldMapper; private WildcardFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType, - int ignoreAbove, Settings indexSettings, MultiFields multiFields, CopyTo copyTo) { + int ignoreAbove, Settings indexSettings, MultiFields multiFields, CopyTo copyTo, + CaseInsensitiveFieldMapper caseInsensitiveFieldMapper) { super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo); this.ignoreAbove = ignoreAbove; assert fieldType.indexOptions() == IndexOptions.DOCS; @@ -496,6 +631,7 @@ private WildcardFieldMapper(String simpleName, MappedFieldType fieldType, Mapped ngramFieldType = fieldType.clone(); ngramFieldType.setTokenized(true); ngramFieldType.freeze(); + this.caseInsensitiveFieldMapper = caseInsensitiveFieldMapper; } /** Values that have more chars than the return value of this method will @@ -570,6 +706,8 @@ protected String contentType() { @Override protected void doMerge(Mapper mergeWith) { super.doMerge(mergeWith); + WildcardFieldMapper mw = (WildcardFieldMapper) mergeWith; + this.caseInsensitiveFieldMapper = (CaseInsensitiveFieldMapper) this.caseInsensitiveFieldMapper.merge(mw.caseInsensitiveFieldMapper); this.ignoreAbove = ((WildcardFieldMapper) mergeWith).ignoreAbove; } }