From e745bfadc4e002ad53ba607ced1a69dc789eb5d1 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Wed, 27 May 2020 20:46:46 -0700 Subject: [PATCH] Respect the ignore_above option. For keyword-style fields, if the source value is larger than ignore_above then we don't retrieve the field. --- .../ICUCollationKeywordFieldMapper.java | 7 +- .../ICUCollationKeywordFieldMapperTests.java | 7 + .../index/mapper/FieldMapper.java | 4 +- .../index/mapper/KeywordFieldMapper.java | 22 ++- .../index/mapper/KeywordFieldMapperTests.java | 9 +- .../subphase/FieldValueRetrieverTests.java | 27 ++++ .../wildcard/mapper/WildcardFieldMapper.java | 7 +- .../mapper/WildcardFieldMapperTests.java | 137 ++++++++++-------- 8 files changed, 148 insertions(+), 72 deletions(-) diff --git a/plugins/analysis-icu/src/main/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapper.java b/plugins/analysis-icu/src/main/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapper.java index 92b7317d2d47d..1339950cb77f1 100644 --- a/plugins/analysis-icu/src/main/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapper.java +++ b/plugins/analysis-icu/src/main/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapper.java @@ -750,6 +750,11 @@ protected String parseSourceValue(Object value, String format) { if (format != null) { throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); } - return value.toString(); + + String keywordValue = value.toString(); + if (keywordValue.length() > ignoreAbove) { + return null; + } + return keywordValue; } } diff --git a/plugins/analysis-icu/src/test/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapperTests.java b/plugins/analysis-icu/src/test/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapperTests.java index 1648259a03089..a7779a3c560f8 100644 --- a/plugins/analysis-icu/src/test/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapperTests.java +++ b/plugins/analysis-icu/src/test/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapperTests.java @@ -488,5 +488,12 @@ public void testParseSourceValue() { assertEquals("value", mapper.parseSourceValue("value", null)); assertEquals("42", mapper.parseSourceValue(42L, null)); assertEquals("true", mapper.parseSourceValue(true, null)); + + ICUCollationKeywordFieldMapper ignoreAboveMapper = new ICUCollationKeywordFieldMapper.Builder("field") + .ignoreAbove(4) + .build(context); + assertNull(ignoreAboveMapper.parseSourceValue("value", null)); + assertEquals("42", ignoreAboveMapper.parseSourceValue(42L, null)); + assertEquals("true", ignoreAboveMapper.parseSourceValue(true, null)); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index 23440270fe3b0..2c6d985e090b1 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -349,7 +349,9 @@ public List lookupValues(SourceLookup lookup, @Nullable String format) { List sourceValues = sourceValue instanceof List ? (List) sourceValue : List.of(sourceValue); for (Object value : sourceValues) { Object parsedValue = parseSourceValue(value, format); - values.add(parsedValue); + if (parsedValue != null) { + values.add(parsedValue); + } } } return values; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index ce752233047c7..20ad6aeefb784 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -186,14 +186,6 @@ public Mapper.Builder parse(String name, Map node, ParserCont } } - @Override - protected String parseSourceValue(Object value, String format) { - if (format != null) { - throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); - } - return value.toString(); - } - public static final class KeywordFieldType extends StringFieldType { private NamedAnalyzer normalizer = null; @@ -382,6 +374,20 @@ protected void parseCreateField(ParseContext context) throws IOException { context.doc().add(new SortedSetDocValuesField(fieldType().name(), binaryValue)); } } + + @Override + protected String parseSourceValue(Object value, String format) { + if (format != null) { + throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); + } + + String keywordValue = value.toString(); + if (keywordValue.length() > ignoreAbove) { + return null; + } + return keywordValue; + } + @Override protected String contentType() { return CONTENT_TYPE; diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java index 5bbdd649ccf5b..3707371e880ff 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java @@ -604,13 +604,20 @@ public void testMeta() throws Exception { public void testParseSourceValue() { Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT.id).build(); Mapper.BuilderContext context = new Mapper.BuilderContext(settings, new ContentPath()); - KeywordFieldMapper mapper = new KeywordFieldMapper.Builder("field").build(context); + KeywordFieldMapper mapper = new KeywordFieldMapper.Builder("field").build(context); assertEquals("value", mapper.parseSourceValue("value", null)); assertEquals("42", mapper.parseSourceValue(42L, null)); assertEquals("true", mapper.parseSourceValue(true, null)); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> mapper.parseSourceValue(true, "format")); assertEquals("Field [field] of type [keyword] doesn't support formats.", e.getMessage()); + + KeywordFieldMapper ignoreAboveMapper = new KeywordFieldMapper.Builder("field") + .ignoreAbove(4) + .build(context); + assertNull(ignoreAboveMapper.parseSourceValue("value", null)); + assertEquals("42", ignoreAboveMapper.parseSourceValue(42L, null)); + assertEquals("true", ignoreAboveMapper.parseSourceValue(true, null)); } } diff --git a/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldValueRetrieverTests.java b/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldValueRetrieverTests.java index a132be77fc593..8b043fd506f33 100644 --- a/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldValueRetrieverTests.java +++ b/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldValueRetrieverTests.java @@ -178,6 +178,33 @@ public void testDateFormat() throws IOException { assertThat(dateField.getValue(), equalTo("1990/12/29")); } + public void testIgnoreAbove() throws IOException { + XContentBuilder mapping = XContentFactory.jsonBuilder().startObject() + .startObject("properties") + .startObject("field") + .field("type", "keyword") + .field("ignore_above", 20) + .endObject() + .endObject() + .endObject(); + + IndexService indexService = createIndex("index", Settings.EMPTY, mapping); + MapperService mapperService = indexService.mapperService(); + + XContentBuilder source = XContentFactory.jsonBuilder().startObject() + .array("field", "value", "other_value", "really_really_long_value") + .endObject(); + Map fields = retrieveFields(mapperService, source, "field"); + DocumentField field = fields.get("field"); + assertThat(field.getValues().size(), equalTo(2)); + + source = XContentFactory.jsonBuilder().startObject() + .array("field", "really_really_long_value") + .endObject(); + fields = retrieveFields(mapperService, source, "field"); + assertFalse(fields.containsKey("field")); + } + public void testFieldAliases() throws IOException { XContentBuilder mapping = XContentFactory.jsonBuilder().startObject() .startObject("properties") diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java index bf370ec1a879c..6ade256d0ce05 100644 --- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java +++ b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java @@ -893,7 +893,12 @@ protected String parseSourceValue(Object value, String format) { if (format != null) { throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); } - return value.toString(); + + String keywordValue = value.toString(); + if (keywordValue.length() > ignoreAbove) { + return null; + } + return keywordValue; } // For internal use by Lucene only - used to define ngram index diff --git a/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java b/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java index 6af2e499c677c..5892b8e32d7fc 100644 --- a/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java +++ b/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java @@ -71,17 +71,17 @@ import static org.mockito.Mockito.when; public class WildcardFieldMapperTests extends ESTestCase { - + static QueryShardContext createMockQueryShardContext(boolean allowExpensiveQueries) { QueryShardContext queryShardContext = mock(QueryShardContext.class); when(queryShardContext.allowExpensiveQueries()).thenReturn(allowExpensiveQueries); return queryShardContext; - } + } private static final String KEYWORD_FIELD_NAME = "keyword_field"; private static final String WILDCARD_FIELD_NAME = "wildcard_field"; public static final QueryShardContext MOCK_QSC = createMockQueryShardContext(true); - + static final int MAX_FIELD_LENGTH = 30; static WildcardFieldMapper wildcardFieldType; static KeywordFieldMapper keywordFieldType; @@ -168,8 +168,8 @@ public void testTooBigQueryField() throws IOException { wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(queryString, RegExp.ALL, 20000, null, MOCK_QSC); wildcardFieldTopDocs = searcher.search(wildcardFieldQuery, 10, Sort.INDEXORDER); assertThat(wildcardFieldTopDocs.totalHits.value, equalTo(0L)); - - + + reader.close(); dir.close(); } @@ -216,7 +216,7 @@ public void testSearchResultsVersusKeywordField() throws IOException { String pattern = null; switch (randomInt(3)) { case 0: - pattern = getRandomWildcardPattern(); + pattern = getRandomWildcardPattern(); wildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery(pattern, null, MOCK_QSC); keywordFieldQuery = keywordFieldType.fieldType().wildcardQuery(pattern, null, MOCK_QSC); break; @@ -251,12 +251,12 @@ public void testSearchResultsVersusKeywordField() throws IOException { // Prefix length shouldn't be longer than selected search string // BUT keyword field has a bug with prefix length when equal - see https://github.com/elastic/elasticsearch/issues/55790 // so we opt for one less - prefixLength = Math.min(pattern.length() - 1 , prefixLength); + prefixLength = Math.min(pattern.length() - 1 , prefixLength); boolean transpositions = randomBoolean(); - - wildcardFieldQuery = wildcardFieldType.fieldType().fuzzyQuery(pattern, fuzziness, prefixLength, 50, + + wildcardFieldQuery = wildcardFieldType.fieldType().fuzzyQuery(pattern, fuzziness, prefixLength, 50, transpositions, MOCK_QSC); - keywordFieldQuery = keywordFieldType.fieldType().fuzzyQuery(pattern, fuzziness, prefixLength, 50, + keywordFieldQuery = keywordFieldType.fieldType().fuzzyQuery(pattern, fuzziness, prefixLength, 50, transpositions, MOCK_QSC); break; } @@ -293,29 +293,29 @@ public void testSearchResultsVersusKeywordField() throws IOException { reader.close(); dir.close(); } - + public void testRegexAcceleration() throws IOException, ParseException { // All these expressions should rewrite to a match all with no verification step required at all String superfastRegexes[]= { ".*", "...*..", "(foo|bar|.*)", "@"}; for (String regex : superfastRegexes) { Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp.ALL, 20000, null, MOCK_QSC); assertTrue(wildcardFieldQuery instanceof DocValuesFieldExistsQuery); - } + } String matchNoDocsRegexes[]= { ""}; for (String regex : matchNoDocsRegexes) { Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp.ALL, 20000, null, MOCK_QSC); assertTrue(wildcardFieldQuery instanceof MatchNoDocsQuery); - } + } - // All of these regexes should be accelerated as the equivalent of the given QueryString query - String acceleratedTests[][] = { - {".*foo.*", "foo"}, + // All of these regexes should be accelerated as the equivalent of the given QueryString query + String acceleratedTests[][] = { + {".*foo.*", "foo"}, {"..foobar","+foo +oba +ar_ +r__"}, {"(maynotexist)?foobar","+foo +oba +ar_ +r__"}, - {".*/etc/passw.*", "+\\/et +tc\\/ +\\/pa +ass +ssw"}, - {".*etc/passwd", "+etc +c\\/p +pas +ssw +wd_ +d__"}, - {"(http|ftp)://foo.*", "+((+htt +ttp) ftp) +(+\\:\\/\\/ +\\/fo +foo)"}, - {"[Pp][Oo][Ww][Ee][Rr][Ss][Hh][Ee][Ll][Ll]\\.[Ee][Xx][Ee]", "+_po +owe +ers +she +ell +l\\.e +exe +e__"}, + {".*/etc/passw.*", "+\\/et +tc\\/ +\\/pa +ass +ssw"}, + {".*etc/passwd", "+etc +c\\/p +pas +ssw +wd_ +d__"}, + {"(http|ftp)://foo.*", "+((+htt +ttp) ftp) +(+\\:\\/\\/ +\\/fo +foo)"}, + {"[Pp][Oo][Ww][Ee][Rr][Ss][Hh][Ee][Ll][Ll]\\.[Ee][Xx][Ee]", "+_po +owe +ers +she +ell +l\\.e +exe +e__"}, {"foo<1-100>bar", "+(+_fo +foo) +(+bar +r__ )"}, {"(aaa.+&.+bbb)cat", "+cat +t__"}, {".a", "a__"} @@ -323,24 +323,24 @@ public void testRegexAcceleration() throws IOException, ParseException { for (String[] test : acceleratedTests) { String regex = test[0]; String expectedAccelerationQueryString = test[1].replaceAll("_", ""+WildcardFieldMapper.TOKEN_START_OR_END_CHAR); - Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp.ALL, 20000, null, MOCK_QSC); + Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp.ALL, 20000, null, MOCK_QSC); testExpectedAccelerationQuery(regex, wildcardFieldQuery, expectedAccelerationQueryString); } - - // All these expressions should rewrite to just the verification query (there's no ngram acceleration) - // TODO we can possibly improve on some of these + + // All these expressions should rewrite to just the verification query (there's no ngram acceleration) + // TODO we can possibly improve on some of these String matchAllButVerifyTests[]= { "..", "(a)?","(a|b){0,3}", "((foo)?|(foo|bar)?)", "@&~(abc.+)", "aaa.+&.+bbb"}; for (String regex : matchAllButVerifyTests) { Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp.ALL, 20000, null, MOCK_QSC); - assertTrue(regex +" was not a pure verify query " +formatQuery(wildcardFieldQuery), + assertTrue(regex +" was not a pure verify query " +formatQuery(wildcardFieldQuery), wildcardFieldQuery instanceof AutomatonQueryOnBinaryDv); - } - - - - // Documentation - regexes that do try accelerate but we would like to improve in future versions. - String suboptimalTests[][] = { - // TODO short wildcards like a* OR b* aren't great so we just drop them. + } + + + + // Documentation - regexes that do try accelerate but we would like to improve in future versions. + String suboptimalTests[][] = { + // TODO short wildcards like a* OR b* aren't great so we just drop them. // Ideally we would attach to successors to create (acd OR bcd) { "[ab]cd", "+cd_ +d__"} }; @@ -348,18 +348,18 @@ public void testRegexAcceleration() throws IOException, ParseException { String regex = test[0]; String expectedAccelerationQueryString = test[1].replaceAll("_", ""+WildcardFieldMapper.TOKEN_START_OR_END_CHAR); Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp.ALL, 20000, null, MOCK_QSC); - + testExpectedAccelerationQuery(regex, wildcardFieldQuery, expectedAccelerationQueryString); - } + } - } + } // Make error messages more readable String formatQuery(Query q) { return q.toString().replaceAll(WILDCARD_FIELD_NAME+":", "").replaceAll(WildcardFieldMapper.TOKEN_START_STRING, "_"); } - + public void testWildcardAcceleration() throws IOException, ParseException { - + // All these expressions should rewrite to MatchAll with no verification step required at all String superfastPattern[] = { "*", "**", "*?" }; for (String pattern : superfastPattern) { @@ -399,7 +399,7 @@ public void testWildcardAcceleration() throws IOException, ParseException { } } - + static class FuzzyTest { String pattern; int prefixLength; @@ -470,7 +470,7 @@ Query getExpectedApproxQuery() throws ParseException { return bq.build(); } } - + public void testFuzzyAcceleration() throws IOException, ParseException { FuzzyTest[] tests = { @@ -483,8 +483,8 @@ public void testFuzzyAcceleration() throws IOException, ParseException { Query wildcardFieldQuery = test.getFuzzyQuery(); testExpectedAccelerationQuery(test.pattern, wildcardFieldQuery, test.getExpectedApproxQuery()); } - } - + } + void testExpectedAccelerationQuery(String regex, Query combinedQuery, String expectedAccelerationQueryString) throws ParseException { QueryParser qsp = new QueryParser(WILDCARD_FIELD_NAME, new KeywordAnalyzer()); @@ -505,35 +505,35 @@ void testExpectedAccelerationQuery(String regex, Query combinedQuery, Query expe } } assert verifyQueryFound; - - String message = "regex: "+ regex +"\nactual query: " + formatQuery(approximationQuery) + + + String message = "regex: "+ regex +"\nactual query: " + formatQuery(approximationQuery) + "\nexpected query: " + formatQuery(expectedAccelerationQuery) + "\n"; assertEquals(message, expectedAccelerationQuery, approximationQuery); - } - + } + private String getRandomFuzzyPattern(HashSet values, int edits, int prefixLength) { assert edits >=0 && edits <=2; // Pick one of the indexed document values to focus our queries on. String randomValue = values.toArray(new String[0])[randomIntBetween(0, values.size()-1)]; - + if (edits == 0) { return randomValue; } - + if (randomValue.length() > prefixLength) { randomValue = randomValue.substring(0,prefixLength) + "C" + randomValue.substring(prefixLength); edits--; } - + if(edits > 0) { randomValue = randomValue + "a"; } return randomValue; - } + } private String getRandomRegexPattern(HashSet values) { // Pick one of the indexed document values to focus our queries on. - String randomValue = values.toArray(new String[0])[randomIntBetween(0, values.size()-1)]; + String randomValue = values.toArray(new String[0])[randomIntBetween(0, values.size()-1)]; return convertToRandomRegex(randomValue); } @@ -548,38 +548,38 @@ protected String convertToRandomRegex(String randomValue) { if(substitutionPoint >0) { result.append(randomValue.substring(0,substitutionPoint)); } - + // Modify the middle... String replacementPart = randomValue.substring(substitutionPoint, substitutionPoint+substitutionLength); int mutation = randomIntBetween(0, 11); switch (mutation) { case 0: // OR with random alpha of same length - result.append("("+replacementPart+"|c"+ randomABString(replacementPart.length())+")"); + result.append("("+replacementPart+"|c"+ randomABString(replacementPart.length())+")"); break; case 1: // OR with non-existant value - result.append("("+replacementPart+"|doesnotexist)"); + result.append("("+replacementPart+"|doesnotexist)"); break; case 2: // OR with another randomised regex (used to create nested levels of expression). - result.append("(" + convertToRandomRegex(replacementPart) +"|doesnotexist)"); + result.append("(" + convertToRandomRegex(replacementPart) +"|doesnotexist)"); break; case 3: // Star-replace all ab sequences. - result.append(replacementPart.replaceAll("ab", ".*")); + result.append(replacementPart.replaceAll("ab", ".*")); break; case 4: // .-replace all b chars - result.append(replacementPart.replaceAll("b", ".")); + result.append(replacementPart.replaceAll("b", ".")); break; case 5: // length-limited stars {1,2} - result.append(".{1,"+replacementPart.length()+"}"); + result.append(".{1,"+replacementPart.length()+"}"); break; case 6: // replace all chars with . - result.append(replacementPart.replaceAll(".", ".")); + result.append(replacementPart.replaceAll(".", ".")); break; case 7: // OR with uppercase chars eg [aA] (many of these sorts of expression in the wild.. @@ -611,7 +611,7 @@ protected String convertToRandomRegex(String randomValue) { if(substitutionPoint + substitutionLength <= randomValue.length()-1) { result.append(randomValue.substring(substitutionPoint + substitutionLength)); } - + //Assert our randomly generated regex actually matches the provided raw input. RegExp regex = new RegExp(result.toString()); Automaton automaton = regex.toAutomaton(); @@ -622,6 +622,23 @@ protected String convertToRandomRegex(String randomValue) { return result.toString(); } + public void testParseSourceValue() { + Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT.id).build(); + Mapper.BuilderContext context = new Mapper.BuilderContext(settings, new ContentPath()); + + WildcardFieldMapper mapper = new WildcardFieldMapper.Builder("field").build(context); + assertEquals("value", mapper.parseSourceValue("value", null)); + assertEquals("42", mapper.parseSourceValue(42L, null)); + assertEquals("true", mapper.parseSourceValue(true, null)); + + WildcardFieldMapper ignoreAboveMapper = new WildcardFieldMapper.Builder("field") + .ignoreAbove(4) + .build(context); + assertNull(ignoreAboveMapper.parseSourceValue("value", null)); + assertEquals("42", ignoreAboveMapper.parseSourceValue(42L, null)); + assertEquals("true", ignoreAboveMapper.parseSourceValue(true, null)); + } + protected MappedFieldType provideMappedFieldType(String name) { if (name.equals(WILDCARD_FIELD_NAME)) { return wildcardFieldType.fieldType(); @@ -685,7 +702,7 @@ static String randomABString(int minLength) { if (randomBoolean()) { sb.append("a"); } else { - sb.append("A"); + sb.append("A"); } } else { sb.append("b");