From 042942283b717e502f41210fe99dab44cd25061f Mon Sep 17 00:00:00 2001 From: Marios Trivyzas Date: Fri, 13 Mar 2020 16:17:30 +0100 Subject: [PATCH 1/2] Fix Term Vectors with artificial docs and keyword fields (#53504) Previously, Term Vectors API was returning empty results for artificial documents with keyword fields. Checking only for `string()` on `IndexableField` is not enough, since for `KeywordFieldType` `binaryValue()` must be used instead. Fixes #53494 (cherry picked from commit 1fc3fe3d32f41eab2101c0536751b7c47e63cc48) --- .../index/mapper/ParseContext.java | 18 --------------- .../index/termvectors/TermVectorsService.java | 23 ++++++++++++++++++- .../index/mapper/DateFieldMapperTests.java | 3 ++- .../index/mapper/DocumentParserTests.java | 14 +++++++---- .../mapper/FieldNamesFieldMapperTests.java | 3 ++- .../index/mapper/IpFieldMapperTests.java | 3 ++- .../index/mapper/KeywordFieldMapperTests.java | 4 ++++ .../index/mapper/NumberFieldMapperTests.java | 4 ++-- 8 files changed, 43 insertions(+), 29 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/ParseContext.java b/server/src/main/java/org/elasticsearch/index/mapper/ParseContext.java index 4cfd5be2afed3..e463ed88aa6b2 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/ParseContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/ParseContext.java @@ -121,24 +121,6 @@ public IndexableField[] getFields(String name) { return f.toArray(new IndexableField[f.size()]); } - /** - * Returns an array of values of the field specified as the method parameter. - * This method returns an empty array when there are no - * matching fields. It never returns null. - * If you want the actual numeric field instances back, use {@link #getFields}. - * @param name the name of the field - * @return a String[] of field values - */ - public final String[] getValues(String name) { - List result = new ArrayList<>(); - for (IndexableField field : fields) { - if (field.name().equals(name) && field.stringValue() != null) { - result.add(field.stringValue()); - } - } - return result.toArray(new String[result.size()]); - } - public IndexableField getField(String name) { for (IndexableField field : fields) { if (field.name().equals(name)) { diff --git a/server/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java b/server/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java index dac02ef5f8e10..bdb31862f685b 100644 --- a/server/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java +++ b/server/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java @@ -44,6 +44,7 @@ import org.elasticsearch.index.get.GetResult; import org.elasticsearch.index.mapper.DocumentMapperForType; import org.elasticsearch.index.mapper.IdFieldMapper; +import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.ParseContext; @@ -56,6 +57,7 @@ import org.elasticsearch.search.dfs.AggregatedDfs; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; @@ -316,7 +318,7 @@ private static Fields generateTermVectorsFromDoc(IndexShard indexShard, TermVect else { seenFields.add(field.name()); } - String[] values = doc.getValues(field.name()); + String[] values = getValues(doc.getFields(field.name())); documentFields.add(new DocumentField(field.name(), Arrays.asList((Object[]) values))); } return generateTermVectors(indexShard, @@ -324,6 +326,25 @@ private static Fields generateTermVectorsFromDoc(IndexShard indexShard, TermVect request.offsets(), request.perFieldAnalyzer(), seenFields); } + /** + * Returns an array of values of the field specified as the method parameter. + * This method returns an empty array when there are no + * matching fields. It never returns null. + * @param fields The IndexableField to get the values from + * @return a String[] of field values + */ + public static String[] getValues(IndexableField[] fields) { + List result = new ArrayList<>(); + for (IndexableField field : fields) { + if (field.fieldType() instanceof KeywordFieldMapper.KeywordFieldType) { + result.add(field.binaryValue().utf8ToString()); + } else if (field.fieldType() instanceof StringFieldType) { + result.add(field.stringValue()); + } + } + return result.toArray(new String[0]); + } + private static ParsedDocument parseDocument(IndexShard indexShard, String index, String type, BytesReference doc, XContentType xContentType, String routing) { MapperService mapperService = indexShard.mapperService(); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldMapperTests.java index 9e3d29f22724b..2ae9e89c8fb7a 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldMapperTests.java @@ -30,6 +30,7 @@ import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.IndexService; import org.elasticsearch.index.mapper.MapperService.MergeReason; +import org.elasticsearch.index.termvectors.TermVectorsService; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESSingleNodeTestCase; import org.elasticsearch.test.InternalSettingsPlugin; @@ -202,7 +203,7 @@ private void testIgnoreMalfomedForValue(String value, String expectedException) IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(0, fields.length); - assertArrayEquals(new String[] { "field" }, doc.rootDoc().getValues("_ignored")); + assertArrayEquals(new String[] { "field" }, TermVectorsService.getValues(doc.rootDoc().getFields("_ignored"))); } public void testChangeFormat() throws IOException { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java index 9373a5ceeae20..68662242b6be4 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java @@ -154,11 +154,15 @@ public void testDotsWithExistingMapper() throws Exception { .endObject()); ParsedDocument doc = mapper.parse(new SourceToParse("test", "type", "1", bytes, XContentType.JSON)); assertNull(doc.dynamicMappingsUpdate()); // no update! - String[] values = doc.rootDoc().getValues("foo.bar.baz"); - assertEquals(3, values.length); - assertEquals("123", values[0]); - assertEquals("456", values[1]); - assertEquals("789", values[2]); + + IndexableField[] fields = doc.rootDoc().getFields("foo.bar.baz"); + assertEquals(6, fields.length); + assertEquals(123, fields[0].numericValue()); + assertEquals("123", fields[1].stringValue()); + assertEquals(456, fields[2].numericValue()); + assertEquals("456", fields[3].stringValue()); + assertEquals(789, fields[4].numericValue()); + assertEquals("789", fields[5].stringValue()); } public void testDotsWithExistingNestedMapper() throws Exception { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldMapperTests.java index a1ad67a0550ae..d9ce4e6666d81 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldMapperTests.java @@ -25,6 +25,7 @@ import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.index.termvectors.TermVectorsService; import org.elasticsearch.test.ESSingleNodeTestCase; import java.util.Arrays; @@ -48,7 +49,7 @@ private static SortedSet set(String... values) { } void assertFieldNames(Set expected, ParsedDocument doc) { - String[] got = doc.rootDoc().getValues("_field_names"); + String[] got = TermVectorsService.getValues(doc.rootDoc().getFields("_field_names")); assertEquals(expected, set(got)); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IpFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IpFieldMapperTests.java index 6396b3988bc7d..376a2000dc281 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/IpFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IpFieldMapperTests.java @@ -32,6 +32,7 @@ import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.IndexService; +import org.elasticsearch.index.termvectors.TermVectorsService; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESSingleNodeTestCase; import org.elasticsearch.test.InternalSettingsPlugin; @@ -194,7 +195,7 @@ public void testIgnoreMalformed() throws Exception { IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(0, fields.length); - assertArrayEquals(new String[] { "field" }, doc.rootDoc().getValues("_ignored")); + assertArrayEquals(new String[] { "field" }, TermVectorsService.getValues(doc.rootDoc().getFields("_ignored"))); } public void testNullValue() throws IOException { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java index b4d1e3b2180b6..74aa89e5d25ee 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java @@ -36,6 +36,7 @@ import org.elasticsearch.index.analysis.PreConfiguredTokenFilter; import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.index.mapper.MapperService.MergeReason; +import org.elasticsearch.index.termvectors.TermVectorsService; import org.elasticsearch.indices.analysis.AnalysisModule; import org.elasticsearch.plugins.AnalysisPlugin; import org.elasticsearch.plugins.Plugin; @@ -128,6 +129,9 @@ public void testDefaults() throws Exception { fieldType = fields[1].fieldType(); assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE)); assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType()); + + // used by TermVectorsService + assertArrayEquals(new String[] { "1234" }, TermVectorsService.getValues(doc.rootDoc().getFields("field"))); } public void testIgnoreAbove() throws IOException { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/NumberFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/NumberFieldMapperTests.java index 77953c0903fd2..3a46783ff3df5 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/NumberFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/NumberFieldMapperTests.java @@ -20,7 +20,6 @@ package org.elasticsearch.index.mapper; import com.carrotsearch.randomizedtesting.annotations.Timeout; - import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexableField; import org.elasticsearch.common.Strings; @@ -32,6 +31,7 @@ import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType; import org.elasticsearch.index.mapper.NumberFieldTypeTests.OutOfRangeSpec; +import org.elasticsearch.index.termvectors.TermVectorsService; import java.io.ByteArrayInputStream; import java.io.IOException; @@ -250,7 +250,7 @@ public void testIgnoreMalformed() throws Exception { IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(0, fields.length); - assertArrayEquals(new String[] { "field" }, doc.rootDoc().getValues("_ignored")); + assertArrayEquals(new String[] { "field" }, TermVectorsService.getValues(doc.rootDoc().getFields("_ignored"))); } } } From 1a4a6509358ae1dea0a430fd194a0d6034abbb31 Mon Sep 17 00:00:00 2001 From: Marios Trivyzas Date: Fri, 13 Mar 2020 18:25:40 +0100 Subject: [PATCH 2/2] fix tests --- .../index/mapper/FieldNamesFieldMapperTests.java | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldMapperTests.java index d9ce4e6666d81..cf1b82ba644d4 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldMapperTests.java @@ -20,16 +20,19 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexableField; import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentType; -import org.elasticsearch.index.termvectors.TermVectorsService; +import org.elasticsearch.index.mapper.FieldNamesFieldMapper.FieldNamesFieldType; import org.elasticsearch.test.ESSingleNodeTestCase; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.List; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; @@ -49,8 +52,14 @@ private static SortedSet set(String... values) { } void assertFieldNames(Set expected, ParsedDocument doc) { - String[] got = TermVectorsService.getValues(doc.rootDoc().getFields("_field_names")); - assertEquals(expected, set(got)); + IndexableField[] fields = doc.rootDoc().getFields("_field_names"); + List result = new ArrayList<>(); + for (IndexableField field : fields) { + if (field.fieldType() instanceof FieldNamesFieldType) { + result.add(field.stringValue()); + } + } + assertEquals(expected, set(result.toArray(new String[0]))); } public void testExtractFieldNames() {