From d480c0b31420984691aeecb72386415707659e8c Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Thu, 19 Nov 2020 20:13:55 +0100 Subject: [PATCH] Make document parsing aware of runtime fields (#65210) Runtime fields are defined in a separate runtime section in the mappings. Since the runtime section was introduced, runtime fields are not taken into account when parsing documents. That means that if a document gets indexed that holds a field that's already defined as a runtime field, the field gets dynamically mapped as a concrete field although it will always be shadowed by the runtime field defined with the same name. A more sensible default would be to instead consider runtime fields like ordinary mapped fields, so a dynamic update is not necessary whenever a field is defined as part of the runtime section. As a consequence, the field does not get indexed. If users prefer to keep indexing the field although it is shadowed, we consider this an exception, and they can do so by mapping the field under properties explicitly. Relates to #62906 --- .../index/mapper/DocumentParser.java | 110 ++++++++++++++++-- .../index/mapper/RootObjectMapper.java | 4 + .../index/query/TermsQueryBuilder.java | 2 +- .../index/mapper/DocumentParserTests.java | 83 +++++++++++-- .../index/mapper/DynamicMappingTests.java | 41 +++++-- 5 files changed, 211 insertions(+), 29 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java index c806ed253e30a..b04ba6750bb1e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java @@ -21,6 +21,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.search.Query; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.Version; import org.elasticsearch.common.Strings; @@ -29,15 +30,19 @@ import org.elasticsearch.common.time.DateFormatter; import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.mapper.DynamicTemplate.XContentFieldType; +import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.search.lookup.SearchLookup; import java.io.IOException; import java.time.format.DateTimeParseException; import java.util.ArrayList; import java.util.Collections; +import java.util.Comparator; import java.util.Iterator; import java.util.List; import java.util.Objects; @@ -224,7 +229,7 @@ static Mapping createDynamicUpdate(Mapping mapping, DocumentMapper docMapper, Li // We build a mapping by first sorting the mappers, so that all mappers containing a common prefix // will be processed in a contiguous block. When the prefix is no longer seen, we pop the extra elements // off the stack, merging them upwards into the existing mappers. - Collections.sort(dynamicMappers, (Mapper o1, Mapper o2) -> o1.name().compareTo(o2.name())); + dynamicMappers.sort(Comparator.comparing(Mapper::name)); Iterator dynamicMapperItr = dynamicMappers.iterator(); List parentMappers = new ArrayList<>(); Mapper firstUpdate = dynamicMapperItr.next(); @@ -829,14 +834,14 @@ private static Tuple getDynamicParentMapper(ParseContext int pathsAdded = 0; ObjectMapper parent = mapper; for (int i = 0; i < paths.length-1; i++) { - String currentPath = context.path().pathAsText(paths[i]); - Mapper existingFieldMapper = context.docMapper().mappers().getMapper(currentPath); - if (existingFieldMapper != null) { - throw new MapperParsingException( + String currentPath = context.path().pathAsText(paths[i]); + Mapper existingFieldMapper = context.docMapper().mappers().getMapper(currentPath); + if (existingFieldMapper != null) { + throw new MapperParsingException( "Could not dynamically add mapping for field [{}]. Existing mapping for [{}] must be of type object but found [{}].", null, String.join(".", paths), currentPath, existingFieldMapper.typeName()); - } - mapper = context.docMapper().mappers().objectMappers().get(currentPath); + } + mapper = context.docMapper().mappers().objectMappers().get(currentPath); if (mapper == null) { // One mapping is missing, check if we are allowed to create a dynamic one. ObjectMapper.Dynamic dynamic = dynamicOrDefault(parent, context); @@ -905,7 +910,7 @@ private static Mapper getMapper(final ParseContext context, ObjectMapper objectM for (int i = 0; i < subfields.length - 1; ++i) { mapper = objectMapper.getMapper(subfields[i]); - if (mapper == null || (mapper instanceof ObjectMapper) == false) { + if (mapper instanceof ObjectMapper == false) { return null; } objectMapper = (ObjectMapper)mapper; @@ -915,6 +920,93 @@ private static Mapper getMapper(final ParseContext context, ObjectMapper objectM + mapper.name() + "]"); } } - return objectMapper.getMapper(subfields[subfields.length - 1]); + String leafName = subfields[subfields.length - 1]; + mapper = objectMapper.getMapper(leafName); + if (mapper != null) { + return mapper; + } + //concrete fields take the precedence over runtime fields when parsing documents, though when a field is defined as runtime field + //only, and not under properties, it is ignored when it is sent as part of _source + RuntimeFieldType runtimeFieldType = context.docMapper().mapping().root.getRuntimeFieldType(fieldPath); + if (runtimeFieldType != null) { + return new NoOpFieldMapper(leafName, runtimeFieldType); + } + return null; + } + + private static class NoOpFieldMapper extends FieldMapper { + NoOpFieldMapper(String simpleName, RuntimeFieldType runtimeField) { + super(simpleName, new MappedFieldType(runtimeField.name(), false, false, false, TextSearchInfo.NONE, Collections.emptyMap()) { + @Override + public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) { + throw new UnsupportedOperationException(); + } + + @Override + public String typeName() { + throw new UnsupportedOperationException(); + } + + @Override + public Query termQuery(Object value, QueryShardContext context) { + throw new UnsupportedOperationException(); + } + }, MultiFields.empty(), CopyTo.empty()); + } + + @Override + protected void parseCreateField(ParseContext context) throws IOException { + //field defined as runtime field, don't index anything + } + + @Override + public String name() { + throw new UnsupportedOperationException(); + } + + @Override + public String typeName() { + throw new UnsupportedOperationException(); + } + + @Override + public MappedFieldType fieldType() { + throw new UnsupportedOperationException(); + } + + @Override + public MultiFields multiFields() { + throw new UnsupportedOperationException(); + } + + @Override + public Iterator iterator() { + throw new UnsupportedOperationException(); + } + + @Override + protected void doValidate(MappingLookup mappers) { + throw new UnsupportedOperationException(); + } + + @Override + protected void checkIncomingMergeType(FieldMapper mergeWith) { + throw new UnsupportedOperationException(); + } + + @Override + public Builder getMergeBuilder() { + throw new UnsupportedOperationException(); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + protected String contentType() { + throw new UnsupportedOperationException(); + } } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/RootObjectMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/RootObjectMapper.java index 88502eb61ba66..ff26592ce83f3 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/RootObjectMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/RootObjectMapper.java @@ -274,6 +274,10 @@ Collection runtimeFieldTypes() { return runtimeFieldTypes.values(); } + RuntimeFieldType getRuntimeFieldType(String name) { + return runtimeFieldTypes.get(name); + } + public Mapper.Builder findTemplateBuilder(ParseContext context, String name, XContentFieldType matchType) { return findTemplateBuilder(context, name, matchType, null); } diff --git a/server/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java index c871426799302..a7c64ee46b972 100644 --- a/server/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java @@ -444,7 +444,7 @@ private void fetch(TermsLookup termsLookup, Client client, ActionListener { List terms = new ArrayList<>(); if (getResponse.isSourceEmpty() == false) { // extract terms only if the doc source exists - List extractedValues = XContentMapValues.extractRawValues(termsLookup.path(), getResponse.getSourceAsMap()); + List extractedValues = XContentMapValues. extractRawValues(termsLookup.path(), getResponse.getSourceAsMap()); terms.addAll(extractedValues); } delegatedListener.onResponse(terms); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java index 73cf58b8151af..661432bf9e8f7 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java @@ -27,6 +27,7 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; @@ -59,20 +60,82 @@ protected Collection getPlugins() { return org.elasticsearch.common.collect.List.of(new DocumentParserTestsPlugin(), new TestRuntimeField.Plugin()); } - public void testDynamicUpdateWithRuntimeField() throws Exception { + public void testParseWithRuntimeField() throws Exception { DocumentMapper mapper = createDocumentMapper(runtimeFieldMapping(b -> b.field("type", "test"))); - ParsedDocument doc = mapper.parse(source(b -> b.field("test", "value"))); - RootObjectMapper root = doc.dynamicMappingsUpdate().root; - assertEquals(0, root.runtimeFieldTypes().size()); - assertNotNull(root.getMapper("test")); + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "value"))); + //field defined as runtime field but not under properties: no dynamic updates, the field does not get indexed + assertNull(doc.dynamicMappingsUpdate()); + assertNull(doc.rootDoc().getField("field")); } - public void testDynamicUpdateWithRuntimeFieldSameName() throws Exception { - DocumentMapper mapper = createDocumentMapper(runtimeFieldMapping(b -> b.field("type", "test"))); + public void testParseWithShadowedField() throws Exception { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject().startObject("_doc"); + builder.startObject("runtime"); + builder.startObject("field").field("type", "test").endObject(); + builder.endObject(); + builder.startObject("properties"); + builder.startObject("field").field("type", "keyword").endObject(); + builder.endObject().endObject().endObject(); + + DocumentMapper mapper = createDocumentMapper(builder); ParsedDocument doc = mapper.parse(source(b -> b.field("field", "value"))); - RootObjectMapper root = doc.dynamicMappingsUpdate().root; - assertEquals(0, root.runtimeFieldTypes().size()); - assertNotNull(root.getMapper("field")); + //field defined as runtime field as well as under properties: no dynamic updates, the field gets indexed + assertNull(doc.dynamicMappingsUpdate()); + assertNotNull(doc.rootDoc().getField("field")); + } + + public void testParseWithRuntimeFieldDottedNameDisabledObject() throws Exception { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject().startObject("_doc"); + builder.startObject("runtime"); + builder.startObject("path1.path2.path3.field").field("type", "test").endObject(); + builder.endObject(); + builder.startObject("properties"); + builder.startObject("path1").field("type", "object").field("enabled", false).endObject(); + builder.endObject().endObject().endObject(); + MapperService mapperService = createMapperService(builder); + ParsedDocument doc = mapperService.documentMapper().parse(source(b -> { + b.startObject("path1").startObject("path2").startObject("path3"); + b.field("field", "value"); + b.endObject().endObject().endObject(); + })); + assertNull(doc.dynamicMappingsUpdate()); + assertNull(doc.rootDoc().getField("path1.path2.path3.field")); + } + + public void testParseWithShadowedSubField() throws Exception { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject().startObject("_doc"); + builder.startObject("runtime"); + builder.startObject("field.keyword").field("type", "test").endObject(); + builder.endObject(); + builder.startObject("properties"); + builder.startObject("field").field("type", "text"); + builder.startObject("fields").startObject("keyword").field("type", "keyword").endObject().endObject(); + builder.endObject().endObject().endObject().endObject(); + + DocumentMapper mapper = createDocumentMapper(builder); + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "value"))); + //field defined as runtime field as well as under properties: no dynamic updates, the field gets indexed + assertNull(doc.dynamicMappingsUpdate()); + assertNotNull(doc.rootDoc().getField("field")); + assertNotNull(doc.rootDoc().getField("field.keyword")); + } + + public void testParseWithShadowedMultiField() throws Exception { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject().startObject("_doc"); + builder.startObject("runtime"); + builder.startObject("field").field("type", "test").endObject(); + builder.endObject(); + builder.startObject("properties"); + builder.startObject("field").field("type", "text"); + builder.startObject("fields").startObject("keyword").field("type", "keyword").endObject().endObject(); + builder.endObject().endObject().endObject().endObject(); + + DocumentMapper mapper = createDocumentMapper(builder); + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "value"))); + //field defined as runtime field as well as under properties: no dynamic updates, the field gets indexed + assertNull(doc.dynamicMappingsUpdate()); + assertNotNull(doc.rootDoc().getField("field")); + assertNotNull(doc.rootDoc().getField("field.keyword")); } public void testFieldDisabled() throws Exception { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DynamicMappingTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DynamicMappingTests.java index 87410bd7df44e..41f2f1b77e088 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DynamicMappingTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DynamicMappingTests.java @@ -192,20 +192,43 @@ public void testDynamicUpdateWithRuntimeField() throws Exception { Mapping merged = mapperService.documentMapper().mapping(); assertNotNull(merged.root.getMapper("test")); assertEquals(1, merged.root.runtimeFieldTypes().size()); - assertEquals("field", merged.root.runtimeFieldTypes().iterator().next().name()); + assertNotNull(merged.root.getRuntimeFieldType("field")); } - public void testDynamicUpdateWithRuntimeFieldSameName() throws Exception { - MapperService mapperService = createMapperService(runtimeFieldMapping(b -> b.field("type", "test"))); - ParsedDocument doc = mapperService.documentMapper().parse(source(b -> b.field("field", "value"))); - assertEquals("{\"_doc\":{\"properties\":{" + - "\"field\":{\"type\":\"text\",\"fields\":{\"keyword\":{\"type\":\"keyword\",\"ignore_above\":256}}}}}}", - Strings.toString(doc.dynamicMappingsUpdate().root)); + public void testDynamicUpdateWithRuntimeFieldDottedName() throws Exception { + MapperService mapperService = createMapperService(runtimeMapping( + b -> b.startObject("path1.path2.path3.field").field("type", "test").endObject())); + ParsedDocument doc = mapperService.documentMapper().parse(source(b -> { + b.startObject("path1").startObject("path2").startObject("path3"); + b.field("field", "value"); + b.endObject().endObject().endObject(); + })); + RootObjectMapper root = doc.dynamicMappingsUpdate().root; + assertEquals(0, root.runtimeFieldTypes().size()); + { + //the runtime field is defined but the object structure is not, hence it is defined under properties + Mapper path1 = root.getMapper("path1"); + assertThat(path1, instanceOf(ObjectMapper.class)); + Mapper path2 = ((ObjectMapper) path1).getMapper("path2"); + assertThat(path2, instanceOf(ObjectMapper.class)); + Mapper path3 = ((ObjectMapper) path2).getMapper("path3"); + assertThat(path3, instanceOf(ObjectMapper.class)); + assertFalse(path3.iterator().hasNext()); + } + assertNull(doc.rootDoc().getField("path1.path2.path3.field")); merge(mapperService, dynamicMapping(doc.dynamicMappingsUpdate())); Mapping merged = mapperService.documentMapper().mapping(); - assertNotNull(merged.root.getMapper("field")); + { + Mapper path1 = merged.root.getMapper("path1"); + assertThat(path1, instanceOf(ObjectMapper.class)); + Mapper path2 = ((ObjectMapper) path1).getMapper("path2"); + assertThat(path2, instanceOf(ObjectMapper.class)); + Mapper path3 = ((ObjectMapper) path2).getMapper("path3"); + assertThat(path3, instanceOf(ObjectMapper.class)); + assertFalse(path3.iterator().hasNext()); + } assertEquals(1, merged.root.runtimeFieldTypes().size()); - assertEquals("field", merged.root.runtimeFieldTypes().iterator().next().name()); + assertNotNull(merged.root.getRuntimeFieldType("path1.path2.path3.field")); } public void testIncremental() throws Exception {