Skip to content

Commit 52a0eca

Browse files
authored
Add match_only_text, a space-efficient variant of text. (#72064)
This adds a new `match_only_text` field, which indexes the same data as a `text` field that has `index_options: docs` and `norms: false` and uses the `_source` for positional queries like `match_phrase`. Unlike `text`, this field doesn't support scoring.
1 parent 4eefec3 commit 52a0eca

File tree

24 files changed

+2241
-44
lines changed

24 files changed

+2241
-44
lines changed

docs/reference/mapping/types.asciidoc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ values.
6969
[[text-search-types]]
7070
==== Text search types
7171

72-
<<text,`text`>>:: Analyzed, unstructured text.
72+
<<text,`text` fields>>:: The text family, including `text` and `match_only_text`.
73+
Analyzed, unstructured text.
7374
{plugins}/mapper-annotated-text.html[`annotated-text`]:: Text containing special
7475
markup. Used for identifying named entities.
7576
<<completion-suggester,`completion`>>:: Used for auto-complete suggestions.
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
[discrete]
2+
[[match-only-text-field-type]]
3+
=== Match-only text field type
4+
5+
A variant of <<text-field-type,`text`>> that trades scoring and efficiency of
6+
positional queries for space efficiency. This field effectively stores data the
7+
same way as a `text` field that only indexes documents (`index_options: docs`)
8+
and disables norms (`norms: false`). Term queries perform at least as fast
9+
as on `text` fields; however, queries that need positions such as the
10+
<<query-dsl-match-query-phrase,`match_phrase` query>> perform slower as they
11+
need to look at the `_source` document to verify whether a phrase matches. All
12+
queries return constant scores that are equal to 1.0.
13+
14+
Analysis is not configurable: text is always analyzed with the
15+
<<specify-index-time-default-analyzer,default analyzer>>
16+
(<<analysis-standard-analyzer,`standard`>> by default).
17+
18+
<<span-queries,Span queries>> are not supported with this field; use
19+
<<query-dsl-intervals-query,interval queries>> instead, or the
20+
<<text-field-type,`text`>> field type if you absolutely need span queries.
21+
22+
Other than that, `match_only_text` supports the same queries as `text`. And
23+
like `text`, it doesn't support sorting or aggregating.
24+
25+
[source,console]
26+
--------------------------------
27+
PUT logs
28+
{
29+
"mappings": {
30+
"properties": {
31+
"@timestamp": {
32+
"type": "date"
33+
},
34+
"message": {
35+
"type": "match_only_text"
36+
}
37+
}
38+
}
39+
}
40+
--------------------------------
41+
42+
[discrete]
43+
[[match-only-text-params]]
44+
==== Parameters for match-only text fields
45+
46+
The following mapping parameters are accepted:
47+
48+
[horizontal]
49+
50+
<<multi-fields,`fields`>>::
51+
52+
Multi-fields allow the same string value to be indexed in multiple ways for
53+
different purposes, such as one field for search and a multi-field for
54+
sorting and aggregations, or the same string value analyzed by different
55+
analyzers.
56+
57+
<<mapping-field-meta,`meta`>>::
58+
59+
Metadata about the field.

docs/reference/mapping/types/text.asciidoc

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,23 @@
1+
[testenv="basic"]
12
[[text]]
2-
=== Text field type
3+
=== Text type family
34
++++
45
<titleabbrev>Text</titleabbrev>
56
++++
67

8+
The text family includes the following field types:
9+
10+
* <<text-field-type,`text`>>, the traditional field type for full-text content
11+
such as the body of an email or the description of a product.
12+
* <<match-only-text-field-type,`match_only_text`>>, a space-optimized variant
13+
of `text` that disables scoring and performs slower on queries that need
14+
positions. It is best suited for indexing log messages.
15+
16+
17+
[discrete]
18+
[[text-field-type]]
19+
=== Text field type
20+
721
A field to index full-text values, such as the body of an email or the
822
description of a product. These fields are `analyzed`, that is, they are passed through an
923
<<analysis,analyzer>> to convert the string into a list of individual terms
@@ -258,3 +272,5 @@ PUT my-index-000001
258272
}
259273
}
260274
--------------------------------------------------
275+
276+
include::match-only-text.asciidoc[]

modules/mapper-extras/build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,6 @@ esplugin {
1616

1717
restResources {
1818
restApi {
19-
include '_common', 'cluster', 'nodes', 'indices', 'index', 'search', 'get'
19+
include '_common', 'cluster', 'field_caps', 'nodes', 'indices', 'index', 'search', 'get'
2020
}
2121
}
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.index.mapper;
10+
11+
import org.apache.lucene.analysis.CannedTokenStream;
12+
import org.apache.lucene.analysis.Token;
13+
import org.apache.lucene.analysis.TokenStream;
14+
import org.apache.lucene.index.DocValuesType;
15+
import org.apache.lucene.index.IndexOptions;
16+
import org.apache.lucene.index.IndexableField;
17+
import org.apache.lucene.index.IndexableFieldType;
18+
import org.elasticsearch.common.Strings;
19+
import org.elasticsearch.common.xcontent.XContentBuilder;
20+
import org.elasticsearch.common.xcontent.XContentFactory;
21+
import org.elasticsearch.index.query.SearchExecutionContext;
22+
import org.elasticsearch.plugins.Plugin;
23+
import org.hamcrest.Matchers;
24+
25+
import java.io.IOException;
26+
import java.util.Collection;
27+
import java.util.Collections;
28+
29+
import static org.hamcrest.Matchers.containsString;
30+
import static org.hamcrest.Matchers.equalTo;
31+
import static org.hamcrest.Matchers.instanceOf;
32+
33+
public class MatchOnlyTextFieldMapperTests extends MapperTestCase {
34+
35+
@Override
36+
protected Collection<Plugin> getPlugins() {
37+
return Collections.singleton(new MapperExtrasPlugin());
38+
}
39+
40+
@Override
41+
protected Object getSampleValueForDocument() {
42+
return "value";
43+
}
44+
45+
public final void testExists() throws IOException {
46+
MapperService mapperService = createMapperService(fieldMapping(b -> { minimalMapping(b); }));
47+
assertExistsQuery(mapperService);
48+
assertParseMinimalWarnings();
49+
}
50+
51+
@Override
52+
protected void registerParameters(ParameterChecker checker) throws IOException {
53+
checker.registerUpdateCheck(b -> {
54+
b.field("meta", Collections.singletonMap("format", "mysql.access"));
55+
}, m -> assertEquals(Collections.singletonMap("format", "mysql.access"), m.fieldType().meta()));
56+
}
57+
58+
@Override
59+
protected void minimalMapping(XContentBuilder b) throws IOException {
60+
b.field("type", "match_only_text");
61+
}
62+
63+
public void testDefaults() throws IOException {
64+
DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping));
65+
assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), mapper.mappingSource().toString());
66+
67+
ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234")));
68+
IndexableField[] fields = doc.rootDoc().getFields("field");
69+
assertEquals(1, fields.length);
70+
assertEquals("1234", fields[0].stringValue());
71+
IndexableFieldType fieldType = fields[0].fieldType();
72+
assertThat(fieldType.omitNorms(), equalTo(true));
73+
assertTrue(fieldType.tokenized());
74+
assertFalse(fieldType.stored());
75+
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS));
76+
assertThat(fieldType.storeTermVectors(), equalTo(false));
77+
assertThat(fieldType.storeTermVectorOffsets(), equalTo(false));
78+
assertThat(fieldType.storeTermVectorPositions(), equalTo(false));
79+
assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
80+
assertEquals(DocValuesType.NONE, fieldType.docValuesType());
81+
}
82+
83+
public void testNullConfigValuesFail() throws MapperParsingException {
84+
Exception e = expectThrows(
85+
MapperParsingException.class,
86+
() -> createDocumentMapper(fieldMapping(b -> b.field("type", "match_only_text").field("meta", (String) null)))
87+
);
88+
assertThat(e.getMessage(), containsString("[meta] on mapper [field] of type [match_only_text] must not have a [null] value"));
89+
}
90+
91+
public void testSimpleMerge() throws IOException {
92+
XContentBuilder startingMapping = fieldMapping(b -> b.field("type", "match_only_text"));
93+
MapperService mapperService = createMapperService(startingMapping);
94+
assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(MatchOnlyTextFieldMapper.class));
95+
96+
merge(mapperService, startingMapping);
97+
assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(MatchOnlyTextFieldMapper.class));
98+
99+
XContentBuilder newField = mapping(b -> {
100+
b.startObject("field")
101+
.field("type", "match_only_text")
102+
.startObject("meta")
103+
.field("key", "value")
104+
.endObject()
105+
.endObject();
106+
b.startObject("other_field").field("type", "keyword").endObject();
107+
});
108+
merge(mapperService, newField);
109+
assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(MatchOnlyTextFieldMapper.class));
110+
assertThat(mapperService.documentMapper().mappers().getMapper("other_field"), instanceOf(KeywordFieldMapper.class));
111+
}
112+
113+
public void testDisabledSource() throws IOException {
114+
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("_doc");
115+
{
116+
mapping.startObject("properties");
117+
{
118+
mapping.startObject("foo");
119+
{
120+
mapping.field("type", "match_only_text");
121+
}
122+
mapping.endObject();
123+
}
124+
mapping.endObject();
125+
126+
mapping.startObject("_source");
127+
{
128+
mapping.field("enabled", false);
129+
}
130+
mapping.endObject();
131+
}
132+
mapping.endObject().endObject();
133+
134+
MapperService mapperService = createMapperService(mapping);
135+
MappedFieldType ft = mapperService.fieldType("foo");
136+
SearchExecutionContext context = createSearchExecutionContext(mapperService);
137+
TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 4, 7));
138+
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> ft.phraseQuery(ts, 0, true, context));
139+
assertThat(e.getMessage(), Matchers.containsString("cannot run positional queries since [_source] is disabled"));
140+
141+
// Term queries are ok
142+
ft.termQuery("a", context); // no exception
143+
}
144+
145+
@Override
146+
protected Object generateRandomInputValue(MappedFieldType ft) {
147+
assumeFalse("We don't have a way to assert things here", true);
148+
return null;
149+
}
150+
151+
@Override
152+
protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException {
153+
assumeFalse("We don't have a way to assert things here", true);
154+
}
155+
}

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MapperExtrasPlugin.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ public Map<String, Mapper.TypeParser> getMappers() {
2929
mappers.put(RankFeatureFieldMapper.CONTENT_TYPE, RankFeatureFieldMapper.PARSER);
3030
mappers.put(RankFeaturesFieldMapper.CONTENT_TYPE, RankFeaturesFieldMapper.PARSER);
3131
mappers.put(SearchAsYouTypeFieldMapper.CONTENT_TYPE, SearchAsYouTypeFieldMapper.PARSER);
32+
mappers.put(MatchOnlyTextFieldMapper.CONTENT_TYPE, MatchOnlyTextFieldMapper.PARSER);
3233
return Collections.unmodifiableMap(mappers);
3334
}
3435

0 commit comments

Comments
 (0)