diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java index 2aadfd2218590..9777174563626 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java +++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java @@ -54,6 +54,7 @@ import org.elasticsearch.index.mapper.TextFieldMapper; import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken; import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.search.fetch.FetchSubPhase.HitContext; import java.io.IOException; import java.io.Reader; @@ -315,45 +316,12 @@ public AnnotationToken getAnnotation(int index) { // When asked to tokenize plain-text versions by the highlighter it tokenizes the // original markup form in order to inject annotations. public static final class AnnotatedHighlighterAnalyzer extends AnalyzerWrapper { - private Analyzer delegate; - private AnnotatedText[] annotations; - public AnnotatedHighlighterAnalyzer(Analyzer delegate){ + private final Analyzer delegate; + private final HitContext hitContext; + public AnnotatedHighlighterAnalyzer(Analyzer delegate, HitContext hitContext){ super(delegate.getReuseStrategy()); this.delegate = delegate; - } - - public void init(String[] markedUpFieldValues) { - this.annotations = new AnnotatedText[markedUpFieldValues.length]; - for (int i = 0; i < markedUpFieldValues.length; i++) { - annotations[i] = AnnotatedText.parse(markedUpFieldValues[i]); - } - } - - public String [] getPlainTextValuesForHighlighter(){ - String [] result = new String[annotations.length]; - for (int i = 0; i < annotations.length; i++) { - result[i] = annotations[i].textMinusMarkup; - } - return result; - } - - public AnnotationToken[] getIntersectingAnnotations(int start, int end) { - List intersectingAnnotations = new ArrayList<>(); - int fieldValueOffset =0; - for (AnnotatedText fieldValueAnnotations : this.annotations) { - //This is called from a highlighter where all of the field values are concatenated - // so each annotation offset will need to be adjusted so that it takes into account - // the previous values AND the MULTIVAL delimiter - for (AnnotationToken token : fieldValueAnnotations.annotations) { - if(token.intersects(start - fieldValueOffset , end - fieldValueOffset)) { - intersectingAnnotations.add(new AnnotationToken(token.offset + fieldValueOffset, - token.endOffset + fieldValueOffset, token.value)); - } - } - //add 1 for the fieldvalue separator character - fieldValueOffset +=fieldValueAnnotations.textMinusMarkup.length() +1; - } - return intersectingAnnotations.toArray(new AnnotationToken[intersectingAnnotations.size()]); + this.hitContext = hitContext; } @Override @@ -364,10 +332,11 @@ public Analyzer getWrappedAnalyzer(String fieldName) { @Override protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) { AnnotationsInjector injector = new AnnotationsInjector(components.getTokenStream()); + AnnotatedText[] annotations = (AnnotatedText[]) hitContext.cache().get(AnnotatedText.class.getName()); AtomicInteger readerNum = new AtomicInteger(0); return new TokenStreamComponents(r -> { String plainText = readToString(r); - AnnotatedText at = this.annotations[readerNum.getAndIncrement()]; + AnnotatedText at = annotations[readerNum.getAndIncrement()]; assert at.textMinusMarkup.equals(plainText); injector.setAnnotations(at); components.getSource().accept(new StringReader(at.textMinusMarkup)); diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedPassageFormatter.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedPassageFormatter.java index ad1acc85031dd..7d360dd0b9bac 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedPassageFormatter.java +++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedPassageFormatter.java @@ -23,7 +23,7 @@ import org.apache.lucene.search.uhighlight.Passage; import org.apache.lucene.search.uhighlight.PassageFormatter; import org.apache.lucene.search.uhighlight.Snippet; -import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer; +import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText; import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken; import java.io.UnsupportedEncodingException; @@ -42,11 +42,11 @@ public class AnnotatedPassageFormatter extends PassageFormatter { public static final String SEARCH_HIT_TYPE = "_hit_term"; private final Encoder encoder; - private AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer; + AnnotatedText[] annotations; - public AnnotatedPassageFormatter(AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer, Encoder encoder) { - this.annotatedHighlighterAnalyzer = annotatedHighlighterAnalyzer; + public AnnotatedPassageFormatter(AnnotatedText[] annotations, Encoder encoder) { this.encoder = encoder; + this.annotations = annotations; } static class MarkupPassage { @@ -158,7 +158,7 @@ public Snippet[] format(Passage[] passages, String content) { int pos; int j = 0; for (Passage passage : passages) { - AnnotationToken [] annotations = annotatedHighlighterAnalyzer.getIntersectingAnnotations(passage.getStartOffset(), + AnnotationToken [] annotations = getIntersectingAnnotations(passage.getStartOffset(), passage.getEndOffset()); MarkupPassage mergedMarkup = mergeAnnotations(annotations, passage); @@ -194,6 +194,27 @@ public Snippet[] format(Passage[] passages, String content) { } return snippets; } + + public AnnotationToken[] getIntersectingAnnotations(int start, int end) { + List intersectingAnnotations = new ArrayList<>(); + int fieldValueOffset =0; + for (AnnotatedText fieldValueAnnotations : this.annotations) { + //This is called from a highlighter where all of the field values are concatenated + // so each annotation offset will need to be adjusted so that it takes into account + // the previous values AND the MULTIVAL delimiter + for (int i = 0; i < fieldValueAnnotations.numAnnotations(); i++) { + AnnotationToken token = fieldValueAnnotations.getAnnotation(i); + if (token.intersects(start - fieldValueOffset, end - fieldValueOffset)) { + intersectingAnnotations + .add(new AnnotationToken(token.offset + fieldValueOffset, token.endOffset + + fieldValueOffset, token.value)); + } + } + //add 1 for the fieldvalue separator character + fieldValueOffset +=fieldValueAnnotations.textMinusMarkup.length() +1; + } + return intersectingAnnotations.toArray(new AnnotationToken[intersectingAnnotations.size()]); + } private void append(StringBuilder dest, String content, int start, int end) { dest.append(encoder.encodeText(content.substring(start, end))); diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java index d93316c78921a..2ba7838b90950 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java +++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java @@ -25,24 +25,22 @@ import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer; +import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText; import org.elasticsearch.search.fetch.FetchSubPhase.HitContext; import org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.Field; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; -import java.util.Arrays; +import java.util.ArrayList; import java.util.List; public class AnnotatedTextHighlighter extends UnifiedHighlighter { public static final String NAME = "annotated"; - - AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer = null; @Override - protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) { - annotatedHighlighterAnalyzer = new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, type)); - return annotatedHighlighterAnalyzer; + protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type, HitContext hitContext) { + return new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, type, hitContext), hitContext); } // Convert the marked-up values held on-disk to plain-text versions for highlighting @@ -51,14 +49,26 @@ protected List loadFieldValues(MappedFieldType fieldType, Field field, S throws IOException { List fieldValues = super.loadFieldValues(fieldType, field, context, hitContext); String[] fieldValuesAsString = fieldValues.toArray(new String[fieldValues.size()]); - annotatedHighlighterAnalyzer.init(fieldValuesAsString); - return Arrays.asList((Object[]) annotatedHighlighterAnalyzer.getPlainTextValuesForHighlighter()); + + AnnotatedText[] annotations = new AnnotatedText[fieldValuesAsString.length]; + for (int i = 0; i < fieldValuesAsString.length; i++) { + annotations[i] = AnnotatedText.parse(fieldValuesAsString[i]); + } + // Store the annotations in the hitContext + hitContext.cache().put(AnnotatedText.class.getName(), annotations); + + ArrayList result = new ArrayList<>(annotations.length); + for (int i = 0; i < annotations.length; i++) { + result.add(annotations[i].textMinusMarkup); + } + return result; } @Override - protected PassageFormatter getPassageFormatter(SearchContextHighlight.Field field, Encoder encoder) { - return new AnnotatedPassageFormatter(annotatedHighlighterAnalyzer, encoder); - + protected PassageFormatter getPassageFormatter(HitContext hitContext,SearchContextHighlight.Field field, Encoder encoder) { + // Retrieve the annotations from the hitContext + AnnotatedText[] annotations = (AnnotatedText[]) hitContext.cache().get(AnnotatedText.class.getName()); + return new AnnotatedPassageFormatter(annotations, encoder); } } diff --git a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/highlight/AnnotatedTextHighlighterTests.java b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/highlight/AnnotatedTextHighlighterTests.java index ca29521802fe2..8630dc870f8c7 100644 --- a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/highlight/AnnotatedTextHighlighterTests.java +++ b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/highlight/AnnotatedTextHighlighterTests.java @@ -40,18 +40,20 @@ import org.apache.lucene.search.highlight.DefaultEncoder; import org.apache.lucene.search.uhighlight.CustomSeparatorBreakIterator; import org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter; -import org.apache.lucene.search.uhighlight.PassageFormatter; import org.apache.lucene.search.uhighlight.Snippet; import org.apache.lucene.search.uhighlight.SplittingBreakIterator; import org.apache.lucene.store.Directory; import org.elasticsearch.common.Strings; import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer; +import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText; import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper; +import org.elasticsearch.search.fetch.FetchSubPhase.HitContext; import org.elasticsearch.search.fetch.subphase.highlight.AnnotatedPassageFormatter; import org.elasticsearch.test.ESTestCase; import java.net.URLEncoder; import java.text.BreakIterator; +import java.util.ArrayList; import java.util.Locale; import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR; @@ -63,13 +65,24 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs, Query query, Locale locale, BreakIterator breakIterator, int noMatchSize, String[] expectedPassages) throws Exception { + // Annotated fields wrap the usual analyzer with one that injects extra tokens Analyzer wrapperAnalyzer = new AnnotationAnalyzerWrapper(new StandardAnalyzer()); - AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer); - hiliteAnalyzer.init(markedUpInputs); - PassageFormatter passageFormatter = new AnnotatedPassageFormatter(hiliteAnalyzer,new DefaultEncoder()); - String []plainTextForHighlighter = hiliteAnalyzer.getPlainTextValuesForHighlighter(); + HitContext mockHitContext = new HitContext(); + AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer, mockHitContext); + + AnnotatedText[] annotations = new AnnotatedText[markedUpInputs.length]; + for (int i = 0; i < markedUpInputs.length; i++) { + annotations[i] = AnnotatedText.parse(markedUpInputs[i]); + } + mockHitContext.cache().put(AnnotatedText.class.getName(), annotations); + AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(annotations,new DefaultEncoder()); + + ArrayList plainTextForHighlighter = new ArrayList<>(annotations.length); + for (int i = 0; i < annotations.length; i++) { + plainTextForHighlighter.add(annotations[i].textMinusMarkup); + } Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(wrapperAnalyzer); @@ -94,7 +107,7 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs, iw.close(); TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER); assertThat(topDocs.totalHits.value, equalTo(1L)); - String rawValue = Strings.arrayToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR)); + String rawValue = Strings.collectionToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR)); CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, hiliteAnalyzer, null, passageFormatter, locale, diff --git a/plugins/mapper-annotated-text/src/test/resources/rest-api-spec/test/mapper_annotatedtext/10_basic.yml b/plugins/mapper-annotated-text/src/test/resources/rest-api-spec/test/mapper_annotatedtext/10_basic.yml index a27e6c44a7fa2..63516516252a9 100644 --- a/plugins/mapper-annotated-text/src/test/resources/rest-api-spec/test/mapper_annotatedtext/10_basic.yml +++ b/plugins/mapper-annotated-text/src/test/resources/rest-api-spec/test/mapper_annotatedtext/10_basic.yml @@ -42,3 +42,77 @@ body: { "query" : {"term" : { "text" : "quick" } }, "highlight" : { "type" : "annotated", "require_field_match": false, "fields" : { "text" : {} } } } - match: {hits.hits.0.highlight.text.0: "The [quick](_hit_term=quick) brown fox is brown."} + +--- +"issue 39395 thread safety issue -requires multiple calls to reveal": + - skip: + version: " - 6.4.99" + reason: Annotated text type introduced in 6.5.0 + + - do: + indices.create: + index: annotated + body: + settings: + number_of_shards: "5" + number_of_replicas: "0" + mappings: + properties: + my_field: + type: annotated_text + + - do: + index: + index: annotated + id: 1 + body: + "my_field" : "[A](~MARK0&~MARK0) [B](~MARK1)" + - do: + index: + index: annotated + id: 2 + body: + "my_field" : "[A](~MARK0) [C](~MARK2)" + refresh: true + - do: + search: + request_cache: false + body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } } + - match: {_shards.failed: 0} + + - do: + search: + request_cache: false + body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } } + - match: {_shards.failed: 0} + + - do: + search: + request_cache: false + body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } } + - match: {_shards.failed: 0} + + - do: + search: + request_cache: false + body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } } + - match: {_shards.failed: 0} + + - do: + search: + request_cache: false + body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } } + - match: {_shards.failed: 0} + + - do: + search: + request_cache: false + body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } } + - match: {_shards.failed: 0} + + - do: + search: + request_cache: false + body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } } + - match: {_shards.failed: 0} + diff --git a/server/src/main/java/org/elasticsearch/search/fetch/FetchSubPhase.java b/server/src/main/java/org/elasticsearch/search/fetch/FetchSubPhase.java index 84154926bf665..8a8e4e8d77ff6 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/FetchSubPhase.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/FetchSubPhase.java @@ -74,7 +74,6 @@ public Map cache() { } return cache; } - } /** diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java index 123e18a4da618..af37fa4edab19 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java @@ -38,6 +38,7 @@ import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.search.fetch.FetchPhaseExecutionException; import org.elasticsearch.search.fetch.FetchSubPhase; +import org.elasticsearch.search.fetch.FetchSubPhase.HitContext; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; @@ -68,12 +69,13 @@ public HighlightField highlight(HighlighterContext highlighterContext) { int numberOfFragments; try { - final Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType); + final Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType, + hitContext); List fieldValues = loadFieldValues(fieldType, field, context, hitContext); if (fieldValues.size() == 0) { return null; } - final PassageFormatter passageFormatter = getPassageFormatter(field, encoder); + final PassageFormatter passageFormatter = getPassageFormatter(hitContext, field, encoder); final IndexSearcher searcher = new IndexSearcher(hitContext.reader()); final CustomUnifiedHighlighter highlighter; final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR); @@ -140,14 +142,14 @@ public HighlightField highlight(HighlighterContext highlighterContext) { return null; } - protected PassageFormatter getPassageFormatter(SearchContextHighlight.Field field, Encoder encoder) { + protected PassageFormatter getPassageFormatter(HitContext hitContext, SearchContextHighlight.Field field, Encoder encoder) { CustomPassageFormatter passageFormatter = new CustomPassageFormatter(field.fieldOptions().preTags()[0], field.fieldOptions().postTags()[0], encoder); return passageFormatter; } - protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) { + protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type, HitContext hitContext) { return HighlightUtils.getAnalyzer(docMapper, type); }