diff --git a/docs/changelog/86110.yaml b/docs/changelog/86110.yaml
new file mode 100644
index 0000000000000..376cadaa56602
--- /dev/null
+++ b/docs/changelog/86110.yaml
@@ -0,0 +1,6 @@
+pr: 86110
+summary: Add LimitedOffsetsEnum to limit the offsets of highlighted tokens
+area: Search
+type: enhancement
+issues:
+ - 86109
diff --git a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomFieldHighlighter.java b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomFieldHighlighter.java
index 640150323da51..cd781829dd08c 100644
--- a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomFieldHighlighter.java
+++ b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomFieldHighlighter.java
@@ -36,6 +36,7 @@ class CustomFieldHighlighter extends FieldHighlighter {
private final Locale breakIteratorLocale;
private final int noMatchSize;
private String fieldValue;
+ private final Integer queryMaxAnalyzedOffset;
CustomFieldHighlighter(
String field,
@@ -46,11 +47,13 @@ class CustomFieldHighlighter extends FieldHighlighter {
int maxPassages,
int maxNoHighlightPassages,
PassageFormatter passageFormatter,
- int noMatchSize
+ int noMatchSize,
+ Integer queryMaxAnalyzedOffset
) {
super(field, fieldOffsetStrategy, breakIterator, passageScorer, maxPassages, maxNoHighlightPassages, passageFormatter);
this.breakIteratorLocale = breakIteratorLocale;
this.noMatchSize = noMatchSize;
+ this.queryMaxAnalyzedOffset = queryMaxAnalyzedOffset;
}
FieldOffsetStrategy getFieldOffsetStrategy() {
@@ -106,6 +109,10 @@ protected Passage[] getSummaryPassagesNoHighlight(int maxPassages) {
@Override
protected Passage[] highlightOffsetsEnums(OffsetsEnum off) throws IOException {
+ if (queryMaxAnalyzedOffset != null) {
+ off = new LimitedOffsetsEnum(off, queryMaxAnalyzedOffset);
+ }
+
final int contentLength = this.breakIterator.getText().getEndIndex();
if (off.nextPosition() == false) {
diff --git a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java
index 443520ac47d55..ca5d50ba10e89 100644
--- a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java
+++ b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java
@@ -193,7 +193,8 @@ protected CustomFieldHighlighter getFieldHighlighter(String field, Query query,
maxPassages,
(noMatchSize > 0 ? 1 : 0),
getFormatter(field),
- noMatchSize
+ noMatchSize,
+ queryMaxAnalyzedOffset
);
}
diff --git a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/LimitedOffsetsEnum.java b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/LimitedOffsetsEnum.java
new file mode 100644
index 0000000000000..aebe135d4db53
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/LimitedOffsetsEnum.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.lucene.search.uhighlight;
+
+import org.apache.lucene.search.uhighlight.OffsetsEnum;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+
+/** Wraps an {@link OffsetsEnum} and reports exhaustion at the first match whose start offset is greater than {@code maxOffset}. */
+public class LimitedOffsetsEnum extends OffsetsEnum {
+    private final OffsetsEnum delegate;
+    private final int maxOffset; // largest start offset (inclusive) that is still reported
+
+    public LimitedOffsetsEnum(OffsetsEnum delegate, int maxOffset) {
+        this.delegate = delegate;
+        this.maxOffset = maxOffset;
+    }
+
+    /** Advances the delegate; returns false when it is exhausted or its new match starts after {@code maxOffset}. */
+    @Override
+    public boolean nextPosition() throws IOException {
+        return delegate.nextPosition() && delegate.startOffset() <= maxOffset;
+    }
+
+    @Override
+    public int freq() throws IOException {
+        return delegate.freq();
+    }
+
+    @Override
+    public BytesRef getTerm() throws IOException {
+        return delegate.getTerm();
+    }
+
+    @Override
+    public int startOffset() throws IOException {
+        return delegate.startOffset();
+    }
+
+    @Override
+    public int endOffset() throws IOException {
+        return delegate.endOffset();
+    }
+
+    @Override
+    public void close() throws IOException {
+        delegate.close(); // forward so that closing the wrapper also releases the wrapped enum
+    }
+}
diff --git a/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java b/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java
index e81f5834f60ae..74d70f179697b 100644
--- a/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java
+++ b/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java
@@ -9,6 +9,7 @@
package org.elasticsearch.lucene.search.uhighlight;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizerFactory;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
@@ -40,6 +41,8 @@
import java.text.BreakIterator;
import java.util.Locale;
+import java.util.Map;
+import java.util.TreeMap;
import static org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
import static org.hamcrest.CoreMatchers.equalTo;
@@ -82,6 +85,34 @@ private void assertHighlightOneDoc(
String[] expectedPassages,
int maxAnalyzedOffset,
Integer queryMaxAnalyzedOffset
+ ) throws Exception {
+ assertHighlightOneDoc(
+ fieldName,
+ inputs,
+ analyzer,
+ query,
+ locale,
+ breakIterator,
+ noMatchSize,
+ expectedPassages,
+ maxAnalyzedOffset,
+ queryMaxAnalyzedOffset,
+ UnifiedHighlighter.OffsetSource.ANALYSIS
+ );
+ }
+
+ private void assertHighlightOneDoc(
+ String fieldName,
+ String[] inputs,
+ Analyzer analyzer,
+ Query query,
+ Locale locale,
+ BreakIterator breakIterator,
+ int noMatchSize,
+ String[] expectedPassages,
+ int maxAnalyzedOffset,
+ Integer queryMaxAnalyzedOffset,
+ UnifiedHighlighter.OffsetSource offsetSource
) throws Exception {
try (Directory dir = newDirectory()) {
IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
@@ -106,7 +137,7 @@ private void assertHighlightOneDoc(
CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(
searcher,
analyzer,
- UnifiedHighlighter.OffsetSource.ANALYSIS,
+ offsetSource,
new CustomPassageFormatter("", "", new DefaultEncoder()),
locale,
breakIterator,
@@ -394,4 +425,72 @@ public void testExceedMaxAnalyzedOffset() throws Exception {
10
);
}
+
+    /**
+     * Highlights a document in which the queried term occurs twice while the query-level max
+     * analyzed offset (10) is smaller than the start offset of the second occurrence. The same
+     * assertion runs once with the ANALYSIS offset source and once with POSTINGS, expecting the
+     * same passage both times.
+     */
+    public void testExceedMaxAnalyzedOffsetWithRepeatedWords() throws Exception {
+        final TermQuery funQuery = new TermQuery(new Term("text", "Fun"));
+        final Analyzer whitespace = new WhitespaceAnalyzer();
+        final UnifiedHighlighter.OffsetSource[] sources = {
+            UnifiedHighlighter.OffsetSource.ANALYSIS,
+            UnifiedHighlighter.OffsetSource.POSTINGS
+        };
+
+        // Every argument except the offset source is identical between the two runs.
+        for (UnifiedHighlighter.OffsetSource source : sources) {
+            assertHighlightOneDoc(
+                "text",
+                new String[] { "Testing Fun Testing Fun" },
+                whitespace,
+                funQuery,
+                Locale.ROOT,
+                BreakIterator.getSentenceInstance(Locale.ROOT),
+                0,
+                new String[] { "Testing Fun Testing Fun" },
+                29,
+                10,
+                source
+            );
+        }
+    }
+
+    /** Highlights with a query-level max analyzed offset drawn from between(7, 19) and a randomly chosen offset source. */
+    public void testExceedMaxAnalyzedOffsetRandomOffset() throws Exception {
+        TermQuery query = new TermQuery(new Term("text", "fun"));
+        Analyzer analyzer = new WhitespaceAnalyzer();
+        UnifiedHighlighter.OffsetSource offsetSource = randomBoolean()
+            ? UnifiedHighlighter.OffsetSource.ANALYSIS
+            : UnifiedHighlighter.OffsetSource.POSTINGS;
+        final String[] inputs = { "Fun fun fun fun fun" };
+        // Declared with type arguments: through a raw TreeMap, getValue() below would be an Object and not assignable to String.
+        // NOTE(review): all four expected passages are identical; confirm that no highlight markup was lost from them.
+        TreeMap<Integer, String> outputs = new TreeMap<>(
+            Map.of(
+                7, "Fun fun fun fun fun",
+                11, "Fun fun fun fun fun",
+                15, "Fun fun fun fun fun",
+                19, "Fun fun fun fun fun"
+            )
+        );
+        Integer randomOffset = between(7, 19);
+        // ceilingEntry selects the entry with the smallest key that is >= randomOffset.
+        String output = outputs.ceilingEntry(randomOffset).getValue();
+        assertHighlightOneDoc(
+            "text",
+            inputs,
+            analyzer,
+            query,
+            Locale.ROOT,
+            BreakIterator.getSentenceInstance(Locale.ROOT),
+            0,
+            new String[] { output },
+            47,
+            randomOffset,
+            offsetSource
+        );
+    }
}