Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/86110.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 86110
summary: Add `LimitedOffsetsEnum` to limit the highlighted token offsets to `max_analyzed_offset`
area: Search
type: enhancement
issues:
- 86109
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class CustomFieldHighlighter extends FieldHighlighter {
private final Locale breakIteratorLocale;
private final int noMatchSize;
private String fieldValue;
private final Integer queryMaxAnalyzedOffset;

CustomFieldHighlighter(
String field,
Expand All @@ -46,11 +47,13 @@ class CustomFieldHighlighter extends FieldHighlighter {
int maxPassages,
int maxNoHighlightPassages,
PassageFormatter passageFormatter,
int noMatchSize
int noMatchSize,
Integer queryMaxAnalyzedOffset
) {
super(field, fieldOffsetStrategy, breakIterator, passageScorer, maxPassages, maxNoHighlightPassages, passageFormatter);
this.breakIteratorLocale = breakIteratorLocale;
this.noMatchSize = noMatchSize;
this.queryMaxAnalyzedOffset = queryMaxAnalyzedOffset;
}

FieldOffsetStrategy getFieldOffsetStrategy() {
Expand Down Expand Up @@ -106,6 +109,10 @@ protected Passage[] getSummaryPassagesNoHighlight(int maxPassages) {
@Override
protected Passage[] highlightOffsetsEnums(OffsetsEnum off) throws IOException {

if (queryMaxAnalyzedOffset != null) {
off = new LimitedOffsetsEnum(off, queryMaxAnalyzedOffset);
}

final int contentLength = this.breakIterator.getText().getEndIndex();

if (off.nextPosition() == false) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,8 @@ protected CustomFieldHighlighter getFieldHighlighter(String field, Query query,
maxPassages,
(noMatchSize > 0 ? 1 : 0),
getFormatter(field),
noMatchSize
noMatchSize,
queryMaxAnalyzedOffset
);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.lucene.search.uhighlight;

import org.apache.lucene.search.uhighlight.OffsetsEnum;
import org.apache.lucene.util.BytesRef;

import java.io.IOException;

/**
 * An {@link OffsetsEnum} decorator that stops iterating positions once the
 * delegate's start offset exceeds a configured maximum. Used by the unified
 * highlighter to honor {@code max_analyzed_offset} regardless of offset source.
 */
public class LimitedOffsetsEnum extends OffsetsEnum {
    // Underlying enum providing the real positions/offsets.
    private final OffsetsEnum delegate;
    // Inclusive upper bound: positions starting past this offset are dropped.
    private final int maxOffset;

    public LimitedOffsetsEnum(OffsetsEnum delegate, int maxOffset) {
        this.delegate = delegate;
        this.maxOffset = maxOffset;
    }

    /**
     * Advances the delegate; reports exhaustion as soon as either the delegate
     * runs out of positions or the next position starts beyond {@code maxOffset}.
     */
    @Override
    public boolean nextPosition() throws IOException {
        if (delegate.nextPosition() == false) {
            return false;
        }
        return delegate.startOffset() <= maxOffset;
    }

    @Override
    public int freq() throws IOException {
        return delegate.freq();
    }

    @Override
    public BytesRef getTerm() throws IOException {
        return delegate.getTerm();
    }

    @Override
    public int startOffset() throws IOException {
        return delegate.startOffset();
    }

    @Override
    public int endOffset() throws IOException {
        return delegate.endOffset();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
package org.elasticsearch.lucene.search.uhighlight;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizerFactory;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
Expand Down Expand Up @@ -40,6 +41,8 @@

import java.text.BreakIterator;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;

import static org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
import static org.hamcrest.CoreMatchers.equalTo;
Expand Down Expand Up @@ -82,6 +85,34 @@ private void assertHighlightOneDoc(
String[] expectedPassages,
int maxAnalyzedOffset,
Integer queryMaxAnalyzedOffset
) throws Exception {
assertHighlightOneDoc(
fieldName,
inputs,
analyzer,
query,
locale,
breakIterator,
noMatchSize,
expectedPassages,
maxAnalyzedOffset,
queryMaxAnalyzedOffset,
UnifiedHighlighter.OffsetSource.ANALYSIS
);
}

private void assertHighlightOneDoc(
String fieldName,
String[] inputs,
Analyzer analyzer,
Query query,
Locale locale,
BreakIterator breakIterator,
int noMatchSize,
String[] expectedPassages,
int maxAnalyzedOffset,
Integer queryMaxAnalyzedOffset,
UnifiedHighlighter.OffsetSource offsetSource
) throws Exception {
try (Directory dir = newDirectory()) {
IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
Expand All @@ -106,7 +137,7 @@ private void assertHighlightOneDoc(
CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(
searcher,
analyzer,
UnifiedHighlighter.OffsetSource.ANALYSIS,
offsetSource,
new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()),
locale,
breakIterator,
Expand Down Expand Up @@ -394,4 +425,72 @@ public void testExceedMaxAnalyzedOffset() throws Exception {
10
);
}

/**
 * Repeated matching terms past the query max analyzed offset must not be
 * highlighted, for both the ANALYSIS and POSTINGS offset sources.
 */
public void testExceedMaxAnalyzedOffsetWithRepeatedWords() throws Exception {
    TermQuery query = new TermQuery(new Term("text", "Fun"));
    Analyzer analyzer = new WhitespaceAnalyzer();
    // Same expectation for both offset sources: only the first "Fun" (offset 8)
    // falls within the limit of 10; the second (offset 20) is ignored.
    UnifiedHighlighter.OffsetSource[] sources = {
        UnifiedHighlighter.OffsetSource.ANALYSIS,
        UnifiedHighlighter.OffsetSource.POSTINGS };
    for (UnifiedHighlighter.OffsetSource source : sources) {
        assertHighlightOneDoc(
            "text",
            new String[] { "Testing Fun Testing Fun" },
            analyzer,
            query,
            Locale.ROOT,
            BreakIterator.getSentenceInstance(Locale.ROOT),
            0,
            new String[] { "Testing <b>Fun</b> Testing Fun" },
            29,
            10,
            source
        );
    }
}

/**
 * With a random query max analyzed offset in [7, 19], the number of highlighted
 * "fun" occurrences must track how many token start offsets fall within the
 * limit, under a randomly chosen offset source.
 */
public void testExceedMaxAnalyzedOffsetRandomOffset() throws Exception {
    TermQuery query = new TermQuery(new Term("text", "fun"));
    Analyzer analyzer = new WhitespaceAnalyzer();
    UnifiedHighlighter.OffsetSource offsetSource = randomBoolean()
        ? UnifiedHighlighter.OffsetSource.ANALYSIS
        : UnifiedHighlighter.OffsetSource.POSTINGS;
    final String[] inputs = { "Fun fun fun fun fun" };
    int limit = between(7, 19);
    // Lowercase "fun" tokens start at offsets 4, 8, 12 and 16; each one whose
    // start offset is <= limit gets highlighted (thresholds 7/11/15/19 mirror
    // the ceiling lookup the original table-based version performed).
    final String expected;
    if (limit <= 7) {
        expected = "Fun <b>fun</b> fun fun fun";
    } else if (limit <= 11) {
        expected = "Fun <b>fun</b> <b>fun</b> fun fun";
    } else if (limit <= 15) {
        expected = "Fun <b>fun</b> <b>fun</b> <b>fun</b> fun";
    } else {
        expected = "Fun <b>fun</b> <b>fun</b> <b>fun</b> <b>fun</b>";
    }
    assertHighlightOneDoc(
        "text",
        inputs,
        analyzer,
        query,
        Locale.ROOT,
        BreakIterator.getSentenceInstance(Locale.ROOT),
        0,
        new String[] { expected },
        47,
        limit,
        offsetSource
    );
}
}