Skip to content

Commit 5d5e701

Browse files
authored
1. Add changelog and randomized tests for different offset sources in CustomUnifiedHighlighterTests
1 parent 1a009ea commit 5d5e701

File tree

2 files changed

+71
-65
lines changed

2 files changed

+71
-65
lines changed

docs/changelog/86110.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 86110
2+
summary: Add LimitedOffsetsEnum to limit offset tokens
3+
area: Search/Highlighting
4+
type: enhancement
5+
issues:
6+
- 86109

server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java

Lines changed: 65 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,12 @@
3737
import org.apache.lucene.tests.index.RandomIndexWriter;
3838
import org.elasticsearch.common.Strings;
3939
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
40-
import org.elasticsearch.search.fetch.subphase.highlight.LimitTokenOffsetAnalyzer;
4140
import org.elasticsearch.test.ESTestCase;
4241

4342
import java.text.BreakIterator;
4443
import java.util.Locale;
44+
import java.util.Map;
45+
import java.util.TreeMap;
4546

4647
import static org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
4748
import static org.hamcrest.CoreMatchers.equalTo;
@@ -84,6 +85,34 @@ private void assertHighlightOneDoc(
8485
String[] expectedPassages,
8586
int maxAnalyzedOffset,
8687
Integer queryMaxAnalyzedOffset
88+
) throws Exception {
89+
assertHighlightOneDoc(
90+
fieldName,
91+
inputs,
92+
analyzer,
93+
query,
94+
locale,
95+
breakIterator,
96+
noMatchSize,
97+
expectedPassages,
98+
maxAnalyzedOffset,
99+
queryMaxAnalyzedOffset,
100+
UnifiedHighlighter.OffsetSource.ANALYSIS
101+
);
102+
}
103+
104+
private void assertHighlightOneDoc(
105+
String fieldName,
106+
String[] inputs,
107+
Analyzer analyzer,
108+
Query query,
109+
Locale locale,
110+
BreakIterator breakIterator,
111+
int noMatchSize,
112+
String[] expectedPassages,
113+
int maxAnalyzedOffset,
114+
Integer queryMaxAnalyzedOffset,
115+
UnifiedHighlighter.OffsetSource offsetSource
87116
) throws Exception {
88117
try (Directory dir = newDirectory()) {
89118
IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
@@ -108,7 +137,7 @@ private void assertHighlightOneDoc(
108137
CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(
109138
searcher,
110139
analyzer,
111-
UnifiedHighlighter.OffsetSource.ANALYSIS,
140+
offsetSource,
112141
new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()),
113142
locale,
114143
breakIterator,
@@ -397,63 +426,6 @@ public void testExceedMaxAnalyzedOffset() throws Exception {
397426
);
398427
}
399428

400-
private void assertHighlightOneDoc(
401-
String fieldName,
402-
String[] inputs,
403-
Analyzer analyzer,
404-
Query query,
405-
Locale locale,
406-
BreakIterator breakIterator,
407-
int noMatchSize,
408-
String[] expectedPassages,
409-
int maxAnalyzedOffset,
410-
Integer queryMaxAnalyzedOffset,
411-
UnifiedHighlighter.OffsetSource offsetSource
412-
) throws Exception {
413-
try (Directory dir = newDirectory()) {
414-
IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
415-
iwc.setMergePolicy(newTieredMergePolicy(random()));
416-
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
417-
FieldType ft = new FieldType(TextField.TYPE_STORED);
418-
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
419-
ft.freeze();
420-
Document doc = new Document();
421-
for (String input : inputs) {
422-
Field field = new Field(fieldName, "", ft);
423-
field.setStringValue(input);
424-
doc.add(field);
425-
}
426-
iw.addDocument(doc);
427-
try (DirectoryReader reader = iw.getReader()) {
428-
IndexSearcher searcher = newSearcher(reader);
429-
iw.close();
430-
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
431-
assertThat(topDocs.totalHits.value, equalTo(1L));
432-
String rawValue = Strings.arrayToDelimitedString(inputs, String.valueOf(MULTIVAL_SEP_CHAR));
433-
CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(
434-
searcher,
435-
wrapAnalyzer(analyzer,queryMaxAnalyzedOffset),
436-
offsetSource,
437-
new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()),
438-
locale,
439-
breakIterator,
440-
"index",
441-
"text",
442-
query,
443-
noMatchSize,
444-
expectedPassages.length,
445-
name -> "text".equals(name),
446-
maxAnalyzedOffset,
447-
queryMaxAnalyzedOffset
448-
);
449-
final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue);
450-
assertEquals(snippets.length, expectedPassages.length);
451-
for (int i = 0; i < snippets.length; i++) {
452-
assertEquals(snippets[i].getText(), expectedPassages[i]);
453-
}
454-
}
455-
}
456-
}
457429
public void testExceedMaxAnalyzedOffsetWithRepeatedWords() throws Exception {
458430

459431
TermQuery query = new TermQuery(new Term("text", "Fun"));
@@ -487,11 +459,39 @@ public void testExceedMaxAnalyzedOffsetWithRepeatedWords() throws Exception {
487459
);
488460
}
489461

490-
491-
protected Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
492-
if (maxAnalyzedOffset != null) {
493-
analyzer = new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset);
494-
}
495-
return analyzer;
/**
 * Highlights the single value "Fun fun fun fun fun" for the term query "fun" with a
 * randomly chosen offset source (ANALYSIS or POSTINGS) and a randomly chosen query-level
 * max analyzed offset, then checks the one returned passage against a lookup table of
 * expected highlights.
 *
 * The expected passage is the table entry with the smallest key that is greater than or
 * equal to the random offset. None of the expected passages highlight the leading
 * capitalized "Fun".
 */
462+
public void testExceedMaxAnalyzedOffsetRandomOffset() throws Exception {
463+
TermQuery query = new TermQuery(new Term("text", "fun"));
464+
Analyzer analyzer = new WhitespaceAnalyzer();
// Pick the offset source at random so both code paths are exercised across runs.
465+
UnifiedHighlighter.OffsetSource offsetSource = randomBoolean()
466+
? UnifiedHighlighter.OffsetSource.ANALYSIS
467+
: UnifiedHighlighter.OffsetSource.POSTINGS;
468+
final String[] inputs = { "Fun fun fun fun fun" };
// Expected passage per offset bucket. The keys 7, 11, 15 and 19 coincide with the end
// character positions of the 2nd, 3rd, 4th and 5th words of the input string; each
// successive entry highlights one more occurrence of "fun".
469+
TreeMap<Integer, String> outputs = new TreeMap<>(
470+
Map.of(
471+
7,
472+
"Fun <b>fun</b> fun fun fun",
473+
11,
474+
"Fun <b>fun</b> <b>fun</b> fun fun",
475+
15,
476+
"Fun <b>fun</b> <b>fun</b> <b>fun</b> fun",
477+
19,
478+
"Fun <b>fun</b> <b>fun</b> <b>fun</b> <b>fun</b>"
479+
)
480+
);
// presumably between(1, 19) is inclusive on both ends; verify against the test base class.
// NOTE(review): offsets 1..3 resolve to the entry keyed 7, which expects one highlighted
// "fun" that starts at character 4 - confirm the offset limiter admits that token when the
// limit is smaller than 4, otherwise this test can fail for those random values.
481+
Integer randomOffset = between(1, 19);
// ceilingEntry returns the entry with the least key >= randomOffset; 19 is the largest
// possible random value and is itself a key, so the lookup never returns null here.
482+
String output = outputs.ceilingEntry(randomOffset).getValue();
483+
assertHighlightOneDoc(
484+
"text",
485+
inputs,
486+
analyzer,
487+
query,
488+
Locale.ROOT,
489+
BreakIterator.getSentenceInstance(Locale.ROOT),
// noMatchSize
490+
0,
491+
new String[] { output },
// maxAnalyzedOffset
492+
47,
// queryMaxAnalyzedOffset
493+
randomOffset,
494+
offsetSource
495+
);
496496
}
497497
}

0 commit comments

Comments
 (0)