 import org.apache.lucene.tests.index.RandomIndexWriter;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
-import org.elasticsearch.search.fetch.subphase.highlight.LimitTokenOffsetAnalyzer;
 import org.elasticsearch.test.ESTestCase;
 
 import java.text.BreakIterator;
 import java.util.Locale;
+import java.util.Map;
+import java.util.TreeMap;
 
 import static org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
 import static org.hamcrest.CoreMatchers.equalTo;
@@ -84,6 +85,34 @@ private void assertHighlightOneDoc(
         String[] expectedPassages,
         int maxAnalyzedOffset,
         Integer queryMaxAnalyzedOffset
+    ) throws Exception {
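+        // Overload without an explicit offset source; delegates with ANALYSIS so existing callers keep their previous behaviour.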
+        assertHighlightOneDoc(
+            fieldName,
+            inputs,
+            analyzer,
+            query,
+            locale,
+            breakIterator,
+            noMatchSize,
+            expectedPassages,
+            maxAnalyzedOffset,
+            queryMaxAnalyzedOffset,
+            UnifiedHighlighter.OffsetSource.ANALYSIS
+        );
+    }
+
+    private void assertHighlightOneDoc(
+        String fieldName,
+        String[] inputs,
+        Analyzer analyzer,
+        Query query,
+        Locale locale,
+        BreakIterator breakIterator,
+        int noMatchSize,
+        String[] expectedPassages,
+        int maxAnalyzedOffset,
+        Integer queryMaxAnalyzedOffset,
+        UnifiedHighlighter.OffsetSource offsetSource
     ) throws Exception {
         try (Directory dir = newDirectory()) {
             IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
@@ -108,7 +137,7 @@ private void assertHighlightOneDoc(
                 CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(
                     searcher,
                     analyzer,
-                    UnifiedHighlighter.OffsetSource.ANALYSIS,
+                    offsetSource,
                     new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()),
                     locale,
                     breakIterator,
@@ -397,63 +426,6 @@ public void testExceedMaxAnalyzedOffset() throws Exception {
         );
     }
 
-    private void assertHighlightOneDoc(
-        String fieldName,
-        String[] inputs,
-        Analyzer analyzer,
-        Query query,
-        Locale locale,
-        BreakIterator breakIterator,
-        int noMatchSize,
-        String[] expectedPassages,
-        int maxAnalyzedOffset,
-        Integer queryMaxAnalyzedOffset,
-        UnifiedHighlighter.OffsetSource offsetSource
-    ) throws Exception {
-        try (Directory dir = newDirectory()) {
-            IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
-            iwc.setMergePolicy(newTieredMergePolicy(random()));
-            RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-            FieldType ft = new FieldType(TextField.TYPE_STORED);
-            ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
-            ft.freeze();
-            Document doc = new Document();
-            for (String input : inputs) {
-                Field field = new Field(fieldName, "", ft);
-                field.setStringValue(input);
-                doc.add(field);
-            }
-            iw.addDocument(doc);
-            try (DirectoryReader reader = iw.getReader()) {
-                IndexSearcher searcher = newSearcher(reader);
-                iw.close();
-                TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
-                assertThat(topDocs.totalHits.value, equalTo(1L));
-                String rawValue = Strings.arrayToDelimitedString(inputs, String.valueOf(MULTIVAL_SEP_CHAR));
-                CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(
-                    searcher,
-                    wrapAnalyzer(analyzer, queryMaxAnalyzedOffset),
-                    offsetSource,
-                    new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()),
-                    locale,
-                    breakIterator,
-                    "index",
-                    "text",
-                    query,
-                    noMatchSize,
-                    expectedPassages.length,
-                    name -> "text".equals(name),
-                    maxAnalyzedOffset,
-                    queryMaxAnalyzedOffset
-                );
-                final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue);
-                assertEquals(snippets.length, expectedPassages.length);
-                for (int i = 0; i < snippets.length; i++) {
-                    assertEquals(snippets[i].getText(), expectedPassages[i]);
-                }
-            }
-        }
-    }
     public void testExceedMaxAnalyzedOffsetWithRepeatedWords() throws Exception {
 
         TermQuery query = new TermQuery(new Term("text", "Fun"));
@@ -487,11 +459,39 @@ public void testExceedMaxAnalyzedOffsetWithRepeatedWords() throws Exception {
         );
     }
 
-
-    protected Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
-        if (maxAnalyzedOffset != null) {
-            analyzer = new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset);
-        }
-        return analyzer;
+    public void testExceedMaxAnalyzedOffsetRandomOffset() throws Exception {
+        TermQuery query = new TermQuery(new Term("text", "fun"));
+        Analyzer analyzer = new WhitespaceAnalyzer();
+        UnifiedHighlighter.OffsetSource offsetSource = randomBoolean()
+            ? UnifiedHighlighter.OffsetSource.ANALYSIS
+            : UnifiedHighlighter.OffsetSource.POSTINGS;
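+        // Randomly exercise both offset sources: ANALYSIS re-analyzes the stored text, POSTINGS reads the offsets indexed with the field.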
+        final String[] inputs = { "Fun fun fun fun fun" };
+        TreeMap<Integer, String> outputs = new TreeMap<>(
+            Map.of(
+                7,
+                "Fun <b>fun</b> fun fun fun",
+                11,
+                "Fun <b>fun</b> <b>fun</b> fun fun",
+                15,
+                "Fun <b>fun</b> <b>fun</b> <b>fun</b> fun",
+                19,
+                "Fun <b>fun</b> <b>fun</b> <b>fun</b> <b>fun</b>"
+            )
+        );
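+        // Keys are the end offsets of each "fun" token; ceilingEntry picks the passage expected for whichever bucket the random queryMaxAnalyzedOffset falls into.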
+        Integer randomOffset = between(1, 19);
+        String output = outputs.ceilingEntry(randomOffset).getValue();
+        assertHighlightOneDoc(
+            "text",
+            inputs,
+            analyzer,
+            query,
+            Locale.ROOT,
+            BreakIterator.getSentenceInstance(Locale.ROOT),
+            0,
+            new String[] { output },
+            47,
+            randomOffset,
+            offsetSource
+        );
     }
 }