Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,6 @@ setup:

---
"Multisearch test with typed_keys parameter for sampler and significant terms":
- skip:
version: "all"
reason: "AwaitsFix https://github.com/elastic/elasticsearch/issues/57402"
- do:
msearch:
rest_total_hits_as_int: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,12 @@ protected StringTerms buildEmptyTermsAggregation() {
metadata(), format, bucketCountThresholds.getShardSize(), showTermDocCountError, 0, emptyList(), 0);
}

protected SignificantStringTerms buildEmptySignificantTermsAggregation(SignificanceHeuristic significanceHeuristic) {
protected SignificantStringTerms buildEmptySignificantTermsAggregation(long subsetSize, SignificanceHeuristic significanceHeuristic) {
// We need to account for the significance of a miss in our global stats - provide corpus size as context
ContextIndexSearcher searcher = context.searcher();
IndexReader topReader = searcher.getIndexReader();
int supersetSize = topReader.numDocs();
return new SignificantStringTerms(name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(),
metadata(), format, 0, supersetSize, significanceHeuristic, emptyList());
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

0 bad!

metadata(), format, subsetSize, supersetSize, significanceHeuristic, emptyList());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -767,7 +767,7 @@ SignificantStringTerms buildResult(SignificantStringTerms.Bucket[] topBuckets, l

@Override
SignificantStringTerms buildEmptyResult() {
    // Pass the subset size so an empty result still reports how many docs matched the query.
    return buildEmptySignificantTermsAggregation(subsetSize, significanceHeuristic);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ SignificantStringTerms buildResult(SignificantStringTerms.Bucket[] topBuckets, l

@Override
SignificantStringTerms buildEmptyResult() {
    // Pass the subset size so an empty result still reports how many docs matched the query.
    return buildEmptySignificantTermsAggregation(subsetSize, significanceHeuristic);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
Expand Down Expand Up @@ -146,8 +147,9 @@ public void testSignificance() throws IOException {
IndexSearcher searcher = new IndexSearcher(reader);

// Search "odd"
SignificantTerms terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg, textFieldType);
SignificantStringTerms terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg, textFieldType);

assertThat(terms.getSubsetSize(), equalTo(5L));
assertEquals(1, terms.getBuckets().size());
assertNull(terms.getBucketByKey("even"));
assertNull(terms.getBucketByKey("common"));
Expand All @@ -156,6 +158,7 @@ public void testSignificance() throws IOException {
// Search even
terms = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), sigAgg, textFieldType);

assertThat(terms.getSubsetSize(), equalTo(5L));
assertEquals(1, terms.getBuckets().size());
assertNull(terms.getBucketByKey("odd"));
assertNull(terms.getBucketByKey("common"));
Expand All @@ -164,6 +167,7 @@ public void testSignificance() throws IOException {
// Search odd with regex includeexcludes
sigAgg.includeExclude(new IncludeExclude("o.d", null));
terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg, textFieldType);
assertThat(terms.getSubsetSize(), equalTo(5L));
assertEquals(1, terms.getBuckets().size());
assertNotNull(terms.getBucketByKey("odd"));
assertNull(terms.getBucketByKey("common"));
Expand All @@ -176,6 +180,7 @@ public void testSignificance() throws IOException {
sigAgg.includeExclude(new IncludeExclude(oddStrings, evenStrings));
sigAgg.significanceHeuristic(SignificanceHeuristicTests.getRandomSignificanceheuristic());
terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg, textFieldType);
assertThat(terms.getSubsetSize(), equalTo(5L));
assertEquals(1, terms.getBuckets().size());
assertNotNull(terms.getBucketByKey("odd"));
assertNull(terms.getBucketByKey("weird"));
Expand All @@ -185,6 +190,7 @@ public void testSignificance() throws IOException {

sigAgg.includeExclude(new IncludeExclude(evenStrings, oddStrings));
terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg, textFieldType);
assertThat(terms.getSubsetSize(), equalTo(5L));
assertEquals(0, terms.getBuckets().size());
assertNull(terms.getBucketByKey("odd"));
assertNull(terms.getBucketByKey("weird"));
Expand Down Expand Up @@ -240,13 +246,15 @@ public void testNumericSignificance() throws IOException {
// Search "odd"
SignificantLongTerms terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigNumAgg, longFieldType);
assertEquals(1, terms.getBuckets().size());
assertThat(terms.getSubsetSize(), equalTo(5L));

assertNull(terms.getBucketByKey(Long.toString(EVEN_VALUE)));
assertNull(terms.getBucketByKey(Long.toString(COMMON_VALUE)));
assertNotNull(terms.getBucketByKey(Long.toString(ODD_VALUE)));

terms = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), sigNumAgg, longFieldType);
assertEquals(1, terms.getBuckets().size());
assertThat(terms.getSubsetSize(), equalTo(5L));

assertNull(terms.getBucketByKey(Long.toString(ODD_VALUE)));
assertNull(terms.getBucketByKey(Long.toString(COMMON_VALUE)));
Expand Down Expand Up @@ -379,6 +387,172 @@ public void testFieldAlias() throws IOException {
}
}

public void testAllDocsWithoutStringFieldviaGlobalOrds() throws IOException {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These actually reproduce the issue.

testAllDocsWithoutStringField("global_ordinals");
}

/**
 * Runs {@link #testAllDocsWithoutStringField} with the {@code map} execution hint.
 */
public void testAllDocsWithoutStringFieldViaMap() throws IOException {
testAllDocsWithoutStringField("map");
}

/**
* Make sure that when the field is mapped but there aren't any values
* for it we return a properly shaped "empty" result. In particular, the
* {@link InternalMappedSignificantTerms#getSubsetSize()} needs to be set
* to the number of matching documents.
*/
private void testAllDocsWithoutStringField(String executionHint) throws IOException {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test (or it's mapped/global wrappers, if you'd rather) needs javadoc. It's not at all clear from the name what path this is intended to exercise.

try (Directory dir = newDirectory()) {
try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
writer.addDocument(new Document());
try (IndexReader reader = maybeWrapReaderEs(writer.getReader())) {
IndexSearcher searcher = newIndexSearcher(reader);
SignificantTermsAggregationBuilder request = new SignificantTermsAggregationBuilder("f").field("f")
.executionHint(executionHint);
SignificantStringTerms result = search(searcher, new MatchAllDocsQuery(), request, keywordField("f"));
assertThat(result.getSubsetSize(), equalTo(1L));
}
}
}
}

/**
 * When the numeric field is mapped but no document has a value for it, the
 * "empty" result must still report the number of matching documents in
 * {@link InternalMappedSignificantTerms#getSubsetSize()}.
 */
public void testAllDocsWithoutNumericField() throws IOException {
    try (Directory directory = newDirectory()) {
        try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
            // One document, with no value for "f" at all.
            indexWriter.addDocument(new Document());
            try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) {
                IndexSearcher indexSearcher = newIndexSearcher(indexReader);
                SignificantTermsAggregationBuilder builder = new SignificantTermsAggregationBuilder("f").field("f");
                SignificantLongTerms terms = search(indexSearcher, new MatchAllDocsQuery(), builder, longField("f"));
                assertThat(terms.getSubsetSize(), equalTo(1L));
            }
        }
    }
}

public void testSomeDocsWithoutStringFieldviaGlobalOrds() throws IOException {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought this would reproduce the issue but it doesn't. It still feels useful to add.

testSomeDocsWithoutStringField("global_ordinals");
}

/**
 * Runs {@link #testSomeDocsWithoutStringField} with the {@code map} execution hint.
 */
public void testSomeDocsWithoutStringFieldViaMap() throws IOException {
testSomeDocsWithoutStringField("map");
}

/**
 * Make sure that when a segment doesn't contain the field we still include
 * the count of its matching documents
 * in {@link InternalMappedSignificantTerms#getSubsetSize()}.
 */
private void testSomeDocsWithoutStringField(String executionHint) throws IOException {
try (Directory dir = newDirectory()) {
try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
Document d = new Document();
d.add(new SortedDocValuesField("f", new BytesRef("f")));
writer.addDocument(d);
// Flush so the field-less document below is (presumably) indexed into a
// separate segment — NOTE(review): RandomIndexWriter may still merge.
writer.flush();
writer.addDocument(new Document());
try (IndexReader reader = maybeWrapReaderEs(writer.getReader())) {
IndexSearcher searcher = newIndexSearcher(reader);
SignificantTermsAggregationBuilder request = new SignificantTermsAggregationBuilder("f").field("f")
.executionHint(executionHint);
SignificantStringTerms result = search(searcher, new MatchAllDocsQuery(), request, keywordField("f"));
// Both documents match the query, so the subset size must count the one without the field.
assertThat(result.getSubsetSize(), equalTo(2L));
}
}
}
}

/**
 * Make sure that when a segment doesn't contain the numeric field we still
 * include the count of its matching documents
 * in {@link InternalMappedSignificantTerms#getSubsetSize()}.
 */
public void testSomeDocsWithoutNumericField() throws IOException {
try (Directory dir = newDirectory()) {
try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
Document d = new Document();
d.add(new SortedNumericDocValuesField("f", 1));
writer.addDocument(d);
// Second document has no value for "f".
writer.addDocument(new Document());
try (IndexReader reader = maybeWrapReaderEs(writer.getReader())) {
IndexSearcher searcher = newIndexSearcher(reader);
SignificantTermsAggregationBuilder request = new SignificantTermsAggregationBuilder("f").field("f");
SignificantLongTerms result = search(searcher, new MatchAllDocsQuery(), request, longField("f"));
// Both documents match the query, so the subset size must count the one without the field.
assertThat(result.getSubsetSize(), equalTo(2L));
}
}
}
}

public void testThreeLayerStringViaGlobalOrds() throws IOException {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are coming in #57758 and I thought they might reproduce the issue so I added them as well. No dice. Either way, they are nice to have.

threeLayerStringTestCase("global_ordinals");
}

/**
 * Runs {@link #threeLayerStringTestCase} with the {@code map} execution hint.
 */
public void testThreeLayerStringViaMap() throws IOException {
threeLayerStringTestCase("map");
}

/**
 * Indexes a 10x10x10 cube of keyword values and checks that each layer of a
 * three-deep significant_terms tree reports the correct subset size and
 * bucket doc counts.
 *
 * @param executionHint terms aggregation execution hint ({@code map} or {@code global_ordinals})
 */
private void threeLayerStringTestCase(String executionHint) throws IOException {
    try (Directory dir = newDirectory()) {
        try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
            for (int i = 0; i < 10; i++) {
                for (int j = 0; j < 10; j++) {
                    for (int k = 0; k < 10; k++) {
                        Document d = new Document();
                        d.add(new SortedDocValuesField("i", new BytesRef(Integer.toString(i))));
                        d.add(new SortedDocValuesField("j", new BytesRef(Integer.toString(j))));
                        d.add(new SortedDocValuesField("k", new BytesRef(Integer.toString(k))));
                        writer.addDocument(d);
                    }
                }
            }
            try (IndexReader reader = maybeWrapReaderEs(writer.getReader())) {
                IndexSearcher searcher = newIndexSearcher(reader);
                SignificantTermsAggregationBuilder kRequest = new SignificantTermsAggregationBuilder("k").field("k")
                    .executionHint(executionHint);
                SignificantTermsAggregationBuilder jRequest = new SignificantTermsAggregationBuilder("j").field("j")
                    .executionHint(executionHint)
                    .subAggregation(kRequest);
                SignificantTermsAggregationBuilder request = new SignificantTermsAggregationBuilder("i").field("i")
                    .executionHint(executionHint)
                    .subAggregation(jRequest);
                SignificantStringTerms result = search(
                    searcher,
                    new MatchAllDocsQuery(),
                    request,
                    keywordField("i"),
                    keywordField("j"),
                    keywordField("k")
                );
                // All 1000 documents match the top-level query.
                assertThat(result.getSubsetSize(), equalTo(1000L));
                for (int i = 0; i < 10; i++) {
                    SignificantStringTerms.Bucket iBucket = result.getBucketByKey(Integer.toString(i));
                    assertThat(iBucket.getDocCount(), equalTo(100L));
                    SignificantStringTerms jAgg = iBucket.getAggregations().get("j");
                    assertThat(jAgg.getSubsetSize(), equalTo(100L));
                    for (int j = 0; j < 10; j++) {
                        SignificantStringTerms.Bucket jBucket = jAgg.getBucketByKey(Integer.toString(j));
                        assertThat(jBucket.getDocCount(), equalTo(10L));
                        SignificantStringTerms kAgg = jBucket.getAggregations().get("k");
                        assertThat(kAgg.getSubsetSize(), equalTo(10L));
                        for (int k = 0; k < 10; k++) {
                            SignificantStringTerms.Bucket kBucket = kAgg.getBucketByKey(Integer.toString(k));
                            // Fixed: the original redundantly re-asserted jAgg's subset size here
                            // (already checked once per j iteration above). Each (i, j, k)
                            // combination matches exactly one document.
                            assertThat(kBucket.getDocCount(), equalTo(1L));
                        }
                    }
                }
            }
        }
    }
}

public void testThreeLayerLong() throws IOException {
try (Directory dir = newDirectory()) {
try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
Expand All @@ -400,14 +574,17 @@ public void testThreeLayerLong() throws IOException {
.subAggregation(new SignificantTermsAggregationBuilder("k").field("k")));
SignificantLongTerms result = search(searcher, new MatchAllDocsQuery(), request,
longField("i"), longField("j"), longField("k"));
assertThat(result.getSubsetSize(), equalTo(1000L));
for (int i = 0; i < 10; i++) {
SignificantLongTerms.Bucket iBucket = result.getBucketByKey(Integer.toString(i));
assertThat(iBucket.getDocCount(), equalTo(100L));
SignificantLongTerms jAgg = iBucket.getAggregations().get("j");
assertThat(jAgg.getSubsetSize(), equalTo(100L));
for (int j = 0; j < 10; j++) {
SignificantLongTerms.Bucket jBucket = jAgg.getBucketByKey(Integer.toString(j));
assertThat(jBucket.getDocCount(), equalTo(10L));
SignificantLongTerms kAgg = jBucket.getAggregations().get("k");
assertThat(kAgg.getSubsetSize(), equalTo(10L));
for (int k = 0; k < 10; k++) {
SignificantLongTerms.Bucket kBucket = kAgg.getBucketByKey(Integer.toString(k));
assertThat(kBucket.getDocCount(), equalTo(1L));
Expand Down