Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
a34e1db
Existing tests passing against new implementation
not-napoleon Oct 25, 2018
3cd7097
WIP - Testing Strategy
not-napoleon Nov 1, 2018
d018c82
Different distribution modes for testing
not-napoleon Nov 2, 2018
6f2b2a5
Set tolerance so tests pass
not-napoleon Nov 2, 2018
287d640
Up the population size
not-napoleon Nov 20, 2018
c645e34
remove exploratory testing - doesn't provide long term value to keep …
not-napoleon Nov 30, 2018
d214897
This has likely been fixed for a while
not-napoleon Nov 30, 2018
92f642e
Merge branch 'master' into feature/replace-AVLDigest-with-MergingDigest
not-napoleon Nov 30, 2018
b0a578d
Merge branch 'master' into feature/replace-AVLDigest-with-MergingDigest
not-napoleon Dec 4, 2018
27b17e6
Merge branch 'master' into feature/replace-AVLDigest-with-MergingDigest
not-napoleon Dec 7, 2018
f2a245d
Merge branch 'master' into feature/replace-AVLDigest-with-MergingDigest
not-napoleon Dec 10, 2018
54f37b3
Merge branch 'master' into feature/replace-AVLDigest-with-MergingDigest
not-napoleon Dec 14, 2018
b7c4a84
Merge branch 'master' into feature/replace-AVLDigest-with-MergingDigest
not-napoleon Jan 3, 2019
95199db
Merge branch 'master' into feature/replace-AVLDigest-with-MergingDigest
not-napoleon May 21, 2019
9a49d47
Merge branch 'master' into feature/replace-AVLDigest-with-MergingDigest
not-napoleon Nov 5, 2021
2494f6c
focused test for tdigest bug
not-napoleon Nov 8, 2021
c099aa5
Update docs/changelog/35182.yaml
not-napoleon Feb 2, 2022
fbdf6c4
Merge branch 'main' into feature/replace-AVLDigest-with-MergingDigest
not-napoleon Apr 18, 2023
32cfb9d
fix compile errors
not-napoleon Apr 18, 2023
2c43e61
upgraded to t-digest 3.3
not-napoleon Apr 19, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/35182.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 35182
summary: Feature/replace avl digest with merging digest
area: Aggregations
type: feature
issues: []
5 changes: 5 additions & 0 deletions gradle/verification-metadata.xml
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,11 @@
<sha256 value="03db291a8887b474f90db67bfb1f92d084e990150037e231babbb374ee11d7c3" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.tdunning" name="t-digest" version="3.3">
<artifact name="t-digest-3.3.jar">
<sha256 value="dc8be5228e0733e12fbe35eeffb6f720dff576bb2eb5dee69c1c211adcd7aeb9" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.thoughtworks.paranamer" name="paranamer" version="2.3">
<artifact name="paranamer-2.3.jar">
<sha256 value="e93f50ae4d0de11080677f44ab268691266fed2b3ff7bc6fd97636febae7d8fe" origin="Generated by Gradle"/>
Expand Down
2 changes: 1 addition & 1 deletion server/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ dependencies {
implementation 'com.carrotsearch:hppc:0.8.1'

// percentiles aggregation
api 'com.tdunning:t-digest:3.2'
api 'com.tdunning:t-digest:3.3'
// precentil ranks aggregation
api 'org.hdrhistogram:HdrHistogram:2.1.9'

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,6 @@ public void add(List<? extends TDigest> others) {
throw new UnsupportedOperationException("Immutable Empty TDigest");
}

@Override
public void add(double x, int w, List<Double> data) {
throw new UnsupportedOperationException("Immutable Empty TDigest");
}

@Override
public void compress() {
throw new UnsupportedOperationException("Immutable Empty TDigest");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
*/
package org.elasticsearch.search.aggregations.metrics;

import com.tdunning.math.stats.AVLTreeDigest;
import com.tdunning.math.stats.Centroid;
import com.tdunning.math.stats.MergingDigest;

import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
Expand All @@ -19,7 +19,7 @@
/**
* Extension of {@link com.tdunning.math.stats.TDigest} with custom serialization.
*/
public class TDigestState extends AVLTreeDigest {
public class TDigestState extends MergingDigest {

private final double compression;

Expand All @@ -34,6 +34,7 @@ public double compression() {
}

public static void write(TDigestState state, StreamOutput out) throws IOException {
state.compress();
out.writeDouble(state.compression);
out.writeVInt(state.centroidCount());
for (Centroid centroid : state.centroids()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,6 @@ public void testCompress() {
expectThrows(UnsupportedOperationException.class, singleton::compress);
}

public void testTestAddList() {
expectThrows(
UnsupportedOperationException.class,
() -> singleton.add(randomDouble(), randomInt(10), List.of(randomDouble(), randomDouble()))
);
}

public void testTestAddListTDigest() {
expectThrows(UnsupportedOperationException.class, () -> singleton.add(List.of(new EmptyTDigestState(), new EmptyTDigestState())));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ protected InternalTDigestPercentileRanks createTestInstance(
final TDigestState state = new TDigestState(100);
Arrays.stream(values).forEach(state::add);

assertEquals(state.centroidCount(), values.length);
assertEquals(state.size(), values.length);
return new InternalTDigestPercentileRanks(name, percents, state, keyed, format, metadata);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ protected InternalTDigestPercentiles createTestInstance(
final TDigestState state = new TDigestState(100);
Arrays.stream(values).forEach(state::add);

assertEquals(state.centroidCount(), values.length);
assertEquals(state.size(), values.length);
return new InternalTDigestPercentiles(name, percents, state, keyed, format, metadata);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,18 +74,14 @@ public void testSimple() throws IOException {
Iterator<Percentile> rankIterator = ranks.iterator();
Percentile rank = rankIterator.next();
assertEquals(0.1, rank.getValue(), 0d);
// TODO: Fix T-Digest: this assertion should pass but we currently get ~15
// https://github.com/elastic/elasticsearch/issues/14851
// assertThat(rank.getPercent(), Matchers.equalTo(0d));
assertThat(rank.getPercent(), Matchers.equalTo(0d));
rank = rankIterator.next();
assertEquals(0.5, rank.getValue(), 0d);
assertThat(rank.getPercent(), Matchers.greaterThan(0d));
assertThat(rank.getPercent(), Matchers.lessThan(100d));
rank = rankIterator.next();
assertEquals(12, rank.getValue(), 0d);
// TODO: Fix T-Digest: this assertion should pass but we currently get ~59
// https://github.com/elastic/elasticsearch/issues/14851
// assertThat(rank.getPercent(), Matchers.equalTo(100d));
assertThat(rank.getPercent(), Matchers.equalTo(100d));
assertFalse(rankIterator.hasNext());
assertTrue(AggregationInspectionHelper.hasValue(((InternalTDigestPercentileRanks) ranks)));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,13 @@ public void testSomeMatchesSortedNumericDocValues() throws IOException {
iw.addDocument(singleton(new SortedNumericDocValuesField("number", 0)));
}, tdigest -> {
assertEquals(7L, tdigest.state.size());
assertEquals(7L, tdigest.state.centroidCount());
assertEquals(4.5d, tdigest.percentile(75), 0.0d);
assertEquals("4.5", tdigest.percentileAsString(75));
assertEquals(2.0d, tdigest.percentile(50), 0.0d);
assertEquals("2.0", tdigest.percentileAsString(50));
assertEquals(1.0d, tdigest.percentile(22), 0.0d);
assertEquals("1.0", tdigest.percentileAsString(22));
assertEquals(7L, tdigest.state.centroidCount());
assertTrue(AggregationInspectionHelper.hasValue(tdigest));
});
}
Expand All @@ -97,7 +97,6 @@ public void testSomeMatchesNumericDocValues() throws IOException {
iw.addDocument(singleton(new NumericDocValuesField("number", 0)));
}, tdigest -> {
assertEquals(tdigest.state.size(), 7L);
assertEquals(tdigest.state.centroidCount(), 7L);
assertEquals(8.0d, tdigest.percentile(100), 0.0d);
assertEquals("8.0", tdigest.percentileAsString(100));
assertEquals(6.98d, tdigest.percentile(88), 0.0d);
Expand All @@ -108,6 +107,7 @@ public void testSomeMatchesNumericDocValues() throws IOException {
assertEquals("1.0", tdigest.percentileAsString(25));
assertEquals(0.0d, tdigest.percentile(1), 0.0d);
assertEquals("0.0", tdigest.percentileAsString(1));
assertEquals(tdigest.state.centroidCount(), 7L);
assertTrue(AggregationInspectionHelper.hasValue(tdigest));
});
}
Expand All @@ -125,10 +125,10 @@ public void testQueryFiltering() throws IOException {

testCase(LongPoint.newRangeQuery("row", 1, 4), docs, tdigest -> {
assertEquals(4L, tdigest.state.size());
assertEquals(4L, tdigest.state.centroidCount());
assertEquals(2.0d, tdigest.percentile(100), 0.0d);
assertEquals(1.0d, tdigest.percentile(50), 0.0d);
assertEquals(0.5d, tdigest.percentile(25), 0.0d);
assertEquals(4L, tdigest.state.centroidCount());
assertTrue(AggregationInspectionHelper.hasValue(tdigest));
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,4 +130,26 @@ public void testTDigestStateHash() {
assertTrue(set.stream().anyMatch(digest -> digest.equals(empty1)));
assertTrue(set.stream().anyMatch(digest -> digest.equals(empty2)));
}

public void testBasicAssumptions() {
int size = 7;
TDigestState[] states = new TDigestState[size];
for (int counter = 0; counter < size; counter++) {
states[counter] = new TDigestState(50);
final int numberDocs = randomIntBetween(5, 50);
for (int i = 0; i < numberDocs; i++) {
double value = randomDoubleBetween(-1000, 1000, true);
states[counter].add(value);
}
}

int fromIndex = 0;
int toIndex = 6;
TDigestState result = new TDigestState(states[0].compression());
for (int i = fromIndex; i < toIndex; i++) {
result.add(states[i]);
}
assertNotEquals(Double.NEGATIVE_INFINITY, result.getMax());
assertTrue(result.getMax() > result.getMin());
}
}