Skip to content

Commit 7b06235

Browse files
github-actions[bot]russcamMpdreamz
authored
Add support for T-Test aggregations (#4732) (#4766)
* Add support for T-Test aggregations Relates: #4718 Co-authored-by: Russ Cam <[email protected]> Co-authored-by: Martijn Laarman <[email protected]>
1 parent c8f0265 commit 7b06235

File tree

10 files changed

+463
-5
lines changed

10 files changed

+463
-5
lines changed

docs/aggregations.asciidoc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ The values are typically extracted from the fields of the document (using the fi
6868

6969
* <<top-metrics-aggregation-usage,Top Metrics Aggregation Usage>>
7070

71+
* <<t-test-aggregation-usage,T Test Aggregation Usage>>
72+
7173
* <<value-count-aggregation-usage,Value Count Aggregation Usage>>
7274

7375
* <<weighted-average-aggregation-usage,Weighted Average Aggregation Usage>>
@@ -110,6 +112,8 @@ include::aggregations/metric/top-hits/top-hits-aggregation-usage.asciidoc[]
110112

111113
include::aggregations/metric/top-metrics/top-metrics-aggregation-usage.asciidoc[]
112114

115+
include::aggregations/metric/t-test/t-test-aggregation-usage.asciidoc[]
116+
113117
include::aggregations/metric/value-count/value-count-aggregation-usage.asciidoc[]
114118

115119
include::aggregations/metric/weighted-average/weighted-average-aggregation-usage.asciidoc[]
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
:ref_current: https://www.elastic.co/guide/en/elasticsearch/reference/master
2+
3+
:github: https://github.com/elastic/elasticsearch-net
4+
5+
:nuget: https://www.nuget.org/packages
6+
7+
////
8+
IMPORTANT NOTE
9+
==============
10+
This file has been generated from https://github.com/elastic/elasticsearch-net/tree/master/src/Tests/Tests/Aggregations/Metric/TTest/TTestAggregationUsageTests.cs.
11+
If you wish to submit a PR for any spelling mistakes, typos or grammatical errors for this file,
12+
please modify the original csharp file found at the link and submit the PR with that change. Thanks!
13+
////
14+
15+
[[t-test-aggregation-usage]]
16+
=== T Test Aggregation Usage
17+
18+
A t_test metrics aggregation that performs a statistical hypothesis test in which the test statistic follows a
19+
Student’s t-distribution under the null hypothesis on numeric values extracted from the aggregated documents or
20+
generated by provided scripts. In practice, this will tell you if the difference between two population means
21+
is statistically significant and did not occur by chance alone.
22+
23+
NOTE: Available in Elasticsearch 7.8.0+ with at least basic license level
24+
25+
Be sure to read the Elasticsearch documentation on {ref_current}/search-aggregations-metrics-ttest-aggregation.html[T-Test Aggregation].
26+
27+
==== Fluent DSL example
28+
29+
[source,csharp]
30+
----
31+
a => a
32+
.TTest("commits_visibility", c => c
33+
.A(t => t
34+
.Field(f => f.NumberOfCommits)
35+
.Filter(f => f
36+
.Term(ff => ff.Visibility, Visibility.Public)
37+
)
38+
)
39+
.B(t => t
40+
.Field(f => f.NumberOfCommits)
41+
.Filter(f => f
42+
.Term(ff => ff.Visibility, Visibility.Private)
43+
)
44+
)
45+
.Type(TTestType.Heteroscedastic)
46+
)
47+
----
48+
49+
==== Object Initializer syntax example
50+
51+
[source,csharp]
52+
----
53+
new TTestAggregation("commits_visibility")
54+
{
55+
A = new TTestPopulation
56+
{
57+
Field = Field<Project>(f => f.NumberOfCommits),
58+
Filter = new TermQuery
59+
{
60+
Field = Field<Project>(f => f.Visibility),
61+
Value = Visibility.Public
62+
}
63+
},
64+
B = new TTestPopulation
65+
{
66+
Field = Field<Project>(f => f.NumberOfCommits),
67+
Filter = new TermQuery
68+
{
69+
Field = Field<Project>(f => f.Visibility),
70+
Value = Visibility.Private
71+
}
72+
},
73+
Type = TTestType.Heteroscedastic
74+
}
75+
----
76+
77+
[source,javascript]
78+
.Example json output
79+
----
80+
{
81+
"commits_visibility": {
82+
"t_test": {
83+
"a": {
84+
"field": "numberOfCommits",
85+
"filter": {
86+
"term": {
87+
"visibility": {
88+
"value": "Public"
89+
}
90+
}
91+
}
92+
},
93+
"b": {
94+
"field": "numberOfCommits",
95+
"filter": {
96+
"term": {
97+
"visibility": {
98+
"value": "Private"
99+
}
100+
}
101+
}
102+
},
103+
"type": "heteroscedastic"
104+
}
105+
}
106+
}
107+
----
108+
109+
==== Handling Responses
110+
111+
[source,csharp]
112+
----
113+
response.ShouldBeValid();
114+
var tTest = response.Aggregations.TTest("commits_visibility");
115+
tTest.Should().NotBeNull();
116+
tTest.Value.Should().BeGreaterThan(0);
117+
----
118+

src/Nest/Aggregations/AggregateDictionary.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,8 @@ public CompositeBucketAggregate Composite(string key)
244244

245245
public BoxplotAggregate Boxplot(string key) => TryGet<BoxplotAggregate>(key);
246246

247+
public ValueAggregate TTest(string key) => TryGet<ValueAggregate>(key);
248+
247249
private TAggregate TryGet<TAggregate>(string key) where TAggregate : class, IAggregate =>
248250
BackingDictionary.TryGetValue(key, out var agg) ? agg as TAggregate : null;
249251

src/Nest/Aggregations/AggregationContainer.cs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,10 @@ public interface IAggregationContainer
257257
[DataMember(Name = "top_hits")]
258258
ITopHitsAggregation TopHits { get; set; }
259259

260+
/// <inheritdoc cref="ITTestAggregation"/>
261+
[DataMember(Name = "t_test")]
262+
ITTestAggregation TTest { get; set; }
263+
260264
[DataMember(Name = "value_count")]
261265
IValueCountAggregation ValueCount { get; set; }
262266

@@ -387,6 +391,9 @@ public class AggregationContainer : IAggregationContainer
387391
public ITermsAggregation Terms { get; set; }
388392

389393
public ITopHitsAggregation TopHits { get; set; }
394+
395+
public ITTestAggregation TTest { get; set; }
396+
390397
public IValueCountAggregation ValueCount { get; set; }
391398

392399
public IWeightedAverageAggregation WeightedAverage { get; set; }
@@ -542,6 +549,8 @@ public class AggregationContainerDescriptor<T> : DescriptorBase<AggregationConta
542549

543550
ITopHitsAggregation IAggregationContainer.TopHits { get; set; }
544551

552+
ITTestAggregation IAggregationContainer.TTest { get; set; }
553+
545554
IValueCountAggregation IAggregationContainer.ValueCount { get; set; }
546555

547556
IWeightedAverageAggregation IAggregationContainer.WeightedAverage { get; set; }
@@ -719,6 +728,12 @@ Func<TopHitsAggregationDescriptor<T>, ITopHitsAggregation> selector
719728
) =>
720729
_SetInnerAggregation(name, selector, (a, d) => a.TopHits = d);
721730

731+
/// <inheritdoc cref="ITTestAggregation"/>
732+
public AggregationContainerDescriptor<T> TTest(string name,
733+
Func<TTestAggregationDescriptor<T>, ITTestAggregation> selector
734+
) =>
735+
_SetInnerAggregation(name, selector, (a, d) => a.TTest = d);
736+
722737
public AggregationContainerDescriptor<T> Children<TChild>(string name,
723738
Func<ChildrenAggregationDescriptor<TChild>, IChildrenAggregation> selector
724739
) where TChild : class =>
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
// Licensed to Elasticsearch B.V under one or more agreements.
2+
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
3+
// See the LICENSE file in the project root for more information
4+
5+
using System;
6+
using System.Collections.Generic;
7+
using System.Runtime.Serialization;
8+
using Elasticsearch.Net;
9+
using Elasticsearch.Net.Utf8Json;
10+
11+
namespace Nest
12+
{
13+
/// <summary>
14+
/// A metrics aggregation that performs a statistical hypothesis test in which the test statistic follows a
15+
/// Student’s t-distribution under the null hypothesis on numeric values extracted from the aggregated documents or
16+
/// generated by provided scripts. In practice, this will tell you if the difference between two population means
17+
/// is statistically significant and did not occur by chance alone.
18+
/// <para />
19+
/// Available in Elasticsearch 7.8.0+ with at least basic license level
20+
/// </summary>
21+
[InterfaceDataContract]
22+
[ReadAs(typeof(TTestAggregation))]
23+
public interface ITTestAggregation : IAggregation
24+
{
25+
/// <summary>
26+
/// T-test population A
27+
/// </summary>
28+
[DataMember(Name= "a")]
29+
public ITTestPopulation A { get; set; }
30+
31+
/// <summary>
32+
/// T-test population B
33+
/// </summary>
34+
[DataMember(Name= "b")]
35+
public ITTestPopulation B { get; set; }
36+
37+
/// <summary>
38+
/// T-test type
39+
/// </summary>
40+
[DataMember(Name = "type")]
41+
public TTestType? Type { get; set; }
42+
}
43+
44+
/// <inheritdoc cref="ITTestAggregation" />
45+
public class TTestAggregation : AggregationBase, ITTestAggregation
46+
{
47+
internal TTestAggregation() { }
48+
49+
public TTestAggregation(string name) : base(name) { }
50+
51+
internal override void WrapInContainer(AggregationContainer c) => c.TTest = this;
52+
53+
/// <inheritdoc />
54+
public ITTestPopulation A { get; set; }
55+
/// <inheritdoc />
56+
public ITTestPopulation B { get; set; }
57+
/// <inheritdoc />
58+
public TTestType? Type { get; set; }
59+
}
60+
61+
/// <inheritdoc cref="ITTestAggregation" />
62+
public class TTestAggregationDescriptor<T>
63+
: DescriptorBase<TTestAggregationDescriptor<T>, ITTestAggregation>, ITTestAggregation
64+
where T : class
65+
{
66+
IDictionary<string, object> IAggregation.Meta { get; set; }
67+
string IAggregation.Name { get; set; }
68+
ITTestPopulation ITTestAggregation.A { get; set; }
69+
ITTestPopulation ITTestAggregation.B { get; set; }
70+
TTestType? ITTestAggregation.Type { get; set; }
71+
72+
/// <inheritdoc cref="ITTestAggregation.A"/>
73+
public TTestAggregationDescriptor<T> A(Func<TTestPopulationDescriptor<T>, ITTestPopulation> selector) =>
74+
Assign(selector, (a, v) => a.A = v?.Invoke(new TTestPopulationDescriptor<T>()));
75+
76+
/// <inheritdoc cref="ITTestAggregation.A"/>
77+
public TTestAggregationDescriptor<T> A<TOther>(Func<TTestPopulationDescriptor<TOther>, ITTestPopulation> selector) where TOther : class =>
78+
Assign(selector, (a, v) => a.A = v?.Invoke(new TTestPopulationDescriptor<TOther>()));
79+
80+
/// <inheritdoc cref="ITTestAggregation.B"/>
81+
public TTestAggregationDescriptor<T> B(Func<TTestPopulationDescriptor<T>, ITTestPopulation> selector) =>
82+
Assign(selector, (a, v) => a.B = v?.Invoke(new TTestPopulationDescriptor<T>()));
83+
84+
/// <inheritdoc cref="ITTestAggregation.B"/>
85+
public TTestAggregationDescriptor<T> B<TOther>(Func<TTestPopulationDescriptor<TOther>, ITTestPopulation> selector) where TOther : class =>
86+
Assign(selector, (a, v) => a.B = v?.Invoke(new TTestPopulationDescriptor<TOther>()));
87+
88+
/// <inheritdoc cref="ITTestAggregation.Type"/>
89+
public TTestAggregationDescriptor<T> Type(TTestType? type) => Assign(type, (a, v) => a.Type = v);
90+
91+
/// <inheritdoc cref="IAggregation.Meta"/>
92+
public TTestAggregationDescriptor<T> Meta(Func<FluentDictionary<string, object>, FluentDictionary<string, object>> selector) =>
93+
Assign(selector, (a, v) => a.Meta = v?.Invoke(new FluentDictionary<string, object>()));
94+
}
95+
96+
/// <summary>
97+
/// The type of t-test
98+
/// </summary>
99+
[StringEnum]
100+
public enum TTestType
101+
{
102+
/// <summary>
103+
/// performs paired t-test
104+
/// </summary>
105+
[EnumMember(Value = "paired")]
106+
Paired,
107+
108+
/// <summary>
109+
/// performs two-sample equal variance test
110+
/// </summary>
111+
[EnumMember(Value = "homoscedastic")]
112+
Homoscedastic,
113+
114+
/// <summary>
115+
/// performs two-sample unequal variance test (this is the default)
116+
/// </summary>
117+
[EnumMember(Value = "heteroscedastic")]
118+
Heteroscedastic,
119+
}
120+
}
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// Licensed to Elasticsearch B.V under one or more agreements.
2+
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
3+
// See the LICENSE file in the project root for more information
4+
5+
using System;
6+
using System.Linq.Expressions;
7+
using System.Runtime.Serialization;
8+
using Elasticsearch.Net.Utf8Json;
9+
10+
namespace Nest
11+
{
12+
/// <summary>
13+
/// A population for a <see cref="TTestAggregation"/>
14+
/// </summary>
15+
[InterfaceDataContract]
16+
[ReadAs(typeof(TTestPopulation))]
17+
public interface ITTestPopulation
18+
{
19+
/// <summary>
20+
/// The field to use for the population values. Must be a numeric field.
21+
/// </summary>
22+
[DataMember(Name = "field")]
23+
Field Field { get; set; }
24+
25+
/// <summary>
26+
/// A script to use to calculate population values.
27+
/// </summary>
28+
[DataMember(Name = "script")]
29+
IScript Script { get; set; }
30+
31+
/// <summary>
32+
/// A filter to apply to target field to filter population values. Useful
33+
/// when two populations use the same field for values, to filter the values.
34+
/// </summary>
35+
[DataMember(Name = "filter")]
36+
QueryContainer Filter { get; set; }
37+
}
38+
39+
/// <inheritdoc />
40+
public class TTestPopulation : ITTestPopulation
41+
{
42+
/// <inheritdoc />
43+
public Field Field { get; set; }
44+
/// <inheritdoc />
45+
public IScript Script { get; set; }
46+
/// <inheritdoc />
47+
public QueryContainer Filter { get; set; }
48+
}
49+
50+
/// <inheritdoc cref="ITTestPopulation"/>
51+
public class TTestPopulationDescriptor<T> : DescriptorBase<TTestPopulationDescriptor<T>, ITTestPopulation>, ITTestPopulation where T : class
52+
{
53+
Field ITTestPopulation.Field { get; set; }
54+
IScript ITTestPopulation.Script { get; set; }
55+
QueryContainer ITTestPopulation.Filter { get; set; }
56+
57+
/// <inheritdoc cref="ITTestPopulation.Field"/>
58+
public TTestPopulationDescriptor<T> Field(Field field) => Assign(field, (a, v) => a.Field = v);
59+
60+
/// <inheritdoc cref="ITTestPopulation.Field"/>
61+
public TTestPopulationDescriptor<T> Field<TValue>(Expression<Func<T, TValue>> field) => Assign(field, (a, v) => a.Field = v);
62+
63+
/// <inheritdoc cref="ITTestPopulation.Script"/>
64+
public TTestPopulationDescriptor<T> Script(string script) => Assign((InlineScript)script, (a, v) => a.Script = v);
65+
66+
/// <inheritdoc cref="ITTestPopulation.Script"/>
67+
public TTestPopulationDescriptor<T> Script(Func<ScriptDescriptor, IScript> scriptSelector) =>
68+
Assign(scriptSelector, (a, v) => a.Script = v?.Invoke(new ScriptDescriptor()));
69+
70+
/// <inheritdoc cref="ITTestPopulation.Filter"/>
71+
public TTestPopulationDescriptor<T> Filter(Func<QueryContainerDescriptor<T>, QueryContainer> filter) =>
72+
Assign(filter, (a, v) => a.Filter = v?.Invoke(new QueryContainerDescriptor<T>()));
73+
}
74+
}

src/Nest/Aggregations/Metric/ValueAggregate.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
33
// See the LICENSE file in the project root for more information
44

5-
namespace Nest
5+
namespace Nest
66
{
77
public class ValueAggregate : MetricAggregateBase
88
{

0 commit comments

Comments
 (0)