Skip to content

Commit 19b8c5f

Browse files
authored
Add hard_bounds for histograms (#5098)
* Add hard_bounds for histograms * Apply date_optional_time format for hard_bounds * Add new hard_bounds tests As with extended_bounds, we append the date_optional_time format to the format set on the aggregation. This avoids parsing errors on the server. Includes an update to the documentation which will be generated and added in a subsequent PR.
1 parent fa2edcc commit 19b8c5f

File tree

5 files changed

+186
-5
lines changed

5 files changed

+186
-5
lines changed

src/Nest/Aggregations/Bucket/DateHistogram/DateHistogramAggregation.cs

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,13 @@ public interface IDateHistogramAggregation : IBucketAggregation
2323
[DataMember(Name ="extended_bounds")]
2424
ExtendedBounds<DateMath> ExtendedBounds { get; set; }
2525

26+
/// <summary>
27+
/// The hard_bounds is a counterpart of extended_bounds and can limit the range of buckets in the histogram.
28+
/// It is particularly useful in the case of open data ranges that can result in a very large number of buckets.
29+
/// </summary>
30+
[DataMember(Name = "hard_bounds")]
31+
HardBounds<DateMath> HardBounds { get; set; }
32+
2633
/// <summary>
2734
/// The field to target
2835
/// </summary>
@@ -103,20 +110,21 @@ public DateHistogramAggregation(string name) : base(name) { }
103110
/// <inheritdoc />
104111
public ExtendedBounds<DateMath> ExtendedBounds { get; set; }
105112
/// <inheritdoc />
113+
public HardBounds<DateMath> HardBounds { get; set; }
114+
/// <inheritdoc />
106115
public Field Field { get; set; }
107116

108117
/// <inheritdoc />
109118
public string Format
110119
{
111120
get => !string.IsNullOrEmpty(_format) &&
112121
!_format.Contains("date_optional_time") &&
113-
(ExtendedBounds != null || Missing.HasValue)
122+
(ExtendedBounds != null || HardBounds != null || Missing.HasValue)
114123
? _format + "||date_optional_time"
115124
: _format;
116125
set => _format = value;
117126
}
118127

119-
120128
[Obsolete("Deprecated in version 7.2.0, use CalendarInterval or FixedInterval instead")]
121129
public Union<DateInterval, Time> Interval { get; set; }
122130
/// <inheritdoc />
@@ -147,14 +155,15 @@ public class DateHistogramAggregationDescriptor<T>
147155
private string _format;
148156

149157
ExtendedBounds<DateMath> IDateHistogramAggregation.ExtendedBounds { get; set; }
158+
HardBounds<DateMath> IDateHistogramAggregation.HardBounds { get; set; }
150159
Field IDateHistogramAggregation.Field { get; set; }
151160

152161
//see: https://github.com/elastic/elasticsearch/issues/9725
153162
string IDateHistogramAggregation.Format
154163
{
155164
get => !string.IsNullOrEmpty(_format) &&
156165
!_format.Contains("date_optional_time") &&
157-
(Self.ExtendedBounds != null || Self.Missing.HasValue)
166+
(Self.ExtendedBounds != null || Self.HardBounds != null || Self.Missing.HasValue)
158167
? _format + "||date_optional_time"
159168
: _format;
160169
set => _format = value;
@@ -228,6 +237,10 @@ public DateHistogramAggregationDescriptor<T> OrderDescending(string key) =>
228237
public DateHistogramAggregationDescriptor<T> ExtendedBounds(DateMath min, DateMath max) =>
229238
Assign(new ExtendedBounds<DateMath> { Minimum = min, Maximum = max }, (a, v) => a.ExtendedBounds = v);
230239

240+
/// <inheritdoc cref="IDateHistogramAggregation.HardBounds" />
241+
public DateHistogramAggregationDescriptor<T> HardBounds(DateMath min, DateMath max) =>
242+
Assign(new HardBounds<DateMath> { Minimum = min, Maximum = max }, (a, v) => a.HardBounds = v);
243+
231244
/// <inheritdoc cref="IDateHistogramAggregation.Missing" />
232245
public DateHistogramAggregationDescriptor<T> Missing(DateTime? missing) => Assign(missing, (a, v) => a.Missing = v);
233246
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// Licensed to Elasticsearch B.V under one or more agreements.
2+
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
3+
// See the LICENSE file in the project root for more information
4+
5+
using System.Runtime.Serialization;
6+
7+
namespace Nest
8+
{
9+
public class HardBounds<T>
10+
{
11+
[DataMember(Name = "max")]
12+
public T Maximum { get; set; }
13+
14+
[DataMember(Name = "min")]
15+
public T Minimum { get; set; }
16+
}
17+
}

src/Nest/Aggregations/Bucket/Histogram/HistogramAggregation.cs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ public interface IHistogramAggregation : IBucketAggregation
1616
[DataMember(Name ="extended_bounds")]
1717
ExtendedBounds<double> ExtendedBounds { get; set; }
1818

19+
[DataMember(Name = "hard_bounds")]
20+
HardBounds<double> HardBounds { get; set; }
21+
1922
[DataMember(Name ="field")]
2023
Field Field { get; set; }
2124

@@ -45,6 +48,7 @@ internal HistogramAggregation() { }
4548
public HistogramAggregation(string name) : base(name) { }
4649

4750
public ExtendedBounds<double> ExtendedBounds { get; set; }
51+
public HardBounds<double> HardBounds { get; set; }
4852
public Field Field { get; set; }
4953
public double? Interval { get; set; }
5054
public int? MinimumDocumentCount { get; set; }
@@ -61,6 +65,7 @@ public class HistogramAggregationDescriptor<T>
6165
where T : class
6266
{
6367
ExtendedBounds<double> IHistogramAggregation.ExtendedBounds { get; set; }
68+
HardBounds<double> IHistogramAggregation.HardBounds { get; set; }
6469
Field IHistogramAggregation.Field { get; set; }
6570

6671
double? IHistogramAggregation.Interval { get; set; }
@@ -100,6 +105,9 @@ public HistogramAggregationDescriptor<T> OrderDescending(string key) =>
100105
public HistogramAggregationDescriptor<T> ExtendedBounds(double min, double max) =>
101106
Assign(new ExtendedBounds<double> { Minimum = min, Maximum = max }, (a, v) => a.ExtendedBounds = v);
102107

108+
public HistogramAggregationDescriptor<T> HardBounds(double min, double max) =>
109+
Assign(new HardBounds<double> { Minimum = min, Maximum = max }, (a, v) => a.HardBounds = v);
110+
103111
public HistogramAggregationDescriptor<T> Offset(double? offset) => Assign(offset, (a, v) => a.Offset = v);
104112

105113
public HistogramAggregationDescriptor<T> Missing(double? missing) => Assign(missing, (a, v) => a.Missing = v);

tests/Tests/Aggregations/Bucket/DateHistogram/DateHistogramAggregationUsageTests.cs

Lines changed: 81 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
using System;
66
using System.Linq;
7+
using Elastic.Elasticsearch.Xunit.XunitPlumbing;
78
using FluentAssertions;
89
using Nest;
910
using Tests.Core.Extensions;
@@ -20,7 +21,7 @@ namespace Tests.Aggregations.Bucket.DateHistogram
2021
* From a functionality perspective, this histogram supports the same features as the normal histogram.
2122
* The main difference is that the interval can be specified by date/time expressions.
2223
*
23-
* NOTE: When specifying a `format` **and** `extended_bounds` or `missing`, in order for Elasticsearch to be able to parse
24+
* NOTE: When specifying a `format` **and** `extended_bounds`, `hard_bounds` or `missing`, in order for Elasticsearch to be able to parse
2425
* the serialized `DateTime` of `extended_bounds`, `hard_bounds` or `missing` correctly, the `date_optional_time` format is included
2526
* as part of the `format` value.
2627
*
@@ -39,7 +40,7 @@ public DateHistogramAggregationUsageTests(ReadOnlyCluster i, EndpointUsage usage
3940
field = "startedOn",
4041
calendar_interval = "month",
4142
min_doc_count = 2,
42-
format = "yyyy-MM-dd'T'HH:mm:ss||date_optional_time", //<1> Note the inclusion of `date_optional_time` to `format`
43+
format = "yyyy-MM-dd'T'HH:mm:ss||date_optional_time", // <1> Note the inclusion of `date_optional_time` to `format`
4344
order = new { _count = "asc" },
4445
extended_bounds = new
4546
{
@@ -209,4 +210,82 @@ protected override void ExpectResponse(ISearchResponse<Project> response)
209210
}
210211
}
211212
}
213+
214+
// hide
215+
[SkipVersion("<7.10.0", "hard_bounds introduced in 7.10.0")]
216+
public class DateHistogramAggregationWithHardBoundsUsageTests : ProjectsOnlyAggregationUsageTestBase
217+
{
218+
private readonly DateTime _hardBoundsMinimum;
219+
private readonly DateTime _hardBoundsMaximum;
220+
221+
public DateHistogramAggregationWithHardBoundsUsageTests(ReadOnlyCluster i, EndpointUsage usage) : base(i, usage)
222+
{
223+
// Note: If these tests are run against an existing node, and seeding is not forced, it's possible the
224+
// dates used will not appear in the index and result in no buckets being returned. The test will still
225+
// pass if this is the case. For best results locally, force a reseed. This is not an issue in CI.
226+
227+
var projects = Project.Projects.OrderBy(p => p.StartedOn).Skip(2).Take(5).ToArray();
228+
229+
_hardBoundsMinimum = projects.Min(p => p.StartedOn.Date);
230+
_hardBoundsMaximum = projects.Max(p => p.StartedOn.Date);
231+
}
232+
233+
protected override object AggregationJson => new
234+
{
235+
projects_started_per_day = new
236+
{
237+
date_histogram = new
238+
{
239+
field = "startedOn",
240+
calendar_interval = "day",
241+
format = "yyyy-MM-dd'T'HH:mm:ss||date_optional_time",
242+
min_doc_count = 1,
243+
hard_bounds = new
244+
{
245+
min = _hardBoundsMinimum,
246+
max = _hardBoundsMaximum
247+
},
248+
order = new { _key = "asc" },
249+
}
250+
}
251+
};
252+
253+
#pragma warning disable 618, 612
254+
protected override Func<AggregationContainerDescriptor<Project>, IAggregationContainer> FluentAggs => a => a
255+
.DateHistogram("projects_started_per_day", date => date
256+
.Field(p => p.StartedOn)
257+
.Format("yyyy-MM-dd'T'HH:mm:ss")
258+
.CalendarInterval(DateInterval.Day)
259+
.HardBounds(_hardBoundsMinimum, _hardBoundsMaximum)
260+
.MinimumDocumentCount(1)
261+
.Order(HistogramOrder.KeyAscending)
262+
);
263+
264+
protected override AggregationDictionary InitializerAggs =>
265+
new DateHistogramAggregation("projects_started_per_day")
266+
{
267+
Field = Field<Project>(p => p.StartedOn),
268+
Format = "yyyy-MM-dd'T'HH:mm:ss",
269+
CalendarInterval = DateInterval.Day,
270+
HardBounds = new HardBounds<DateMath>
271+
{
272+
Minimum = _hardBoundsMinimum,
273+
Maximum = _hardBoundsMaximum
274+
},
275+
MinimumDocumentCount = 1,
276+
Order = HistogramOrder.KeyAscending
277+
};
278+
#pragma warning restore 618, 612
279+
280+
protected override void ExpectResponse(ISearchResponse<Project> response)
281+
{
282+
response.ShouldBeValid();
283+
var dateHistogram = response.Aggregations.DateHistogram("projects_started_per_day");
284+
dateHistogram.Should().NotBeNull();
285+
dateHistogram.Buckets.Should().NotBeNull();
286+
287+
foreach (var date in dateHistogram.Buckets.Select(b => DateTime.Parse(b.KeyAsString)))
288+
date.Should().BeOnOrAfter(_hardBoundsMinimum).And.BeOnOrBefore(_hardBoundsMaximum);
289+
}
290+
}
212291
}

tests/Tests/Aggregations/Bucket/Histogram/HistogramAggregationUsageTests.cs

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// See the LICENSE file in the project root for more information
44

55
using System;
6+
using Elastic.Elasticsearch.Xunit.XunitPlumbing;
67
using FluentAssertions;
78
using Nest;
89
using Tests.Core.Extensions;
@@ -31,6 +32,7 @@ public HistogramAggregationUsageTests(ReadOnlyCluster i, EndpointUsage usage) :
3132
_key = "desc"
3233
},
3334
offset = 1.1
35+
3436
}
3537
}
3638
};
@@ -65,4 +67,66 @@ protected override void ExpectResponse(ISearchResponse<Project> response)
6567
item.DocCount.Should().BeGreaterThan(0);
6668
}
6769
}
70+
71+
// hide
72+
[SkipVersion("<7.10.0", "hard_bounds introduced in 7.10.0")]
73+
public class HistogramAggregationWithHardBoundsUsageTests : AggregationUsageTestBase
74+
{
75+
private const double HardBoundsMinimum = 100;
76+
private const double HardBoundsMaximum = 300;
77+
78+
public HistogramAggregationWithHardBoundsUsageTests(ReadOnlyCluster i, EndpointUsage usage) : base(i, usage) { }
79+
80+
protected override object AggregationJson => new
81+
{
82+
commits = new
83+
{
84+
histogram = new
85+
{
86+
field = "numberOfCommits",
87+
hard_bounds = new { min = HardBoundsMinimum, max = HardBoundsMaximum },
88+
interval = 100.0,
89+
min_doc_count = 1,
90+
order = new
91+
{
92+
_key = "desc"
93+
}
94+
}
95+
}
96+
};
97+
98+
protected override Func<AggregationContainerDescriptor<Project>, IAggregationContainer> FluentAggs => a => a
99+
.Histogram("commits", h => h
100+
.Field(p => p.NumberOfCommits)
101+
.Interval(100)
102+
.MinimumDocumentCount(1)
103+
.Order(HistogramOrder.KeyDescending)
104+
.HardBounds(HardBoundsMinimum, HardBoundsMaximum)
105+
);
106+
107+
protected override AggregationDictionary InitializerAggs =>
108+
new HistogramAggregation("commits")
109+
{
110+
Field = Field<Project>(p => p.NumberOfCommits),
111+
Interval = 100,
112+
MinimumDocumentCount = 1,
113+
Order = HistogramOrder.KeyDescending,
114+
HardBounds = new HardBounds<double>
115+
{
116+
Minimum = HardBoundsMinimum,
117+
Maximum = HardBoundsMaximum
118+
}
119+
};
120+
121+
protected override void ExpectResponse(ISearchResponse<Project> response)
122+
{
123+
response.ShouldBeValid();
124+
var commits = response.Aggregations.Histogram("commits");
125+
commits.Should().NotBeNull();
126+
commits.Buckets.Should().NotBeNull();
127+
128+
foreach (var bucket in commits.Buckets)
129+
bucket.Key.Should().BeGreaterOrEqualTo(HardBoundsMinimum).And.BeLessOrEqualTo(HardBoundsMaximum);
130+
}
131+
}
68132
}

0 commit comments

Comments
 (0)