Skip to content

Commit 3b3e6a9

Browse files
markstory, evanh, and Zylphrex
authored
fix(discover): Fix user.display in aggregates (#20329)
Include the full column function in the GROUP BY clause so that we don't get errors about ungrouped fields. Mostly remove the string formatting for ArgValue substitutions; using ArgValue lets us expand function parameters into other expressions. Refs SENTRY-GWT. Co-authored-by: evanh <[email protected]> Co-authored-by: Tony <[email protected]>
1 parent 3838255 commit 3b3e6a9

File tree

5 files changed

+137
-29
lines changed

5 files changed

+137
-29
lines changed

src/sentry/api/event_search.py

Lines changed: 40 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -782,8 +782,7 @@ def convert_search_filter_to_snuba_query(search_filter, key=None):
782782
# allow snuba's prewhere optimizer to find this condition.
783783
return [name, search_filter.operator, value]
784784
elif name == USER_DISPLAY_ALIAS:
785-
# Slice off the function without an alias.
786-
user_display_expr = FIELD_ALIASES[USER_DISPLAY_ALIAS]["fields"][0][0:2]
785+
user_display_expr = FIELD_ALIASES[USER_DISPLAY_ALIAS]["expression"]
787786

788787
# Handle 'has' condition
789788
if search_filter.value.raw_value == "":
@@ -1179,12 +1178,8 @@ def get_filter(query=None, params=None):
11791178
FIELD_ALIASES = {
11801179
"project": {"fields": ["project.id"], "column_alias": "project.id"},
11811180
"issue": {"fields": ["issue.id"], "column_alias": "issue.id"},
1182-
# TODO(mark) This alias doesn't work inside count_unique().
1183-
# The column resolution code is really convoluted and expanding
1184-
# it to support functions that need columns resolved inside of
1185-
# aggregations is complicated. Until that gets sorted user.display
1186-
# should not be added to the public field list/docs.
11871181
"user.display": {
1182+
"expression": ["coalesce", ["user.email", "user.username", "user.ip"]],
11881183
"fields": [["coalesce", ["user.email", "user.username", "user.ip"], "user.display"]],
11891184
"column_alias": "user.display",
11901185
},
@@ -1231,19 +1226,39 @@ def has_default(self, params):
12311226
return False
12321227

12331228

1229+
class NullColumn(FunctionArg):
1230+
"""
1231+
Convert the provided column to null so that we
1232+
can drop it. Used to make count() not have a
1233+
required argument that we ignore.
1234+
"""
1235+
1236+
def has_default(self, params):
1237+
return None
1238+
1239+
def normalize(self, value):
1240+
return None
1241+
1242+
12341243
class CountColumn(FunctionArg):
12351244
def has_default(self, params):
12361245
return None
12371246

12381247
def normalize(self, value):
12391248
if value is None:
1249+
raise InvalidFunctionArgument("a column is required")
1250+
1251+
if value not in FIELD_ALIASES:
12401252
return value
12411253

1242-
# If we use an alias inside an aggregate, resolve it here
1243-
if value in FIELD_ALIASES:
1244-
value = FIELD_ALIASES[value].get("column_alias", value)
1254+
alias = FIELD_ALIASES[value]
12451255

1246-
return value
1256+
# If the alias has an expression prefer that over the column alias
1257+
# This enables user.display to work in aggregates
1258+
if "expression" in alias:
1259+
return alias["expression"]
1260+
1261+
return alias.get("column_alias", value)
12471262

12481263

12491264
class NumericColumn(FunctionArg):
@@ -1410,7 +1425,7 @@ def has_default(self, params):
14101425
"column": [
14111426
"multiply",
14121427
[
1413-
["floor", [["divide", [u"{column}", ArgValue("bucket_size")]]]],
1428+
["floor", [["divide", [ArgValue("column"), ArgValue("bucket_size")]]]],
14141429
ArgValue("bucket_size"),
14151430
],
14161431
None,
@@ -1420,38 +1435,35 @@ def has_default(self, params):
14201435
"count_unique": {
14211436
"name": "count_unique",
14221437
"args": [CountColumn("column")],
1423-
"aggregate": ["uniq", u"{column}", None],
1438+
"aggregate": ["uniq", ArgValue("column"), None],
14241439
"result_type": "integer",
14251440
},
1426-
# TODO(evanh) Count doesn't accept parameters in the frontend, but we support it here
1427-
# for backwards compatibility. Once we've migrated existing queries this should get
1428-
# changed to accept no parameters.
14291441
"count": {
14301442
"name": "count",
1431-
"args": [CountColumn("column")],
1443+
"args": [NullColumn("column")],
14321444
"aggregate": ["count", None, None],
14331445
"result_type": "integer",
14341446
},
14351447
"min": {
14361448
"name": "min",
14371449
"args": [NumericColumnNoLookup("column")],
1438-
"aggregate": ["min", u"{column}", None],
1450+
"aggregate": ["min", ArgValue("column"), None],
14391451
},
14401452
"max": {
14411453
"name": "max",
14421454
"args": [NumericColumnNoLookup("column")],
1443-
"aggregate": ["max", u"{column}", None],
1455+
"aggregate": ["max", ArgValue("column"), None],
14441456
},
14451457
"avg": {
14461458
"name": "avg",
14471459
"args": [DurationColumnNoLookup("column")],
1448-
"aggregate": ["avg", u"{column}", None],
1460+
"aggregate": ["avg", ArgValue("column"), None],
14491461
"result_type": "duration",
14501462
},
14511463
"sum": {
14521464
"name": "sum",
14531465
"args": [DurationColumnNoLookup("column")],
1454-
"aggregate": ["sum", u"{column}", None],
1466+
"aggregate": ["sum", ArgValue("column"), None],
14551467
"result_type": "duration",
14561468
},
14571469
# Currently only being used by the baseline PoC
@@ -1560,7 +1572,9 @@ def resolve_function(field, match=None, params=None):
15601572
aggregate[0] = aggregate[0].format(**arguments)
15611573
if isinstance(aggregate[1], six.string_types):
15621574
aggregate[1] = aggregate[1].format(**arguments)
1563-
1575+
elif isinstance(aggregate[1], ArgValue):
1576+
arg = aggregate[1].arg
1577+
aggregate[1] = arguments[arg]
15641578
if aggregate[2] is None:
15651579
aggregate[2] = get_function_alias_with_columns(
15661580
function["name"], columns if not used_default else []
@@ -1744,6 +1758,10 @@ def resolve_field_list(fields, snuba_filter, auto_fields=True):
17441758
if column[0] == "transform":
17451759
# When there's a project transform, we already group by project_id
17461760
continue
1761+
if column[2] == USER_DISPLAY_ALIAS:
1762+
# user.display needs to be grouped by its coalesce function
1763+
groupby.append(column)
1764+
continue
17471765
groupby.append(column[2])
17481766
else:
17491767
groupby.append(column)

src/sentry/static/sentry/app/utils/discover/fields.tsx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -330,12 +330,11 @@ enum FieldKey {
330330
TRANSACTION_DURATION = 'transaction.duration',
331331
TRANSACTION_OP = 'transaction.op',
332332
TRANSACTION_STATUS = 'transaction.status',
333-
// user.display is intentionally not here as
334-
// it isn't stable and ready for customers to use just yet.
335333
USER_EMAIL = 'user.email',
336334
USER_ID = 'user.id',
337335
USER_IP = 'user.ip',
338336
USER_USERNAME = 'user.username',
337+
USER_DISPLAY = 'user.display',
339338
}
340339

341340
/**
@@ -410,6 +409,7 @@ export const FIELDS: Readonly<Record<FieldKey, ColumnType>> = {
410409
// Field alises defined in src/sentry/api/event_search.py
411410
[FieldKey.PROJECT]: 'string',
412411
[FieldKey.ISSUE]: 'string',
412+
[FieldKey.USER_DISPLAY]: 'string',
413413
};
414414

415415
export type FieldTag = {

src/sentry/utils/snuba.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -752,7 +752,11 @@ def nest_groups(data, groups, aggregate_cols):
752752

753753
def resolve_column(dataset):
754754
def _resolve_column(col):
755-
if col is None or col.startswith("tags[") or QUOTED_LITERAL_RE.match(col):
755+
if col is None:
756+
return col
757+
if isinstance(col, six.string_types) and (
758+
col.startswith("tags[") or QUOTED_LITERAL_RE.match(col)
759+
):
756760
return col
757761

758762
# Some dataset specific logic:
@@ -973,7 +977,17 @@ def resolve_snuba_aliases(snuba_filter, resolve_func, function_translations=None
973977
if isinstance(aggregation[1], six.string_types):
974978
aggregation[1] = resolve_func(aggregation[1])
975979
elif isinstance(aggregation[1], (set, tuple, list)):
976-
aggregation[1] = [resolve_func(col) for col in aggregation[1]]
980+
# The aggregation has another function call as its parameter
981+
func_index = get_function_index(aggregation[1])
982+
if func_index is not None:
983+
# Resolve the columns on the nested function, and add a wrapping
984+
# list to become a valid query expression.
985+
aggregation[1] = [
986+
[aggregation[1][0], [resolve_func(col) for col in aggregation[1][1]]]
987+
]
988+
else:
989+
# Parameter is a list of fields.
990+
aggregation[1] = [resolve_func(col) for col in aggregation[1]]
977991
resolved.aggregations = aggregations
978992

979993
conditions = resolved.conditions

tests/sentry/api/test_event_search.py

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1816,11 +1816,24 @@ def test_field_alias_expansion(self):
18161816
assert result["groupby"] == [
18171817
"title",
18181818
"issue.id",
1819-
"user.display",
1819+
["coalesce", ["user.email", "user.username", "user.ip"], "user.display"],
18201820
"message",
18211821
"project.id",
18221822
]
18231823

1824+
def test_field_alias_with_aggregates(self):
1825+
fields = ["event.type", "user.display", "count_unique(title)"]
1826+
result = resolve_field_list(fields, eventstore.Filter())
1827+
assert result["selected_columns"] == [
1828+
"event.type",
1829+
["coalesce", ["user.email", "user.username", "user.ip"], "user.display"],
1830+
]
1831+
assert result["aggregations"] == [["uniq", "title", "count_unique_title"]]
1832+
assert result["groupby"] == [
1833+
"event.type",
1834+
["coalesce", ["user.email", "user.username", "user.ip"], "user.display"],
1835+
]
1836+
18241837
def test_aggregate_function_expansion(self):
18251838
fields = ["count_unique(user)", "count(id)", "min(timestamp)"]
18261839
result = resolve_field_list(fields, eventstore.Filter())
@@ -1833,13 +1846,16 @@ def test_aggregate_function_expansion(self):
18331846
]
18341847
assert result["groupby"] == []
18351848

1836-
@pytest.mark.xfail(reason="functions need to handle non string column replacements")
18371849
def test_aggregate_function_complex_field_expansion(self):
18381850
fields = ["count_unique(user.display)"]
18391851
result = resolve_field_list(fields, eventstore.Filter())
18401852
assert result["selected_columns"] == []
18411853
assert result["aggregations"] == [
1842-
["uniq", ["coalesce", ["user.email", "user.username", "user.ip"]], "count_unique_user"],
1854+
[
1855+
"uniq",
1856+
["coalesce", ["user.email", "user.username", "user.ip"]],
1857+
"count_unique_user_display",
1858+
],
18431859
]
18441860
assert result["groupby"] == []
18451861

@@ -1883,6 +1899,19 @@ def test_aggregate_function_invalid_column(self):
18831899
in six.text_type(err)
18841900
)
18851901

1902+
def test_aggregate_function_missing_parameter(self):
1903+
with pytest.raises(InvalidSearchQuery) as err:
1904+
fields = ["count_unique()"]
1905+
resolve_field_list(fields, eventstore.Filter())
1906+
assert (
1907+
"InvalidSearchQuery: count_unique(): column argument invalid: a column is required"
1908+
in six.text_type(err)
1909+
)
1910+
1911+
with pytest.raises(InvalidSearchQuery) as err:
1912+
fields = ["count_unique( )"]
1913+
resolve_field_list(fields, eventstore.Filter())
1914+
18861915
def test_percentile_function(self):
18871916
fields = ["percentile(transaction.duration, 0.75)"]
18881917
result = resolve_field_list(fields, eventstore.Filter())

tests/snuba/api/endpoints/test_organization_events_v2.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1505,6 +1505,53 @@ def test_user_display(self):
15051505
result = set([r["user.display"] for r in data])
15061506
assert result == set(["catherine", "[email protected]"])
15071507

1508+
def test_user_display_with_aggregates(self):
1509+
self.login_as(user=self.user)
1510+
1511+
project1 = self.create_project()
1512+
self.store_event(
1513+
data={
1514+
"event_id": "a" * 32,
1515+
"transaction": "/example",
1516+
"message": "how to make fast",
1517+
"timestamp": self.two_min_ago,
1518+
"user": {"email": "[email protected]"},
1519+
},
1520+
project_id=project1.id,
1521+
)
1522+
1523+
with self.feature(
1524+
{"organizations:discover-basic": True, "organizations:global-views": True}
1525+
):
1526+
response = self.client.get(
1527+
self.url,
1528+
format="json",
1529+
data={
1530+
"field": ["event.type", "user.display", "count_unique(title)"],
1531+
"statsPeriod": "24h",
1532+
},
1533+
)
1534+
1535+
assert response.status_code == 200, response.content
1536+
data = response.data["data"]
1537+
assert len(data) == 1
1538+
result = set([r["user.display"] for r in data])
1539+
assert result == set(["[email protected]"])
1540+
1541+
with self.feature(
1542+
{"organizations:discover-basic": True, "organizations:global-views": True}
1543+
):
1544+
response = self.client.get(
1545+
self.url,
1546+
format="json",
1547+
data={"field": ["event.type", "count_unique(user.display)"], "statsPeriod": "24h"},
1548+
)
1549+
1550+
assert response.status_code == 200, response.content
1551+
data = response.data["data"]
1552+
assert len(data) == 1
1553+
assert data[0]["count_unique_user_display"] == 1
1554+
15081555
def test_has_transaction_status(self):
15091556
project = self.create_project()
15101557
data = load_data("transaction", timestamp=before_now(minutes=1))

0 commit comments

Comments
 (0)