diff --git a/src/sentry/replays/post_process.py b/src/sentry/replays/post_process.py index 51b59d44af4f8d..008cfa1e524a5c 100644 --- a/src/sentry/replays/post_process.py +++ b/src/sentry/replays/post_process.py @@ -25,7 +25,7 @@ def generate_normalized_output( """For each payload in the response strip "agg_" prefixes.""" for item in response: item["id"] = item.pop("replay_id", None) - item["project_id"] = item.pop("projectId", None) + item["project_id"] = str(item["project_id"]) item["trace_ids"] = item.pop("traceIds", []) item["error_ids"] = item.pop("errorIds", []) item["environment"] = item.pop("agg_environment", None) diff --git a/src/sentry/replays/query.py b/src/sentry/replays/query.py index 128a0b810efbab..176d81716ed500 100644 --- a/src/sentry/replays/query.py +++ b/src/sentry/replays/query.py @@ -7,6 +7,7 @@ from snuba_sdk import ( Column, Condition, + CurriedFunction, Entity, Function, Granularity, @@ -153,7 +154,7 @@ def query_replays_dataset( *having, ], orderby=sorting, - groupby=[Column("replay_id")], + groupby=[Column("project_id"), Column("replay_id")], granularity=Granularity(3600), **query_options, ), @@ -308,17 +309,22 @@ def _grouped_unique_values( ) -def _grouped_unique_scalar_value( - column_name: str, alias: Optional[str] = None, aliased: bool = True +def take_first_from_aggregation( + column_name: str, + alias: Optional[str] = None, + aliased: bool = True, ) -> Function: - """Returns the first value of a unique array. + """Returns the first value encountered in an aggregated array. E.g. - [1, 2, 2, 3, 3, 3, null] => [1, 2, 3] => 1 + [1, 2, 2, 3, 3, 3, null] => [1] => 1 """ return Function( "arrayElement", - parameters=[_grouped_unique_values(column_name), 1], + parameters=[ + CurriedFunction("groupArray", initializers=[1], parameters=[Column(column_name)]), + 1, + ], alias=alias or column_name if aliased else None, ) @@ -383,11 +389,11 @@ class ReplayQueryConfig(QueryConfig): releases = ListField() release = ListField(query_alias="releases") dist = String() - error_ids = ListField(query_alias="error_ids", is_uuid=True) - error_id = ListField(query_alias="error_ids", is_uuid=True) - trace_ids = ListField(query_alias="trace_ids", is_uuid=True) - trace_id = ListField(query_alias="trace_ids", is_uuid=True) - trace = ListField(query_alias="trace_ids", is_uuid=True) + error_ids = ListField(query_alias="errorIds") + error_id = ListField(query_alias="errorIds") + trace_ids = ListField(query_alias="traceIds") + trace_id = ListField(query_alias="traceIds") + trace = ListField(query_alias="traceIds") urls = ListField(query_alias="urls_sorted") url = ListField(query_alias="urls_sorted") user_id = String(field_alias="user.id", query_alias="user_id") @@ -421,8 +427,8 @@ class ReplayQueryConfig(QueryConfig): started_at = String(is_filterable=False) finished_at = String(is_filterable=False) # Dedicated url parameter should be used. - project_id = String(query_alias="projectId", is_filterable=False) - project = String(query_alias="projectId", is_filterable=False) + project_id = String(query_alias="project_id", is_filterable=False) + project = String(query_alias="project_id", is_filterable=False) # Pagination. @@ -467,20 +473,8 @@ def _activity_score(): # score = (count_errors * 25 + pagesVisited * 5 ) / 10; # score = Math.floor(Math.min(10, Math.max(1, score))); - error_weight = Function( - "multiply", - parameters=[Column("count_errors"), 25], - ) - pages_visited_weight = Function( - "multiply", - parameters=[ - Function( - "length", - parameters=[Column("urls_sorted")], - ), - 5, - ], - ) + error_weight = Function("multiply", parameters=[Column("count_errors"), 25]) + pages_visited_weight = Function("multiply", parameters=[Column("count_urls"), 5]) combined_weight = Function( "plus", @@ -549,10 +543,10 @@ def _activity_score(): "urls": ["urls_sorted", "agg_urls"], "url": ["urls_sorted", "agg_urls"], "count_errors": ["count_errors"], - "count_urls": ["count_urls", "urls_sorted", "agg_urls"], + "count_urls": ["count_urls"], "count_segments": ["count_segments"], "is_archived": ["is_archived"], - "activity": ["activity", "count_errors", "urls_sorted", "agg_urls"], + "activity": ["activity", "count_errors", "count_urls"], "user": ["user_id", "user_email", "user_name", "user_ip"], "os": ["os_name", "os_version"], "browser": ["browser_name", "browser_version"], @@ -582,18 +576,7 @@ def _activity_score(): QUERY_ALIAS_COLUMN_MAP = { "replay_id": _strip_uuid_dashes("replay_id", Column("replay_id")), - "replay_type": _grouped_unique_scalar_value(column_name="replay_type", alias="replay_type"), - "project_id": Function( - "toString", - parameters=[_grouped_unique_scalar_value(column_name="project_id", alias="agg_pid")], - alias="projectId", - ), - "platform": _grouped_unique_scalar_value(column_name="platform"), - "agg_environment": _grouped_unique_scalar_value( - column_name="environment", alias="agg_environment" - ), - "releases": _grouped_unique_values(column_name="release", alias="releases", aliased=True), - "dist": _grouped_unique_scalar_value(column_name="dist"), + "project_id": Column("project_id"), "trace_ids": Function( "arrayMap", parameters=[ @@ -637,13 +620,13 @@ def _activity_score(): ), "count_segments": Function("count", parameters=[Column("segment_id")], alias="count_segments"), "count_errors": Function( - "uniqArray", - parameters=[Column("error_ids")], + "sum", + parameters=[Function("length", parameters=[Column("error_ids")])], alias="count_errors", ), "count_urls": Function( - "length", - parameters=[Column("urls_sorted")], + "sum", + parameters=[Function("length", parameters=[Column("urls")])], alias="count_urls", ), "is_archived": Function( @@ -652,29 +635,31 @@ def _activity_score(): alias="isArchived", ), "activity": _activity_score(), - "user_id": _grouped_unique_scalar_value(column_name="user_id"), - "user_email": _grouped_unique_scalar_value(column_name="user_email"), - "user_name": _grouped_unique_scalar_value(column_name="user_name"), + "releases": _grouped_unique_values(column_name="release", alias="releases", aliased=True), + "replay_type": take_first_from_aggregation(column_name="replay_type", alias="replay_type"), + "platform": take_first_from_aggregation(column_name="platform"), + "agg_environment": take_first_from_aggregation( + column_name="environment", alias="agg_environment" + ), + "dist": take_first_from_aggregation(column_name="dist"), + "user_id": take_first_from_aggregation(column_name="user_id"), + "user_email": take_first_from_aggregation(column_name="user_email"), + "user_name": take_first_from_aggregation(column_name="user_name"), "user_ip": Function( "IPv4NumToString", - parameters=[ - _grouped_unique_scalar_value( - column_name="ip_address_v4", - aliased=False, - ) - ], + parameters=[take_first_from_aggregation(column_name="ip_address_v4", aliased=False)], alias="user_ip", ), - "os_name": _grouped_unique_scalar_value(column_name="os_name"), - "os_version": _grouped_unique_scalar_value(column_name="os_version"), - "browser_name": _grouped_unique_scalar_value(column_name="browser_name"), - "browser_version": _grouped_unique_scalar_value(column_name="browser_version"), - "device_name": _grouped_unique_scalar_value(column_name="device_name"), - "device_brand": _grouped_unique_scalar_value(column_name="device_brand"), - "device_family": _grouped_unique_scalar_value(column_name="device_family"), - "device_model": _grouped_unique_scalar_value(column_name="device_model"), - "sdk_name": _grouped_unique_scalar_value(column_name="sdk_name"), - "sdk_version": _grouped_unique_scalar_value(column_name="sdk_version"), + "os_name": take_first_from_aggregation(column_name="os_name"), + "os_version": take_first_from_aggregation(column_name="os_version"), + "browser_name": take_first_from_aggregation(column_name="browser_name"), + "browser_version": take_first_from_aggregation(column_name="browser_version"), + "device_name": take_first_from_aggregation(column_name="device_name"), + "device_brand": take_first_from_aggregation(column_name="device_brand"), + "device_family": take_first_from_aggregation(column_name="device_family"), + "device_model": take_first_from_aggregation(column_name="device_model"), + "sdk_name": take_first_from_aggregation(column_name="sdk_name"), + "sdk_version": take_first_from_aggregation(column_name="sdk_version"), "tk": Function("groupArrayArray", parameters=[Column("tags.key")], alias="tk"), "tv": Function("groupArrayArray", parameters=[Column("tags.value")], alias="tv"), } diff --git a/src/sentry/replays/serializers.py b/src/sentry/replays/serializers.py index 60aeecaa666b6f..380d6c14e83b4a 100644 --- a/src/sentry/replays/serializers.py +++ b/src/sentry/replays/serializers.py @@ -17,7 +17,7 @@ def serialize(self, obj: ReplayRecordingSegment, attrs, user): VALID_FIELD_SET = { "id", "title", - "projectId", + "project_id", "errorIds", "traceIds", "urls", diff --git a/tests/sentry/replays/test_organization_replay_index.py b/tests/sentry/replays/test_organization_replay_index.py index 8462a4ef1cb096..69fe424424ad22 100644 --- a/tests/sentry/replays/test_organization_replay_index.py +++ b/tests/sentry/replays/test_organization_replay_index.py @@ -68,6 +68,7 @@ def test_get_replays(self): # error_ids=[uuid.uuid4().hex, replay1_id], # duplicate error-id urls=["http://localhost:3000/"], # duplicate urls are okay tags={"test": "world", "other": "hello"}, + error_ids=[], ) ) @@ -434,6 +435,10 @@ def test_get_replays_user_filters(self): seq2_timestamp, project.id, replay1_id, + user_id=None, + user_name=None, + user_email=None, + ipv4=None, os_name=None, os_version=None, browser_name=None, @@ -443,6 +448,7 @@ def test_get_replays_user_filters(self): device_family=None, device_model=None, tags={"a": "n", "b": "o"}, + error_ids=[], ) ) diff --git a/tests/sentry/replays/test_project_replay_details.py b/tests/sentry/replays/test_project_replay_details.py index 86fb56395fe7ff..11d62f780010aa 100644 --- a/tests/sentry/replays/test_project_replay_details.py +++ b/tests/sentry/replays/test_project_replay_details.py @@ -109,6 +109,7 @@ def test_get_replay_schema(self): segment_id=1, trace_ids=[trace_id_2], urls=["http://www.sentry.io/"], + error_ids=[], ) ) self.store_replays( @@ -119,6 +120,7 @@ def test_get_replay_schema(self): segment_id=2, trace_ids=[trace_id_2], urls=["http://localhost:3000/"], + error_ids=[], ) )