
Commit a71bc4c

getsentry-bot authored and michellewzhang committed
Revert "feat(replay): query IP for trace connected errors for replay summary (#97737)"
This reverts commit b71d79f.

Co-authored-by: michellewzhang <[email protected]>
1 parent ee78b73 · commit a71bc4c

3 files changed: +42 -366 lines changed


src/sentry/replays/lib/summarize.py

Lines changed: 31 additions & 82 deletions
@@ -8,18 +8,19 @@
 
 from sentry import nodestore
 from sentry.constants import ObjectStatus
-from sentry.issues.grouptype import FeedbackGroup
 from sentry.models.project import Project
-from sentry.replays.query import query_trace_connected_events
 from sentry.replays.usecases.ingest.event_parser import EventType
 from sentry.replays.usecases.ingest.event_parser import (
     get_timestamp_ms as get_replay_event_timestamp_ms,
 )
 from sentry.replays.usecases.ingest.event_parser import parse_network_content_lengths, which
-from sentry.search.events.types import SnubaParams
+from sentry.search.events.builder.discover import DiscoverQueryBuilder
+from sentry.search.events.types import QueryBuilderConfig, SnubaParams
 from sentry.services.eventstore.models import Event
+from sentry.snuba.dataset import Dataset
 from sentry.snuba.referrer import Referrer
 from sentry.utils import json
+from sentry.utils.snuba import bulk_snuba_queries
 
 logger = logging.getLogger(__name__)
 
@@ -98,86 +99,54 @@ def fetch_trace_connected_errors(
             organization=project.organization,
         )
 
-        # Query errors dataset
-        error_query = query_trace_connected_events(
-            dataset_label="errors",
+        # Generate a query for each trace ID. This will be executed in bulk.
+        error_query = DiscoverQueryBuilder(
+            Dataset.Events,
+            params={},
+            snuba_params=snuba_params,
+            query=f"trace:{trace_id}",
             selected_columns=[
                 "id",
                 "timestamp_ms",
                 "timestamp",
                 "title",
                 "message",
             ],
-            query=f"trace:{trace_id}",
-            snuba_params=snuba_params,
             orderby=["id"],
             limit=100,
-            referrer=Referrer.API_REPLAY_SUMMARIZE_BREADCRUMBS.value,
+            config=QueryBuilderConfig(
+                auto_fields=False,
+            ),
         )
         queries.append(error_query)
 
-        # Query issuePlatform dataset - this returns all other IP events,
-        # such as feedback and performance issues.
-        issue_query = query_trace_connected_events(
-            dataset_label="issuePlatform",
-            selected_columns=[
-                "event_id",
-                "title",
-                "subtitle",
-                "timestamp",
-                "occurrence_type_id",
-            ],
-            query=f"trace:{trace_id}",
-            snuba_params=snuba_params,
-            orderby=["event_id"],
-            limit=100,
-            referrer=Referrer.API_REPLAY_SUMMARIZE_BREADCRUMBS.value,
-        )
-        queries.append(issue_query)
-
     if not queries:
         return []
 
+    # Execute all queries
+    results = bulk_snuba_queries(
+        [query.get_snql_query() for query in queries],
+        referrer=Referrer.API_REPLAY_SUMMARIZE_BREADCRUMBS.value,
+    )
+
     # Process results and convert to EventDict objects
     error_events = []
-    seen_event_ids = set()  # Track seen event IDs to avoid duplicates
-
-    for query in queries:
-        result = query
-        error_data = result["data"]
+    for result, query in zip(results, queries):
+        error_data = query.process_results(result)["data"]
 
         for event in error_data:
-            event_id = event.get("id") or event.get("event_id")
-
-            # Skip if we've already seen this event
-            if event_id in seen_event_ids:
-                continue
-
-            seen_event_ids.add(event_id)
-
             timestamp = _parse_iso_timestamp_to_ms(
                 event.get("timestamp_ms")
             ) or _parse_iso_timestamp_to_ms(event.get("timestamp"))
-            message = event.get("subtitle", "") or event.get("message", "")
-
-            if event.get("occurrence_type_id") == FeedbackGroup.type_id:
-                category = "feedback"
-            else:
-                category = "error"
 
-            # NOTE: The issuePlatform dataset query can return feedback.
-            # We also fetch feedback from nodestore in fetch_feedback_details
-            # for feedback breadcrumbs.
-            # We avoid creating duplicate feedback logs
-            # by filtering for unique feedback IDs during log generation.
             if timestamp:
                 error_events.append(
                     EventDict(
-                        category=category,
-                        id=event_id,
+                        category="error",
+                        id=event["id"],
                         title=event.get("title", ""),
                         timestamp=timestamp,
-                        message=message,
+                        message=event.get("message", ""),
                     )
                 )
 
@@ -238,7 +207,7 @@ def get_summary_logs(
     error_events: list[EventDict],
     project_id: int,
 ) -> list[str]:
-    # Sort error events by timestamp. This list includes all feedback events still.
+    # Sort error events by timestamp
    error_events.sort(key=lambda x: x["timestamp"])
     return list(generate_summary_logs(segment_data, error_events, project_id))
 
@@ -248,12 +217,8 @@ def generate_summary_logs(
     error_events: list[EventDict],
     project_id,
 ) -> Generator[str]:
-    """
-    Generate log messages from events and errors in chronological order.
-    Avoid processing duplicate feedback events.
-    """
+    """Generate log messages from events and errors in chronological order."""
     error_idx = 0
-    seen_feedback_ids = set()
 
     # Process segments
     for _, segment in segment_data:
@@ -267,39 +232,23 @@ def generate_summary_logs(
                 error_idx < len(error_events) and error_events[error_idx]["timestamp"] < timestamp
             ):
                 error = error_events[error_idx]
-
-                if error["category"] == "error":
-                    yield generate_error_log_message(error)
-                elif error["category"] == "feedback":
-                    seen_feedback_ids.add(error["id"])
-                    yield generate_feedback_log_message(error)
-
+                yield generate_error_log_message(error)
                 error_idx += 1
 
             # Yield the current event's log message
             if event_type == EventType.FEEDBACK:
                 feedback_id = event["data"]["payload"].get("data", {}).get("feedbackId")
-                # Filter out duplicate feedback events.
-                if feedback_id not in seen_feedback_ids:
-                    seen_feedback_ids.add(feedback_id)
-                    feedback = fetch_feedback_details(feedback_id, project_id)
-
-                    if feedback:
-                        yield generate_feedback_log_message(feedback)
+                feedback = fetch_feedback_details(feedback_id, project_id)
+                if feedback:
+                    yield generate_feedback_log_message(feedback)
 
             elif message := as_log_message(event):
                 yield message
 
     # Yield any remaining error messages
     while error_idx < len(error_events):
         error = error_events[error_idx]
-
-        if error["category"] == "error":
-            yield generate_error_log_message(error)
-        elif error["category"] == "feedback":
-            seen_feedback_ids.add(error["id"])
-            yield generate_feedback_log_message(error)
-
+        yield generate_error_log_message(error)
         error_idx += 1
 
 
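In short, the restored fetch_trace_connected_errors builds one DiscoverQueryBuilder per trace ID, sends them all to Snuba in a single bulk_snuba_queries round trip, and lets each builder translate its own raw response. The following is a minimal sketch of that pattern using only the calls visible in the diff above; the fetch_error_titles wrapper is hypothetical, and trace_ids/snuba_params stand in for values the real function derives from the replay and project.

from sentry.search.events.builder.discover import DiscoverQueryBuilder
from sentry.search.events.types import QueryBuilderConfig, SnubaParams
from sentry.snuba.dataset import Dataset
from sentry.snuba.referrer import Referrer
from sentry.utils.snuba import bulk_snuba_queries


def fetch_error_titles(trace_ids: list[str], snuba_params: SnubaParams) -> list[str]:
    # One builder per trace ID; nothing hits Snuba yet.
    queries = [
        DiscoverQueryBuilder(
            Dataset.Events,
            params={},
            snuba_params=snuba_params,
            query=f"trace:{trace_id}",
            selected_columns=["id", "title", "timestamp"],
            orderby=["id"],
            limit=100,
            config=QueryBuilderConfig(auto_fields=False),
        )
        for trace_id in trace_ids
    ]
    if not queries:
        return []

    # All queries go out in a single bulk round trip.
    results = bulk_snuba_queries(
        [query.get_snql_query() for query in queries],
        referrer=Referrer.API_REPLAY_SUMMARIZE_BREADCRUMBS.value,
    )

    # Each builder post-processes its own raw result, yielding rows keyed
    # by the selected column names.
    titles: list[str] = []
    for result, query in zip(results, queries):
        for row in query.process_results(result)["data"]:
            titles.append(row.get("title", ""))
    return titles

Compared with the reverted version, this bulk pattern drops the extra issuePlatform query (and the feedback de-duplication that came with it) in exchange for a single dataset and one network round trip.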

src/sentry/replays/query.py

Lines changed: 1 addition & 55 deletions
@@ -2,7 +2,7 @@
 
 from collections.abc import Generator, Sequence
 from datetime import datetime
-from typing import Any, Literal
+from typing import Any
 
 from snuba_sdk import (
     Column,
@@ -34,8 +34,6 @@
     make_full_aggregation_query,
     query_using_optimized_search,
 )
-from sentry.search.events.types import SnubaParams
-from sentry.snuba.utils import get_dataset
 from sentry.utils.snuba import raw_snql_query
 
 MAX_PAGE_SIZE = 100
@@ -904,55 +902,3 @@ def compute_has_viewed(viewed_by_id: int | None) -> Function:
         ],
         alias="has_viewed",
     )
-
-
-def query_trace_connected_events(
-    dataset_label: Literal["errors", "issuePlatform", "discover"],
-    selected_columns: list[str],
-    query: str | None,
-    snuba_params: SnubaParams,
-    equations: list[str] | None = None,
-    orderby: list[str] | None = None,
-    offset: int = 0,
-    limit: int = 10,
-    referrer: str = "api.replay.details-page",
-) -> dict[str, Any]:
-    """
-    Query for trace-connected events, with a reusable query configuration for replays.
-
-    Args:
-        dataset: The Snuba dataset to query against
-        selected_columns: List of columns to select
-        query: Optional query string
-        snuba_params: Snuba parameters including project IDs, time range, etc.
-        equations: Optional list of equations
-        orderby: Optional ordering specification
-        offset: Pagination offset
-        limit: Pagination limit
-        referrer: Referrer string for tracking
-
-    Returns:
-        Query result from the dataset
-    """
-    query_details = {
-        "selected_columns": selected_columns,
-        "query": query,
-        "snuba_params": snuba_params,
-        "equations": equations,
-        "orderby": orderby,
-        "offset": offset,
-        "limit": limit,
-        "referrer": referrer,
-        "auto_fields": True,
-        "auto_aggregations": True,
-        "use_aggregate_conditions": True,
-        "allow_metric_aggregates": False,
-        "transform_alias_to_input_format": True,
-    }
-
-    dataset = get_dataset(dataset_label)
-
-    if dataset is None:
-        raise ValueError(f"Unknown dataset: {dataset_label}")
-
-    return dataset.query(**query_details)
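Before this revert, summarize.py called the helper deleted above once per dataset label rather than constructing query builders itself. Roughly, adapted from the removed issuePlatform call site (the fetch_issue_platform_rows wrapper is hypothetical, and the import disappears once this commit lands):

from typing import Any

from sentry.replays.query import query_trace_connected_events  # deleted by this revert
from sentry.search.events.types import SnubaParams
from sentry.snuba.referrer import Referrer


def fetch_issue_platform_rows(trace_id: str, snuba_params: SnubaParams) -> list[dict[str, Any]]:
    # Mirrors the removed call site in summarize.py: the dataset is chosen
    # by label, and the helper applies the shared query configuration
    # (auto_fields, aggregate handling, alias transformation) itself.
    result = query_trace_connected_events(
        dataset_label="issuePlatform",
        selected_columns=[
            "event_id",
            "title",
            "subtitle",
            "timestamp",
            "occurrence_type_id",
        ],
        query=f"trace:{trace_id}",
        snuba_params=snuba_params,
        orderby=["event_id"],
        limit=100,
        referrer=Referrer.API_REPLAY_SUMMARIZE_BREADCRUMBS.value,
    )
    # The helper set transform_alias_to_input_format=True, so rows keep the
    # requested column names and callers can read event["event_id"] directly.
    return result["data"]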
