From cea254d53244681e89d7087806bb9465e87b2505 Mon Sep 17 00:00:00 2001 From: George Gritsouk <989898+gggritso@users.noreply.github.com> Date: Mon, 20 Mar 2023 17:31:02 -0400 Subject: [PATCH 1/5] Add spec --- .../detectors/n_plus_one_api_calls_detector.py | 11 +++++++---- .../test_n_plus_one_api_calls_detector.py | 10 ++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/sentry/utils/performance_issues/detectors/n_plus_one_api_calls_detector.py b/src/sentry/utils/performance_issues/detectors/n_plus_one_api_calls_detector.py index 4da85efc12cb74..1e2bb73afa8fcd 100644 --- a/src/sentry/utils/performance_issues/detectors/n_plus_one_api_calls_detector.py +++ b/src/sentry/utils/performance_issues/detectors/n_plus_one_api_calls_detector.py @@ -157,10 +157,12 @@ def is_span_eligible(cls, span: Span) -> bool: if description.strip()[:3].upper() != "GET": return False + url = get_url_from_span(span) + # GraphQL URLs have complicated queries in them. Until we parse those # queries to check for what's duplicated, we can't tell what is being # duplicated. Ignore them for now - if "graphql" in description: + if "graphql" in url: return False # Next.js infixes its data URLs with a build ID. (e.g., @@ -168,11 +170,9 @@ def is_span_eligible(cls, span: Span) -> bool: # explosion, since every deploy would change this ID and create new # fingerprints. Since we're not parameterizing URLs yet, we need to # exclude them - if "_next/data" in description: + if "_next/data" in url: return False - url = get_url_from_span(span) - # Next.js error pages cause an N+1 API Call that isn't useful to anyone if "__nextjs_original-stack-frame" in url: return False @@ -180,6 +180,9 @@ def is_span_eligible(cls, span: Span) -> bool: if not url: return False + # Once most users update their SDKs to use the latest standard, we + # won't have to do this, since the URLs will be sent in as `span.data` + # in a parsed format parsed_url = urlparse(str(url)) if parsed_url.netloc in cls.HOST_DENYLIST: diff --git a/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py b/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py index 9f755cac0ecb39..b26180b68a646e 100644 --- a/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py +++ b/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py @@ -353,6 +353,16 @@ def test_allows_eligible_spans(span): "description": "GET http://service.io/resource?graphql=somequery", "hash": "a", }, + { + "span_id": "a", + "op": "http.client", + "description": "GET http://service.io/resource", # New JS SDK removes query string from description + "hash": "a", + "data": { + "http.query": "graphql=somequery", + "url": "http://service.io", + }, + }, { "span_id": "a", "op": "http.client", From 7c02b91c4793b0393032a10e73fa1d9e2f230eff Mon Sep 17 00:00:00 2001 From: George Gritsouk <989898+gggritso@users.noreply.github.com> Date: Wed, 22 Mar 2023 09:53:42 -0400 Subject: [PATCH 2/5] Fix incorrect fixtures The URL in the data is never different than the one in the description! --- .../performance_issues/test_n_plus_one_api_calls_detector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py b/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py index b26180b68a646e..288f64c4b3a3d2 100644 --- a/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py +++ b/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py @@ -343,7 +343,7 @@ def test_allows_eligible_spans(span): { "span_id": "a", "op": "http.client", - "description": "GET http://service.io/resource", + "description": "GET /resource.js", "hash": "a", "data": {"url": "/resource.js"}, }, @@ -360,7 +360,7 @@ def test_allows_eligible_spans(span): "hash": "a", "data": { "http.query": "graphql=somequery", - "url": "http://service.io", + "url": "http://service.io/resource", }, }, { From a3a2b576e423945ef0b97ab2686d9185898c01a2 Mon Sep 17 00:00:00 2001 From: George Gritsouk <989898+gggritso@users.noreply.github.com> Date: Wed, 22 Mar 2023 10:05:50 -0400 Subject: [PATCH 3/5] Improve span URL extraction Clarify, and add handling for cases where span data has `http.query` information. --- src/sentry/utils/performance_issues/base.py | 49 ++++++++++++++++----- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/src/sentry/utils/performance_issues/base.py b/src/sentry/utils/performance_issues/base.py index 7235ae4a19b522..72b376f2f71282 100644 --- a/src/sentry/utils/performance_issues/base.py +++ b/src/sentry/utils/performance_issues/base.py @@ -167,19 +167,44 @@ def get_duration_between_spans(first_span: Span, second_span: Span): def get_url_from_span(span: Span) -> str: + """ + Parses the span data and pulls out the URL. Accounts for different SDKs and + different versions of SDKs formatting and parsing the URL contents + differently. + """ + data = span.get("data") or {} - url = data.get("url") or "" - if not url: - # If data is missing, fall back to description - description = span.get("description") or "" - parts = description.split(" ", 1) - if len(parts) == 2: - url = parts[1] - - if type(url) is dict: - url = url.get("pathname") or "" - - return url + + # The most modern and version is to provide URL information in the span + # data + url_data = data.get("url") + + if type(url_data) is dict: + # Some transactions mysteriously provide the URL as a dict that looks + # like JavaScript's URL object + url = url_data.get("pathname") or "" + url += url_data.get("search") or "" + return url + + if type(url_data) is str: + # But usually the URL is a regular string, and so is the query. If + # `http.query` is absent, `url` contains the query + url = url_data + query_data = data.get("http.query") + if type(query_data) is str and len(query_data) > 0: + url += f"?{query_data}" + + return url + + # Attempt to parse the full URL from the span description, in case + # the previous approaches did not yield a good result + description = span.get("description") or "" + parts = description.split(" ", 1) + if len(parts) == 2: + url = parts[1] + return url + + return "" def fingerprint_spans(spans: List[Span], unique_only: bool = False): From 93307509ad6eaa53fc6385366e744ec266e46fda Mon Sep 17 00:00:00 2001 From: George Gritsouk <989898+gggritso@users.noreply.github.com> Date: Wed, 22 Mar 2023 11:50:51 -0400 Subject: [PATCH 4/5] Fix typo in docs --- src/sentry/utils/performance_issues/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/utils/performance_issues/base.py b/src/sentry/utils/performance_issues/base.py index 72b376f2f71282..1b20cf865dbced 100644 --- a/src/sentry/utils/performance_issues/base.py +++ b/src/sentry/utils/performance_issues/base.py @@ -175,7 +175,7 @@ def get_url_from_span(span: Span) -> str: data = span.get("data") or {} - # The most modern and version is to provide URL information in the span + # The most modern version is to provide URL information in the span # data url_data = data.get("url") From cf8950febf165bffc87f444a9c288b291786f1d3 Mon Sep 17 00:00:00 2001 From: George Gritsouk <989898+gggritso@users.noreply.github.com> Date: Wed, 22 Mar 2023 14:20:58 -0400 Subject: [PATCH 5/5] Update doc string --- src/sentry/utils/performance_issues/base.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/sentry/utils/performance_issues/base.py b/src/sentry/utils/performance_issues/base.py index 1b20cf865dbced..8c468fb13bb342 100644 --- a/src/sentry/utils/performance_issues/base.py +++ b/src/sentry/utils/performance_issues/base.py @@ -187,8 +187,10 @@ def get_url_from_span(span: Span) -> str: return url if type(url_data) is str: - # But usually the URL is a regular string, and so is the query. If - # `http.query` is absent, `url` contains the query + # Usually the URL is a regular string, and so is the query. This + # is the standardized format for all SDKs, and is the preferred + # format going forward. Otherwise, if `http.query` is absent, `url` + # contains the query. url = url_data query_data = data.get("http.query") if type(query_data) is str and len(query_data) > 0: