From cea254d53244681e89d7087806bb9465e87b2505 Mon Sep 17 00:00:00 2001
From: George Gritsouk <989898+gggritso@users.noreply.github.com>
Date: Mon, 20 Mar 2023 17:31:02 -0400
Subject: [PATCH 1/5] Add spec

---
 .../detectors/n_plus_one_api_calls_detector.py        | 11 +++++++----
 .../test_n_plus_one_api_calls_detector.py             | 10 ++++++++++
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/sentry/utils/performance_issues/detectors/n_plus_one_api_calls_detector.py b/src/sentry/utils/performance_issues/detectors/n_plus_one_api_calls_detector.py
index 4da85efc12cb74..1e2bb73afa8fcd 100644
--- a/src/sentry/utils/performance_issues/detectors/n_plus_one_api_calls_detector.py
+++ b/src/sentry/utils/performance_issues/detectors/n_plus_one_api_calls_detector.py
@@ -157,10 +157,12 @@ def is_span_eligible(cls, span: Span) -> bool:
         if description.strip()[:3].upper() != "GET":
             return False
 
+        url = get_url_from_span(span)
+
         # GraphQL URLs have complicated queries in them. Until we parse those
         # queries to check for what's duplicated, we can't tell what is being
         # duplicated. Ignore them for now
-        if "graphql" in description:
+        if "graphql" in url:
             return False
 
         # Next.js infixes its data URLs with a build ID. (e.g.,
@@ -168,11 +170,9 @@ def is_span_eligible(cls, span: Span) -> bool:
         # explosion, since every deploy would change this ID and create new
         # fingerprints. Since we're not parameterizing URLs yet, we need to
         # exclude them
-        if "_next/data" in description:
+        if "_next/data" in url:
             return False
 
-        url = get_url_from_span(span)
-
         # Next.js error pages cause an N+1 API Call that isn't useful to anyone
         if "__nextjs_original-stack-frame" in url:
             return False
@@ -180,6 +180,9 @@ def is_span_eligible(cls, span: Span) -> bool:
         if not url:
             return False
 
+        # Once most users update their SDKs to use the latest standard, we
+        # won't have to do this, since the URLs will be sent in as `span.data`
+        # in a parsed format
         parsed_url = urlparse(str(url))
 
         if parsed_url.netloc in cls.HOST_DENYLIST:
diff --git a/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py b/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py
index 9f755cac0ecb39..b26180b68a646e 100644
--- a/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py
+++ b/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py
@@ -353,6 +353,16 @@ def test_allows_eligible_spans(span):
             "description": "GET http://service.io/resource?graphql=somequery",
             "hash": "a",
         },
+        {
+            "span_id": "a",
+            "op": "http.client",
+            "description": "GET http://service.io/resource",  # New JS SDK removes query string from description
+            "hash": "a",
+            "data": {
+                "http.query": "graphql=somequery",
+                "url": "http://service.io",
+            },
+        },
         {
             "span_id": "a",
             "op": "http.client",

From 7c02b91c4793b0393032a10e73fa1d9e2f230eff Mon Sep 17 00:00:00 2001
From: George Gritsouk <989898+gggritso@users.noreply.github.com>
Date: Wed, 22 Mar 2023 09:53:42 -0400
Subject: [PATCH 2/5] Fix incorrect fixtures

The URL in the data is never different than the one in the description!
---
 .../performance_issues/test_n_plus_one_api_calls_detector.py  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py b/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py
index b26180b68a646e..288f64c4b3a3d2 100644
--- a/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py
+++ b/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py
@@ -343,7 +343,7 @@ def test_allows_eligible_spans(span):
         {
             "span_id": "a",
             "op": "http.client",
-            "description": "GET http://service.io/resource",
+            "description": "GET /resource.js",
             "hash": "a",
             "data": {"url": "/resource.js"},
         },
@@ -360,7 +360,7 @@ def test_allows_eligible_spans(span):
             "hash": "a",
             "data": {
                 "http.query": "graphql=somequery",
-                "url": "http://service.io",
+                "url": "http://service.io/resource",
             },
         },
         {

From a3a2b576e423945ef0b97ab2686d9185898c01a2 Mon Sep 17 00:00:00 2001
From: George Gritsouk <989898+gggritso@users.noreply.github.com>
Date: Wed, 22 Mar 2023 10:05:50 -0400
Subject: [PATCH 3/5] Improve span URL extraction

Clarify, and add handling for cases where span data has `http.query`
information.
---
 src/sentry/utils/performance_issues/base.py | 49 ++++++++++++++++-----
 1 file changed, 37 insertions(+), 12 deletions(-)

diff --git a/src/sentry/utils/performance_issues/base.py b/src/sentry/utils/performance_issues/base.py
index 7235ae4a19b522..72b376f2f71282 100644
--- a/src/sentry/utils/performance_issues/base.py
+++ b/src/sentry/utils/performance_issues/base.py
@@ -167,19 +167,44 @@ def get_duration_between_spans(first_span: Span, second_span: Span):
 
 
 def get_url_from_span(span: Span) -> str:
+    """
+    Parses the span data and pulls out the URL. Accounts for different SDKs and
+    different versions of SDKs formatting and parsing the URL contents
+    differently.
+    """
+
     data = span.get("data") or {}
-    url = data.get("url") or ""
-    if not url:
-        # If data is missing, fall back to description
-        description = span.get("description") or ""
-        parts = description.split(" ", 1)
-        if len(parts) == 2:
-            url = parts[1]
-
-    if type(url) is dict:
-        url = url.get("pathname") or ""
-
-    return url
+
+    # The most modern and version is to provide URL information in the span
+    # data
+    url_data = data.get("url")
+
+    if type(url_data) is dict:
+        # Some transactions mysteriously provide the URL as a dict that looks
+        # like JavaScript's URL object
+        url = url_data.get("pathname") or ""
+        url += url_data.get("search") or ""
+        return url
+
+    if type(url_data) is str:
+        # But usually the URL is a regular string, and so is the query. If
+        # `http.query` is absent, `url` contains the query
+        url = url_data
+        query_data = data.get("http.query")
+        if type(query_data) is str and len(query_data) > 0:
+            url += f"?{query_data}"
+
+        return url
+
+    # Attempt to parse the full URL from the span description, in case
+    # the previous approaches did not yield a good result
+    description = span.get("description") or ""
+    parts = description.split(" ", 1)
+    if len(parts) == 2:
+        url = parts[1]
+        return url
+
+    return ""
 
 
 def fingerprint_spans(spans: List[Span], unique_only: bool = False):

From 93307509ad6eaa53fc6385366e744ec266e46fda Mon Sep 17 00:00:00 2001
From: George Gritsouk <989898+gggritso@users.noreply.github.com>
Date: Wed, 22 Mar 2023 11:50:51 -0400
Subject: [PATCH 4/5] Fix typo in docs

---
 src/sentry/utils/performance_issues/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sentry/utils/performance_issues/base.py b/src/sentry/utils/performance_issues/base.py
index 72b376f2f71282..1b20cf865dbced 100644
--- a/src/sentry/utils/performance_issues/base.py
+++ b/src/sentry/utils/performance_issues/base.py
@@ -175,7 +175,7 @@ def get_url_from_span(span: Span) -> str:
 
     data = span.get("data") or {}
 
-    # The most modern and version is to provide URL information in the span
+    # The most modern version is to provide URL information in the span
     # data
     url_data = data.get("url")
 

From cf8950febf165bffc87f444a9c288b291786f1d3 Mon Sep 17 00:00:00 2001
From: George Gritsouk <989898+gggritso@users.noreply.github.com>
Date: Wed, 22 Mar 2023 14:20:58 -0400
Subject: [PATCH 5/5] Update doc string

---
 src/sentry/utils/performance_issues/base.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/sentry/utils/performance_issues/base.py b/src/sentry/utils/performance_issues/base.py
index 1b20cf865dbced..8c468fb13bb342 100644
--- a/src/sentry/utils/performance_issues/base.py
+++ b/src/sentry/utils/performance_issues/base.py
@@ -187,8 +187,10 @@ def get_url_from_span(span: Span) -> str:
         return url
 
     if type(url_data) is str:
-        # But usually the URL is a regular string, and so is the query. If
-        # `http.query` is absent, `url` contains the query
+        # Usually the URL is a regular string, and so is the query. This
+        # is the standardized format for all SDKs, and is the preferred
+        # format going forward. Otherwise, if `http.query` is absent, `url`
+        # contains the query.
         url = url_data
         query_data = data.get("http.query")
         if type(query_data) is str and len(query_data) > 0: