From a51b3082eebe0148d8b4c55e2ebcc67f8ae55920 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 14 Feb 2023 14:03:08 +0100 Subject: [PATCH 01/28] feat(source-maps): Implement new tables --- src/sentry/models/artifactbundle.py | 55 +++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 src/sentry/models/artifactbundle.py diff --git a/src/sentry/models/artifactbundle.py b/src/sentry/models/artifactbundle.py new file mode 100644 index 00000000000000..b98ff7c5e60896 --- /dev/null +++ b/src/sentry/models/artifactbundle.py @@ -0,0 +1,55 @@ +from django.db import models + +from sentry.db.models import ( + BoundedBigIntegerField, + BoundedPositiveIntegerField, + FlexibleForeignKey, + Model, + region_silo_only_model, +) + + +@region_silo_only_model +class ArtifactBundle(Model): + __include_in_export__ = False + + bundle_id = models.CharField(max_length=32, null=True) + organization_id = BoundedBigIntegerField(db_index=True) + # TODO: define max length. + release_name = models.CharField(null=True) + dist_id = BoundedBigIntegerField(null=True) + file = FlexibleForeignKey("sentry.File") + artifact_count = BoundedPositiveIntegerField() + + class Meta: + app_label = "sentry" + db_table = "sentry_artifactbundlefile" + + unique_together = (("organization_id", "bundle_id"),) + index_together = (("organization_id", "release_name"),) + + +@region_silo_only_model +class DebugIdIndex(Model): + __include_in_export__ = False + + debug_id = models.CharField(max_length=32, unique=True, db_index=True) + artifact_bundle = FlexibleForeignKey("sentry.ArtifactBundle") + + class Meta: + app_label = "sentry" + db_table = "sentry_debugidindex" + + +@region_silo_only_model +class ProjectArtifactBundleIndex(Model): + __include_in_export__ = False + + project_id = BoundedBigIntegerField(db_index=True) + artifact_bundle_file = FlexibleForeignKey("sentry.ArtifactBundle") + + class Meta: + app_label = "sentry" + db_table = "sentry_projectartifactbundleindex" + + unique_together = (("project_id", "artifact_bundle"),) From 2e417801e4296c98ca0ab94d38c56b18da26a2aa Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 14 Feb 2023 15:39:58 +0100 Subject: [PATCH 02/28] Improve models --- src/sentry/models/artifactbundle.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/sentry/models/artifactbundle.py b/src/sentry/models/artifactbundle.py index b98ff7c5e60896..33ba17d932f0c6 100644 --- a/src/sentry/models/artifactbundle.py +++ b/src/sentry/models/artifactbundle.py @@ -7,16 +7,16 @@ Model, region_silo_only_model, ) +from sentry.models import DB_VERSION_LENGTH @region_silo_only_model class ArtifactBundle(Model): __include_in_export__ = False - bundle_id = models.CharField(max_length=32, null=True) + bundle_id = models.UUIDField(null=True) organization_id = BoundedBigIntegerField(db_index=True) - # TODO: define max length. 
- release_name = models.CharField(null=True) + release_name = models.CharField(max_length=DB_VERSION_LENGTH, null=True) dist_id = BoundedBigIntegerField(null=True) file = FlexibleForeignKey("sentry.File") artifact_count = BoundedPositiveIntegerField() @@ -25,31 +25,34 @@ class Meta: app_label = "sentry" db_table = "sentry_artifactbundlefile" - unique_together = (("organization_id", "bundle_id"),) + unique_together = ( + ("organization_id", "bundle_id"), + ("organization_id", "release_name"), + ) index_together = (("organization_id", "release_name"),) @region_silo_only_model -class DebugIdIndex(Model): +class DebugIdArtifactBundle(Model): __include_in_export__ = False - debug_id = models.CharField(max_length=32, unique=True, db_index=True) + debug_id = models.UUIDField(unique=True, db_index=True) artifact_bundle = FlexibleForeignKey("sentry.ArtifactBundle") class Meta: app_label = "sentry" - db_table = "sentry_debugidindex" + db_table = "sentry_debugidartifactbundle" @region_silo_only_model -class ProjectArtifactBundleIndex(Model): +class ProjectArtifactBundle(Model): __include_in_export__ = False project_id = BoundedBigIntegerField(db_index=True) - artifact_bundle_file = FlexibleForeignKey("sentry.ArtifactBundle") + artifact_bundle = FlexibleForeignKey("sentry.ArtifactBundle") class Meta: app_label = "sentry" - db_table = "sentry_projectartifactbundleindex" + db_table = "sentry_projectartifactbundle" unique_together = (("project_id", "artifact_bundle"),) From 64f7794b539478ed51e867f8ff464c3ad26d142a Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Thu, 16 Feb 2023 13:52:20 +0100 Subject: [PATCH 03/28] Add date information --- src/sentry/models/artifactbundle.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/sentry/models/artifactbundle.py b/src/sentry/models/artifactbundle.py index 33ba17d932f0c6..54081529d3b312 100644 --- a/src/sentry/models/artifactbundle.py +++ b/src/sentry/models/artifactbundle.py @@ -1,4 +1,5 @@ from django.db import models +from django.utils import timezone from sentry.db.models import ( BoundedBigIntegerField, @@ -29,20 +30,29 @@ class Meta: ("organization_id", "bundle_id"), ("organization_id", "release_name"), ) - index_together = (("organization_id", "release_name"),) @region_silo_only_model class DebugIdArtifactBundle(Model): __include_in_export__ = False - debug_id = models.UUIDField(unique=True, db_index=True) + debug_id = models.UUIDField() artifact_bundle = FlexibleForeignKey("sentry.ArtifactBundle") + upload_date = models.DateTimeField(default=timezone.now) class Meta: app_label = "sentry" db_table = "sentry_debugidartifactbundle" + # We can have the same debug_id pointing to different artifact_bundle(s) because the user might upload + # the same artifacts twice, or they might have certain build files that don't change across builds. 
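+        # For that reason, uniqueness is enforced on the (debug_id, artifact_bundle) pair rather
+        # than on the debug_id alone.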
+ unique_together = ( + ( + "debug_id", + "artifact_bundle", + ), + ) + @region_silo_only_model class ProjectArtifactBundle(Model): From 8bd08e6d57b5d4428037c512877c30cdc1793a62 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 20 Feb 2023 14:36:53 +0100 Subject: [PATCH 04/28] Change table name --- src/sentry/models/artifactbundle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/models/artifactbundle.py b/src/sentry/models/artifactbundle.py index 54081529d3b312..fa1587ab7cc68a 100644 --- a/src/sentry/models/artifactbundle.py +++ b/src/sentry/models/artifactbundle.py @@ -24,7 +24,7 @@ class ArtifactBundle(Model): class Meta: app_label = "sentry" - db_table = "sentry_artifactbundlefile" + db_table = "sentry_artifactbundle" unique_together = ( ("organization_id", "bundle_id"), From 301820529b8b41522900694875415fb43e8c4348 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 20 Feb 2023 15:49:37 +0100 Subject: [PATCH 05/28] Add new datetime fields --- src/sentry/models/artifactbundle.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/sentry/models/artifactbundle.py b/src/sentry/models/artifactbundle.py index fa1587ab7cc68a..bb2c02040cfabc 100644 --- a/src/sentry/models/artifactbundle.py +++ b/src/sentry/models/artifactbundle.py @@ -38,7 +38,8 @@ class DebugIdArtifactBundle(Model): debug_id = models.UUIDField() artifact_bundle = FlexibleForeignKey("sentry.ArtifactBundle") - upload_date = models.DateTimeField(default=timezone.now) + date_added = models.DateTimeField(default=timezone.now) + date_last_accessed = models.DateTimeField(default=timezone.now) class Meta: app_label = "sentry" From 0c2702c7bcb404262b44bf944a3f340773f0e0e7 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 21 Feb 2023 15:19:17 +0100 Subject: [PATCH 06/28] Update tables --- src/sentry/models/artifactbundle.py | 47 +++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/src/sentry/models/artifactbundle.py b/src/sentry/models/artifactbundle.py index bb2c02040cfabc..1c5f0822d9d273 100644 --- a/src/sentry/models/artifactbundle.py +++ b/src/sentry/models/artifactbundle.py @@ -1,3 +1,5 @@ +from enum import Enum + from django.db import models from django.utils import timezone @@ -11,14 +13,23 @@ from sentry.models import DB_VERSION_LENGTH +class SourceFileType(Enum): + SOURCE = 1 + MINIFIED_SOURCE = 2 + SOURCE_MAP = 3 + INDEXED_RAM_BUNDLE = 4 + + @classmethod + def choices(cls): + return [(key.value, key.name) for key in cls] + + @region_silo_only_model class ArtifactBundle(Model): __include_in_export__ = False - bundle_id = models.UUIDField(null=True) organization_id = BoundedBigIntegerField(db_index=True) - release_name = models.CharField(max_length=DB_VERSION_LENGTH, null=True) - dist_id = BoundedBigIntegerField(null=True) + bundle_id = models.UUIDField(null=True) file = FlexibleForeignKey("sentry.File") artifact_count = BoundedPositiveIntegerField() @@ -26,18 +37,33 @@ class Meta: app_label = "sentry" db_table = "sentry_artifactbundle" - unique_together = ( - ("organization_id", "bundle_id"), - ("organization_id", "release_name"), - ) + unique_together = (("organization_id", "bundle_id"),) + + +@region_silo_only_model +class ReleaseArtifactBundle(Model): + __include_in_export__ = False + + organization_id = BoundedBigIntegerField(db_index=True) + release_name = models.CharField(max_length=DB_VERSION_LENGTH) + dist_id = BoundedBigIntegerField(null=True) + artifact_bundle = 
FlexibleForeignKey("sentry.ArtifactBundle") + + class Meta: + app_label = "sentry" + db_table = "sentry_releaseartifactbundle" + + unique_together = (("organization_id", "release_name", "dist_id", "artifact_bundle"),) @region_silo_only_model class DebugIdArtifactBundle(Model): __include_in_export__ = False + organization_id = BoundedBigIntegerField(db_index=True) debug_id = models.UUIDField() artifact_bundle = FlexibleForeignKey("sentry.ArtifactBundle") + source_file_type = models.IntegerField(choices=SourceFileType.choices()) date_added = models.DateTimeField(default=timezone.now) date_last_accessed = models.DateTimeField(default=timezone.now) @@ -47,12 +73,7 @@ class Meta: # We can have the same debug_id pointing to different artifact_bundle(s) because the user might upload # the same artifacts twice, or they might have certain build files that don't change across builds. - unique_together = ( - ( - "debug_id", - "artifact_bundle", - ), - ) + unique_together = (("debug_id", "artifact_bundle", "source_file_type"),) @region_silo_only_model From 71be64b31591cb2046b0b26cb9690b7711df6851 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Thu, 23 Feb 2023 13:14:01 +0100 Subject: [PATCH 07/28] Improve --- src/sentry/lang/javascript/processor.py | 492 +++++++++++------- .../sentry/lang/javascript/test_processor.py | 8 +- .../sentry/lang/javascript/test_sourcemaps.py | 6 +- 3 files changed, 313 insertions(+), 193 deletions(-) diff --git a/src/sentry/lang/javascript/processor.py b/src/sentry/lang/javascript/processor.py index ec28d017bfd65d..96a9be4c8451a7 100644 --- a/src/sentry/lang/javascript/processor.py +++ b/src/sentry/lang/javascript/processor.py @@ -18,6 +18,7 @@ from django.utils.encoding import force_bytes, force_text from requests.utils import get_encoding_from_headers from symbolic import SourceMapCache as SmCache +from symbolic import SourceView from sentry import features, http, options from sentry.event_manager import set_tag @@ -38,7 +39,7 @@ from sentry.utils.safe import get_path from sentry.utils.urls import non_standard_url_join -from .cache import SourceCache, SourceMapCache +from .cache import SourceMapCache __all__ = ["JavaScriptStacktraceProcessor"] @@ -576,127 +577,146 @@ def fetch_release_artifact(url, release, dist): return result -def fetch_file(url, project=None, release=None, dist=None, allow_scraping=True): - """ - Pull down a URL, returning a UrlResult object. - - Attempts to fetch from the database first (assuming there's a release on the - event), then the internet. Caches the result of each of those two attempts - separately, whether or not those attempts are successful. Used for both - source files and source maps. 
- """ - # If our url has been truncated, it'd be impossible to fetch - # so we check for this early and bail - if url[-3:] == "...": - raise http.CannotFetch({"type": EventError.JS_MISSING_SOURCE, "url": http.expose_url(url)}) - - # if we've got a release to look on, try that first (incl associated cache) - if release: - with sentry_sdk.start_span( - op="JavaScriptStacktraceProcessor.fetch_file.fetch_release_artifact" - ): - result = fetch_release_artifact(url, release, dist) - else: - result = None +class Fetcher: + def __init__(self, project, release, dist, allow_scraping): + self.project = project + self.release = release + self.dist = dist + self.allow_scraping = allow_scraping - # otherwise, try the web-scraping cache and then the web itself + self.sourcemap_cache = SourceMapCache() - cache_key = f"source:cache:v4:{md5_text(url).hexdigest()}" + def fetch_by_debug_id(self, debug_id, source_file_type): + pass - if result is None: - if not allow_scraping or not url.startswith(("http:", "https:")): - error = {"type": EventError.JS_MISSING_SOURCE, "url": http.expose_url(url)} - raise http.CannotFetch(error) + def fetch_by_url(self, url): + """ + Pull down a URL, returning a UrlResult object. - logger.debug("Checking cache for url %r", url) - result = cache.get(cache_key) - if result is not None: - # Previous caches would be a 3-tuple instead of a 4-tuple, - # so this is being maintained for backwards compatibility - try: - encoding = result[4] - except IndexError: - encoding = None - # We got a cache hit, but the body is compressed, so we - # need to decompress it before handing it off - result = http.UrlResult( - result[0], result[1], zlib.decompress(result[2]), result[3], encoding + Attempts to fetch from the database first (assuming there's a release on the + event), then the internet. Caches the result of each of those two attempts + separately, whether or not those attempts are successful. Used for both + source files and source maps. 
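+
+        Raises http.CannotFetch when the file cannot be retrieved or is unusable: a truncated
+        url, a url that may not be scraped, a response too large to cache, a non-200 status,
+        a body that cannot be coerced to utf8 bytes, or HTML where JavaScript was expected.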
+ """ + # If our url has been truncated, it'd be impossible to fetch + # so we check for this early and bail + if url[-3:] == "...": + raise http.CannotFetch( + {"type": EventError.JS_MISSING_SOURCE, "url": http.expose_url(url)} ) - if result is None: - headers = {} - verify_ssl = False - if project and is_valid_origin(url, project=project): - verify_ssl = bool(project.get_option("sentry:verify_ssl", False)) - token = project.get_option("sentry:token") - if token: - token_header = project.get_option("sentry:token_header") or "X-Sentry-Token" - headers[token_header] = token - - with metrics.timer("sourcemaps.fetch"): - with sentry_sdk.start_span(op="JavaScriptStacktraceProcessor.fetch_file.http"): - result = http.fetch_file(url, headers=headers, verify_ssl=verify_ssl) + # if we've got a release to look on, try that first (incl associated cache) + if self.release: with sentry_sdk.start_span( - op="JavaScriptStacktraceProcessor.fetch_file.compress_for_cache" + op="JavaScriptStacktraceProcessor.fetch_file.fetch_release_artifact" ): - z_body = zlib.compress(result.body) - cache.set( - cache_key, - (url, result.headers, z_body, result.status, result.encoding), - get_max_age(result.headers), + result = fetch_release_artifact(url, self.release, self.dist) + else: + result = None + + # otherwise, try the web-scraping cache and then the web itself + + cache_key = f"source:cache:v4:{md5_text(url).hexdigest()}" + + if result is None: + if not self.allow_scraping or not url.startswith(("http:", "https:")): + error = {"type": EventError.JS_MISSING_SOURCE, "url": http.expose_url(url)} + raise http.CannotFetch(error) + + logger.debug("Checking cache for url %r", url) + result = cache.get(cache_key) + if result is not None: + # Previous caches would be a 3-tuple instead of a 4-tuple, + # so this is being maintained for backwards compatibility + try: + encoding = result[4] + except IndexError: + encoding = None + # We got a cache hit, but the body is compressed, so we + # need to decompress it before handing it off + result = http.UrlResult( + result[0], result[1], zlib.decompress(result[2]), result[3], encoding + ) + + if result is None: + headers = {} + verify_ssl = False + if self.project and is_valid_origin(url, project=self.project): + verify_ssl = bool(self.project.get_option("sentry:verify_ssl", False)) + token = self.project.get_option("sentry:token") + if token: + token_header = ( + self.project.get_option("sentry:token_header") or "X-Sentry-Token" + ) + headers[token_header] = token + + with metrics.timer("sourcemaps.fetch"): + with sentry_sdk.start_span(op="JavaScriptStacktraceProcessor.fetch_file.http"): + result = http.fetch_file(url, headers=headers, verify_ssl=verify_ssl) + with sentry_sdk.start_span( + op="JavaScriptStacktraceProcessor.fetch_file.compress_for_cache" + ): + z_body = zlib.compress(result.body) + cache.set( + cache_key, + (url, result.headers, z_body, result.status, result.encoding), + get_max_age(result.headers), + ) + + # since the cache.set above can fail we can end up in a situation + # where the file is too large for the cache. In that case we abort + # the fetch and cache a failure and lock the domain for future + # http fetches. + if cache.get(cache_key) is None: + error = { + "type": EventError.TOO_LARGE_FOR_CACHE, + "url": http.expose_url(url), + } + http.lock_domain(url, error=error) + raise http.CannotFetch(error) + + # If we did not get a 200 OK we just raise a cannot fetch here. 
+ if result.status != 200: + raise http.CannotFetch( + { + "type": EventError.FETCH_INVALID_HTTP_CODE, + "value": result.status, + "url": http.expose_url(url), + } ) - # since the cache.set above can fail we can end up in a situation - # where the file is too large for the cache. In that case we abort - # the fetch and cache a failure and lock the domain for future - # http fetches. - if cache.get(cache_key) is None: + # Make sure the file we're getting back is bytes. The only + # reason it'd not be binary would be from old cached blobs, so + # for compatibility with current cached files, let's coerce back to + # binary and say utf8 encoding. + if not isinstance(result.body, bytes): + try: + result = http.UrlResult( + result.url, + result.headers, + result.body.encode("utf8"), + result.status, + result.encoding, + ) + except UnicodeEncodeError: error = { - "type": EventError.TOO_LARGE_FOR_CACHE, + "type": EventError.FETCH_INVALID_ENCODING, + "value": "utf8", "url": http.expose_url(url), } - http.lock_domain(url, error=error) raise http.CannotFetch(error) - # If we did not get a 200 OK we just raise a cannot fetch here. - if result.status != 200: - raise http.CannotFetch( - { - "type": EventError.FETCH_INVALID_HTTP_CODE, - "value": result.status, - "url": http.expose_url(url), - } - ) - - # Make sure the file we're getting back is bytes. The only - # reason it'd not be binary would be from old cached blobs, so - # for compatibility with current cached files, let's coerce back to - # binary and say utf8 encoding. - if not isinstance(result.body, bytes): - try: - result = http.UrlResult( - result.url, - result.headers, - result.body.encode("utf8"), - result.status, - result.encoding, - ) - except UnicodeEncodeError: - error = { - "type": EventError.FETCH_INVALID_ENCODING, - "value": "utf8", - "url": http.expose_url(url), - } + # For JavaScript files, check if content is something other than JavaScript/JSON (i.e. HTML) + # NOTE: possible to have JS files that don't actually end w/ ".js", but + # this should catch 99% of cases + if urlsplit(url).path.endswith(".js") and is_html_response(result): + error = {"type": EventError.JS_INVALID_CONTENT, "url": url} raise http.CannotFetch(error) - # For JavaScript files, check if content is something other than JavaScript/JSON (i.e. 
HTML) - # NOTE: possible to have JS files that don't actually end w/ ".js", but - # this should catch 99% of cases - if urlsplit(url).path.endswith(".js") and is_html_response(result): - error = {"type": EventError.JS_INVALID_CONTENT, "url": url} - raise http.CannotFetch(error) + return result - return result + def _fetch_sourcemap_by_debug_id(self): + pass def is_html_response(result): @@ -737,13 +757,14 @@ def fetch_sourcemap(url, source=b"", project=None, release=None, dist=None, allo op="JavaScriptStacktraceProcessor.fetch_sourcemap.fetch_file" ) as span: span.set_data("url", url) - result = fetch_file( - url, - project=project, - release=release, - dist=dist, - allow_scraping=allow_scraping, - ) + # result = fetch_file( + # url, + # project=project, + # release=release, + # dist=dist, + # allow_scraping=allow_scraping, + # ) + result = None body = result.body try: with sentry_sdk.start_span( @@ -889,7 +910,21 @@ def __init__(self, *args, **kwargs): # cache holding mangled code, original code, and errors associated with # each abs_path in the stacktrace - self.cache = SourceCache() + # self.cache = SourceCache() + self.fetch_by_url_cache = {} + self.fetch_by_url_result = {} + self.fetch_by_url_errors = {} + + self.fetch_by_debug_id_result = {} + self.fetch_by_debug_id_cache = {} + + self.debug_id_sourcemap_cache = {} + self.url_sourcemap_cache = {} + + self.url_aliases = {} + self.minified_source_url_to_sourcemap_url = {} + + self.fetcher = None # cache holding source URLs, corresponding source map URLs, and source map contents self.sourcemaps = SourceMapCache() @@ -920,6 +955,8 @@ def preprocess_step(self, processing_task): date = timestamp and datetime.fromtimestamp(timestamp).replace(tzinfo=timezone.utc) self.dist = self.release.add_dist(self.data["dist"], date) + self.fetcher = Fetcher(self.project, self.release, self.dist, self.allow_scraping) + with sentry_sdk.start_span( op="JavaScriptStacktraceProcessor.preprocess_step.populate_source_cache" ): @@ -967,7 +1004,7 @@ def process_frame(self, processable_frame, processing_task): # `source` is used for pre/post and `context_line` frame expansion. # Here it's pointing to minified source, however the variable can be shadowed with the original sourceview # (or `None` if the token doesnt provide us with the `context_line`) down the road. - source = self.get_sourceview(frame["abs_path"]) + source = self.get_or_fetch_sourceview(frame["abs_path"]) source_context = None in_app = None @@ -1026,7 +1063,7 @@ def process_frame(self, processable_frame, processing_task): if token.context_line is not None: source_context = token.pre_context, token.context_line, token.post_context else: - source = self.get_sourceview(abs_path) + source = self.get_or_fetch_sourceview(abs_path) if source is None: errors = cache.get_errors(abs_path) @@ -1180,7 +1217,7 @@ def expand_frame(self, frame, source_context=None, source=None): return False if source_context is None: - source = source or self.get_sourceview(frame["abs_path"]) + source = source or self.get_or_fetch_sourceview(frame["abs_path"]) if source is None: logger.debug("No source found for %s", frame["abs_path"]) return False @@ -1198,89 +1235,172 @@ def expand_frame(self, frame, source_context=None, source=None): return True - def get_sourceview(self, filename): - if filename not in self.cache: - self.cache_source(filename) - return self.cache.get(filename) + # If we return None it means that we weren't able to find the file. 
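+    # The lookup first goes through the in-memory caches (_get_cached_sourceview) and only then
+    # falls back to an actual fetch by debug_id or url (_fetch_and_cache_sourceview).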
+    def get_or_fetch_sourceview(self, url=None, debug_id=None, source_file_type=None):
+        # We require that artifact bundles with debug ids use embedded sources in the source map.
+        if debug_id is not None and source_file_type != "source":
+            return None
 
-    def cache_source(self, filename):
-        """
-        Look for and (if found) cache a source file and its associated source
-        map (if any).
-        """
+        rv = self._get_cached_sourceview(url, debug_id, source_file_type)
+        if rv is not None:
+            return rv
 
-        sourcemaps = self.sourcemaps
-        cache = self.cache
+        return self._fetch_and_cache_sourceview(url, debug_id, source_file_type)
+
+    def _get_cached_sourceview(self, url=None, debug_id=None, source_file_type=None):
+        if debug_id is not None:
+            rv = self.fetch_by_debug_id_cache.get((debug_id, source_file_type))
+            if rv is not None:
+                return rv
 
+        if url is not None:
+            real_url = self.url_aliases.get(url, url)
+            rv = self.fetch_by_url_cache.get(real_url)
+            if rv is not None:
+                return rv
+
+        return None
+
+    def _fetch_and_cache_sourceview(self, url, debug_id, source_file_type):
         self.fetch_count += 1
 
+        # TODO: this branch should only apply to http fetches.
        if self.fetch_count > self.max_fetches:
-            cache.add_error(filename, {"type": EventError.JS_TOO_MANY_REMOTE_SOURCES})
-            return
+            cache.add_error(url, {"type": EventError.JS_TOO_MANY_REMOTE_SOURCES})
+            return None
 
         # TODO: respect cache-control/max-age headers to some extent
-        logger.debug("Attempting to cache source %r", filename)
-        try:
-            # this both looks in the database and tries to scrape the internet
-            with sentry_sdk.start_span(
-                op="JavaScriptStacktraceProcessor.cache_source.fetch_file"
-            ) as span:
-                span.set_data("filename", filename)
-                result = fetch_file(
-                    filename,
-                    project=self.project,
-                    release=self.release,
-                    dist=self.dist,
-                    allow_scraping=self.allow_scraping,
-                )
-        except http.BadSource as exc:
-            if not fetch_error_should_be_silienced(exc.data, filename):
-                cache.add_error(filename, exc.data)
-            # either way, there's no more for us to do here, since we don't have
-            # a valid file to cache
-            return
 
-        cache.add(filename, result.body, result.encoding)
-        cache.alias(result.url, filename)
+        logger.debug("Attempting to cache source %r", url)
+        # this both looks in the database and tries to scrape the internet
+        with sentry_sdk.start_span(
+            op="JavaScriptStacktraceProcessor.cache_source.fetch_file"
+        ) as span:
+            span.set_data("filename", url)
+            if debug_id is not None:
+                result = self.fetcher.fetch_by_debug_id(debug_id, source_file_type)
+                if result is not None:
+                    # We temporarily store the raw result as well; if we ever compute the
+                    # sourceview on demand instead, this lets us delete the other dictionary.
+                    self.fetch_by_debug_id_result[debug_id, source_file_type] = result
+                    sourceview = SourceView.from_bytes(result.body)
+                    self.fetch_by_debug_id_cache[debug_id, source_file_type] = sourceview
+
+                    return sourceview
 
-        sourcemap_url = discover_sourcemap(result)
-        if not sourcemap_url:
-            return
+        try:
+            result = self.fetcher.fetch_by_url(url)
+        except http.BadSource as exc:
+            if not fetch_error_should_be_silienced(exc.data, url):
+                self.fetch_by_url_errors[url] = exc.data
+            # either way, there's no more for us to do here, since we don't have
+            # a valid file to cache
+            return None
+        else:
+            # TODO: it should do something but we don't know.
+            if result.url != url:
+                self.url_aliases[result.url] = url
+
+            # We temporarily store the raw result as well; if we ever compute the
+            # sourceview on demand instead, this lets us delete the other dictionary.
+ self.fetch_by_url_result[url] = result + sourceview = SourceView.from_bytes(result.body) + self.fetch_by_url_cache[url] = sourceview + + sourcemap_url = discover_sourcemap(result) + if sourcemap_url: + self.minified_source_url_to_sourcemap_url[url] = sourcemap_url + + return sourceview + + # get_or_fetch_sourceview("test.js") + # get_or_fetch_sourcemap("test.js") + # + # The url is of the minified file. + # If this function returns None, it means that we didn't find the sourcemap, maybe because the file at url isn't + # a minified file. + def get_or_fetch_sourcemap(self, url=None, debug_id=None): + rv = self._get_cached_sourcemap(url, debug_id) + if rv is not None: + return rv + + return self._fetch_and_cache_sourcemap(url, debug_id) + + def _get_cached_sourcemap(self, url, debug_id): + if debug_id is not None: + rv = self.debug_id_sourcemap_cache.get(debug_id) + if rv is not None: + return rv + + if url is not None: + real_url = self.url_aliases.get(url, url) + sourcemap_url = self.minified_source_url_to_sourcemap_url.get(real_url) + rv = self.url_sourcemap_cache.get(sourcemap_url) + if rv is not None: + return rv - logger.debug( - "Found sourcemap URL %r for minified script %r", sourcemap_url[:256], result.url - ) - sourcemaps.link(filename, sourcemap_url) - if sourcemap_url in sourcemaps: - return + return None - # pull down sourcemap - try: - with sentry_sdk.start_span( - op="JavaScriptStacktraceProcessor.cache_source.fetch_sourcemap" - ) as span: - span.set_data("sourcemap_url", sourcemap_url) - sourcemap_view = fetch_sourcemap( - sourcemap_url, - source=result.body, - project=self.project, - release=self.release, - dist=self.dist, - allow_scraping=self.allow_scraping, + def _fetch_and_cache_sourcemap(self, url, debug_id): + if debug_id is not None: + + minified_result = self.fetch_by_debug_id_result.get(debug_id) + if minified_result is None: + return None + + # TODO: use enum for source file type. + result = self.fetcher.fetch_by_debug_id(debug_id, "sourcemap") + if result is not None: + self.debug_id_sourcemap_cache = SmCache.from_bytes( + result.body, minified_result.body ) - except http.BadSource as exc: - # we don't perform the same check here as above, because if someone has - # uploaded a node_modules file, which has a sourceMappingURL, they - # presumably would like it mapped (and would like to know why it's not - # working, if that's the case). If they're not looking for it to be - # mapped, then they shouldn't be uploading the source file in the - # first place. - cache.add_error(filename, exc.data) - return + return result - with sentry_sdk.start_span( - op="JavaScriptStacktraceProcessor.cache_source.cache_sourcemap_view" - ) as span: - sourcemaps.add(sourcemap_url, sourcemap_view) + if url is not None: + minified_result = self.fetch_by_url_result.get(url) + if minified_result is None: + return None + + sourcemap_url = self.minified_source_url_to_sourcemap_url.get(url) + if sourcemap_url is None: + return None + + # TODO: find a way to get the original url of the request. 
+ # logger.debug( + # "Found sourcemap URL %r for minified script %r", sourcemap_url[:256], url + # ) + self.sourcemaps.link(url, sourcemap_url) + if sourcemap_url in self.sourcemaps: + return + + # pull down sourcemap + try: + with sentry_sdk.start_span( + op="JavaScriptStacktraceProcessor.cache_source.fetch_sourcemap" + ) as span: + span.set_data("sourcemap_url", sourcemap_url) + sourcemap_view = fetch_sourcemap( + sourcemap_url, + # How do we get the body of the response if we stored the sourceview in the code. + source=minified_result.body, + project=self.project, + release=self.release, + dist=self.dist, + allow_scraping=self.allow_scraping, + ) + except http.BadSource as exc: + # we don't perform the same check here as above, because if someone has + # uploaded a node_modules file, which has a sourceMappingURL, they + # presumably would like it mapped (and would like to know why it's not + # working, if that's the case). If they're not looking for it to be + # mapped, then they shouldn't be uploading the source file in the + # first place. + cache.add_error(url, exc.data) + return + + with sentry_sdk.start_span( + op="JavaScriptStacktraceProcessor.cache_source.cache_sourcemap_view" + ): + self.sourcemaps.add(sourcemap_url, sourcemap_view) def populate_source_cache(self, frames): """ @@ -1308,7 +1428,7 @@ def populate_source_cache(self, frames): op="JavaScriptStacktraceProcessor.populate_source_cache.cache_source" ) as span: span.set_data("filename", filename) - self.cache_source(filename=filename) + self._fetch_and_cache_sourceview(filename=filename) def close(self): StacktraceProcessor.close(self) diff --git a/tests/sentry/lang/javascript/test_processor.py b/tests/sentry/lang/javascript/test_processor.py index 08611de646d90d..d29a0101204f9e 100644 --- a/tests/sentry/lang/javascript/test_processor.py +++ b/tests/sentry/lang/javascript/test_processor.py @@ -1453,7 +1453,7 @@ def test_file_no_source_records_error(self): # before caching, no errors assert len(processor.cache.get_errors(abs_path)) == 0 - processor.cache_source(abs_path) + processor._fetch_and_cache_sourceview(abs_path) # now we have an error assert len(processor.cache.get_errors(abs_path)) == 1 @@ -1474,7 +1474,7 @@ def test_node_modules_file_no_source_no_error(self): # not a real url, so won't find file on the internet abs_path = "app:///../node_modules/i/dont/exist.js" - processor.cache_source(abs_path) + processor._fetch_and_cache_sourceview(abs_path) # no errors, even though the file can't have been found assert len(processor.cache.get_errors(abs_path)) == 0 @@ -1499,7 +1499,7 @@ def test_node_modules_file_with_source_is_used(self): # but since this is just a unit test, we have to set it manually processor.release = release - processor.cache_source(abs_path) + processor._fetch_and_cache_sourceview(abs_path) # file is cached, no errors are generated assert processor.cache.get(abs_path) @@ -1532,7 +1532,7 @@ def test_node_modules_file_with_source_but_no_map_records_error(self, mock_disco # before caching, no errors assert len(processor.cache.get_errors(abs_path)) == 0 - processor.cache_source(abs_path) + processor._fetch_and_cache_sourceview(abs_path) # now we have an error assert len(processor.cache.get_errors(abs_path)) == 1 diff --git a/tests/sentry/lang/javascript/test_sourcemaps.py b/tests/sentry/lang/javascript/test_sourcemaps.py index a5b343ad8a0f8a..2d167db82421b3 100644 --- a/tests/sentry/lang/javascript/test_sourcemaps.py +++ b/tests/sentry/lang/javascript/test_sourcemaps.py @@ -102,16 +102,16 @@ def 
test_no_inline(self): # basic sourcemap fixture has no inlined sources, so expect None smap_view = SourceMapView.from_json_bytes(sourcemap) - source = smap_view.get_sourceview(0) + source = smap_view.get_or_fetch_sourceview(0) assert source is None def test_indexed_inline(self): smap_view = SourceMapView.from_json_bytes(indexed_sourcemap_example) - assert smap_view.get_sourceview(0).get_source() == ( + assert smap_view.get_or_fetch_sourceview(0).get_source() == ( " ONE.foo = function (bar) {\n" + " return baz(bar);\n" + " };" ) - assert smap_view.get_sourceview(1).get_source() == ( + assert smap_view.get_or_fetch_sourceview(1).get_source() == ( " TWO.inc = function (n) {\n" + " return n + 1;\n" + " };" ) From 2312970b7bfb37cbd12d2c0436deee1f19606885 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Thu, 23 Feb 2023 13:29:26 +0100 Subject: [PATCH 08/28] Improve --- src/sentry/lang/javascript/processor.py | 157 +++++++++++------------- 1 file changed, 72 insertions(+), 85 deletions(-) diff --git a/src/sentry/lang/javascript/processor.py b/src/sentry/lang/javascript/processor.py index 96a9be4c8451a7..852ac7028c54d1 100644 --- a/src/sentry/lang/javascript/processor.py +++ b/src/sentry/lang/javascript/processor.py @@ -742,42 +742,6 @@ def get_max_age(headers): return min(max_age, CACHE_CONTROL_MAX) -def fetch_sourcemap(url, source=b"", project=None, release=None, dist=None, allow_scraping=True): - if is_data_uri(url): - try: - body = base64.b64decode( - force_bytes(url[BASE64_PREAMBLE_LENGTH:]) - + (b"=" * (-(len(url) - BASE64_PREAMBLE_LENGTH) % 4)) - ) - except TypeError as e: - raise UnparseableSourcemap({"url": "", "reason": str(e)}) - else: - # look in the database and, if not found, optionally try to scrape the web - with sentry_sdk.start_span( - op="JavaScriptStacktraceProcessor.fetch_sourcemap.fetch_file" - ) as span: - span.set_data("url", url) - # result = fetch_file( - # url, - # project=project, - # release=release, - # dist=dist, - # allow_scraping=allow_scraping, - # ) - result = None - body = result.body - try: - with sentry_sdk.start_span( - op="JavaScriptStacktraceProcessor.fetch_sourcemap.SmCache.from_bytes" - ): - return SmCache.from_bytes(source, body) - - except Exception as exc: - # This is in debug because the product shows an error already. - logger.debug(str(exc), exc_info=True) - raise UnparseableSourcemap({"url": http.expose_url(url)}) - - def is_data_uri(url): return url[:BASE64_PREAMBLE_LENGTH] == BASE64_SOURCEMAP_PREAMBLE @@ -1342,65 +1306,88 @@ def _get_cached_sourcemap(self, url, debug_id): def _fetch_and_cache_sourcemap(self, url, debug_id): if debug_id is not None: + sourcemap_view = self._handle_debug_id_sourcemap_lookup(debug_id) + if sourcemap_view is not None: + return sourcemap_view - minified_result = self.fetch_by_debug_id_result.get(debug_id) - if minified_result is None: - return None + if url is not None: + sourcemap_view = self._handle_url_sourcemap_lookup(url) + if sourcemap_view is not None: + return sourcemap_view - # TODO: use enum for source file type. 
- result = self.fetcher.fetch_by_debug_id(debug_id, "sourcemap") - if result is not None: - self.debug_id_sourcemap_cache = SmCache.from_bytes( - result.body, minified_result.body - ) - return result + return None - if url is not None: - minified_result = self.fetch_by_url_result.get(url) - if minified_result is None: - return None + def _handle_debug_id_sourcemap_lookup(self, debug_id): + minified_result = self.fetch_by_debug_id_result.get(debug_id) + if minified_result is None: + return None - sourcemap_url = self.minified_source_url_to_sourcemap_url.get(url) - if sourcemap_url is None: - return None + # TODO: use enum for source file type. + result = self.fetcher.fetch_by_debug_id(debug_id, "sourcemap") + if result is not None: + sourcemap_view = SmCache.from_bytes(minified_result.body, result.body) + self.debug_id_sourcemap_cache[debug_id] = sourcemap_view + return sourcemap_view + + def _handle_url_sourcemap_lookup(self, url): + minified_result = self.fetch_by_url_result.get(url) + if minified_result is None: + return None + + sourcemap_url = self.minified_source_url_to_sourcemap_url.get(url) + if sourcemap_url is None: + return None - # TODO: find a way to get the original url of the request. - # logger.debug( - # "Found sourcemap URL %r for minified script %r", sourcemap_url[:256], url - # ) - self.sourcemaps.link(url, sourcemap_url) - if sourcemap_url in self.sourcemaps: - return + try: + with sentry_sdk.start_span( + op="JavaScriptStacktraceProcessor.cache_source.fetch_sourcemap" + ) as span: + span.set_data("sourcemap_url", sourcemap_url) + sourcemap_view = self._fetch_sourcemap(sourcemap_url, source=minified_result.body) + except http.BadSource as exc: + # we don't perform the same check here as above, because if someone has + # uploaded a node_modules file, which has a sourceMappingURL, they + # presumably would like it mapped (and would like to know why it's not + # working, if that's the case). If they're not looking for it to be + # mapped, then they shouldn't be uploading the source file in the + # first place. + self.fetch_by_url_errors[url] = exc.data + return None + else: + with sentry_sdk.start_span( + op="JavaScriptStacktraceProcessor.cache_source.cache_sourcemap_view" + ): + self.url_sourcemap_cache[sourcemap_url] = sourcemap_view + return sourcemap_view - # pull down sourcemap + def _fetch_sourcemap(self, url, source=b""): + if is_data_uri(url): try: - with sentry_sdk.start_span( - op="JavaScriptStacktraceProcessor.cache_source.fetch_sourcemap" - ) as span: - span.set_data("sourcemap_url", sourcemap_url) - sourcemap_view = fetch_sourcemap( - sourcemap_url, - # How do we get the body of the response if we stored the sourceview in the code. - source=minified_result.body, - project=self.project, - release=self.release, - dist=self.dist, - allow_scraping=self.allow_scraping, - ) - except http.BadSource as exc: - # we don't perform the same check here as above, because if someone has - # uploaded a node_modules file, which has a sourceMappingURL, they - # presumably would like it mapped (and would like to know why it's not - # working, if that's the case). If they're not looking for it to be - # mapped, then they shouldn't be uploading the source file in the - # first place. 
- cache.add_error(url, exc.data) - return + body = base64.b64decode( + force_bytes(url[BASE64_PREAMBLE_LENGTH:]) + + (b"=" * (-(len(url) - BASE64_PREAMBLE_LENGTH) % 4)) + ) + except TypeError as e: + raise UnparseableSourcemap({"url": "", "reason": str(e)}) + else: + # look in the database and, if not found, optionally try to scrape the web + with sentry_sdk.start_span( + op="JavaScriptStacktraceProcessor.fetch_sourcemap.fetch_file" + ) as span: + span.set_data("url", url) + result = self.fetcher.fetch_by_url(url) + body = result.body + try: with sentry_sdk.start_span( - op="JavaScriptStacktraceProcessor.cache_source.cache_sourcemap_view" + op="JavaScriptStacktraceProcessor.fetch_sourcemap.SmCache.from_bytes" ): - self.sourcemaps.add(sourcemap_url, sourcemap_view) + return SmCache.from_bytes(source, body) + + except Exception as exc: + # This is in debug because the product shows an error already. + logger.debug(str(exc), exc_info=True) + raise UnparseableSourcemap({"url": http.expose_url(url)}) def populate_source_cache(self, frames): """ From 43cd8dd487233b35e491efc8d636af26fb5f0192 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Thu, 23 Feb 2023 13:53:20 +0100 Subject: [PATCH 09/28] Improve --- src/sentry/lang/javascript/processor.py | 76 ++++++++++--------- .../lang/javascript/test_plugin.py | 18 ++--- 2 files changed, 48 insertions(+), 46 deletions(-) diff --git a/src/sentry/lang/javascript/processor.py b/src/sentry/lang/javascript/processor.py index 852ac7028c54d1..6b3c5bb730e991 100644 --- a/src/sentry/lang/javascript/processor.py +++ b/src/sentry/lang/javascript/processor.py @@ -610,6 +610,7 @@ def fetch_by_url(self, url): with sentry_sdk.start_span( op="JavaScriptStacktraceProcessor.fetch_file.fetch_release_artifact" ): + # TODO: move this method internally in the class. result = fetch_release_artifact(url, self.release, self.dist) else: result = None @@ -875,15 +876,13 @@ def __init__(self, *args, **kwargs): # cache holding mangled code, original code, and errors associated with # each abs_path in the stacktrace # self.cache = SourceCache() - self.fetch_by_url_cache = {} self.fetch_by_url_result = {} self.fetch_by_url_errors = {} self.fetch_by_debug_id_result = {} - self.fetch_by_debug_id_cache = {} self.debug_id_sourcemap_cache = {} - self.url_sourcemap_cache = {} + self.sourcemap_url_sourcemap_cache = {} self.url_aliases = {} self.minified_source_url_to_sourcemap_url = {} @@ -924,7 +923,7 @@ def preprocess_step(self, processing_task): with sentry_sdk.start_span( op="JavaScriptStacktraceProcessor.preprocess_step.populate_source_cache" ): - self.populate_source_cache(frames) + self.populate_sources_cache(frames) return True @@ -944,8 +943,6 @@ def process_frame(self, processable_frame, processing_task): frame = processable_frame.frame token = None - cache = self.cache - sourcemaps = self.sourcemaps all_errors = [] sourcemap_applied = False @@ -968,21 +965,25 @@ def process_frame(self, processable_frame, processing_task): # `source` is used for pre/post and `context_line` frame expansion. # Here it's pointing to minified source, however the variable can be shadowed with the original sourceview # (or `None` if the token doesnt provide us with the `context_line`) down the road. - source = self.get_or_fetch_sourceview(frame["abs_path"]) + # TODO: add support for debug ids. + # We fetch the sourcemap url for the given source. 
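+            # Fetching the sourceview also discovers the sourcemap url of the minified file and
+            # records it, which is what lets the get_or_fetch_sourcemap call below resolve it.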
+ sourceview = self.get_or_fetch_sourceview(url=frame["abs_path"]) + sourcemap_view = self.get_or_fetch_sourcemap(url=frame["abs_path"]) + + sourcemap_url = self.minified_source_url_to_sourcemap_url.get(frame["abs_path"]) + self.sourcemaps_touched.add(sourcemap_url) + source_context = None in_app = None new_frame = dict(frame) raw_frame = dict(frame) - sourcemap_url, sourcemap_cache = sourcemaps.get_link(frame["abs_path"]) - self.sourcemaps_touched.add(sourcemap_url) - - if sourcemap_cache and frame.get("colno") is None: + if sourcemap_view and frame.get("colno") is None: all_errors.append( {"type": EventError.JS_NO_COLUMN, "url": http.expose_url(frame["abs_path"])} ) - elif sourcemap_cache: + elif sourcemap_view: if is_data_uri(sourcemap_url): sourcemap_label = frame["abs_path"] else: @@ -993,7 +994,7 @@ def process_frame(self, processable_frame, processing_task): try: # Errors are 1-indexed in the frames. assert frame["lineno"] > 0, "line numbers are 1-indexed" - token = sourcemap_cache.lookup(frame["lineno"], frame["colno"], LINES_OF_CONTEXT) + token = sourcemap_view.lookup(frame["lineno"], frame["colno"], LINES_OF_CONTEXT) except Exception: token = None all_errors.append( @@ -1027,9 +1028,9 @@ def process_frame(self, processable_frame, processing_task): if token.context_line is not None: source_context = token.pre_context, token.context_line, token.post_context else: - source = self.get_or_fetch_sourceview(abs_path) + sourceview = self.get_or_fetch_sourceview(url=abs_path) - if source is None: + if sourceview is None: errors = cache.get_errors(abs_path) if errors: all_errors.extend(errors) @@ -1102,11 +1103,13 @@ def process_frame(self, processable_frame, processing_task): new_frame.get("data") or {}, sourcemap=http.expose_url(sourcemap_url) ) - changed_frame = self.expand_frame(new_frame, source_context=source_context, source=source) + changed_frame = self.expand_frame( + new_frame, source_context=source_context, source=sourceview + ) # If we did not manage to match but we do have a line or column # we want to report an error here. - if not new_frame.get("context_line") and source and new_frame.get("colno") is not None: + if not new_frame.get("context_line") and sourceview and new_frame.get("colno") is not None: all_errors.append( { "type": EventError.JS_INVALID_SOURCEMAP_LOCATION, @@ -1142,6 +1145,7 @@ def process_frame(self, processable_frame, processing_task): self.tag_suspected_console_errors(new_frames) except Exception as exc: logger.exception("Failed to tag suspected console errors", exc_info=exc) + return new_frames, raw_frames, all_errors def tag_suspected_console_errors(self, new_frames): @@ -1181,7 +1185,8 @@ def expand_frame(self, frame, source_context=None, source=None): return False if source_context is None: - source = source or self.get_or_fetch_sourceview(frame["abs_path"]) + # TODO: add lookup by debug id in case abs_path has it. 
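+            # Until that happens, frame expansion falls back to the url-keyed sourceview.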
+ source = source or self.get_or_fetch_sourceview(url=frame["abs_path"]) if source is None: logger.debug("No source found for %s", frame["abs_path"]) return False @@ -1213,15 +1218,17 @@ def get_or_fetch_sourceview(self, url=None, debug_id=None, source_file_type=None def _get_cached_sourceview(self, url=None, debug_id=None, source_file_type=None): if debug_id is not None: - rv = self.fetch_by_debug_id_cache.get((debug_id, source_file_type)) - if rv is not None: - return rv + source_result = self.fetch_by_debug_id_result.get((debug_id, source_file_type)) + if source_result is not None: + sourceview = SourceView.from_bytes(source_result.body) + return sourceview if url is not None: real_url = self.url_aliases.get(url, url) - rv = self.fetch_by_url_cache.get(real_url) - if rv is not None: - return rv + source_result = self.fetch_by_url_result.get(real_url) + if source_result is not None: + sourceview = SourceView.from_bytes(source_result.body) + return sourceview return None @@ -1247,8 +1254,6 @@ def _fetch_and_cache_sourceview(self, url, debug_id, source_file_type): # we can delete the other dictionary. self.fetch_by_debug_id_result[debug_id, source_file_type] = result sourceview = SourceView.from_bytes(result.body) - self.fetch_by_debug_id_cache[debug_id, source_file_type] = sourceview - return sourceview try: @@ -1268,17 +1273,15 @@ def _fetch_and_cache_sourceview(self, url, debug_id, source_file_type): # we can delete the other dictionary. self.fetch_by_url_result[url] = result sourceview = SourceView.from_bytes(result.body) - self.fetch_by_url_cache[url] = sourceview + # TODO: the discovery of sourcemap urls here is very opaque, consider of putting it closer to the + # function's callsite. sourcemap_url = discover_sourcemap(result) if sourcemap_url: self.minified_source_url_to_sourcemap_url[url] = sourcemap_url return sourceview - # get_or_fetch_sourceview("test.js") - # get_or_fetch_sourcemap("test.js") - # # The url is of the minified file. # If this function returns None, it means that we didn't find the sourcemap, maybe because the file at url isn't # a minified file. @@ -1298,7 +1301,7 @@ def _get_cached_sourcemap(self, url, debug_id): if url is not None: real_url = self.url_aliases.get(url, url) sourcemap_url = self.minified_source_url_to_sourcemap_url.get(real_url) - rv = self.url_sourcemap_cache.get(sourcemap_url) + rv = self.sourcemap_url_sourcemap_cache.get(sourcemap_url) if rv is not None: return rv @@ -1357,7 +1360,7 @@ def _handle_url_sourcemap_lookup(self, url): with sentry_sdk.start_span( op="JavaScriptStacktraceProcessor.cache_source.cache_sourcemap_view" ): - self.url_sourcemap_cache[sourcemap_url] = sourcemap_view + self.sourcemap_url_sourcemap_cache[sourcemap_url] = sourcemap_view return sourcemap_view def _fetch_sourcemap(self, url, source=b""): @@ -1389,7 +1392,7 @@ def _fetch_sourcemap(self, url, source=b""): logger.debug(str(exc), exc_info=True) raise UnparseableSourcemap({"url": http.expose_url(url)}) - def populate_source_cache(self, frames): + def populate_sources_cache(self, frames): """ Fetch all sources that we know are required (being referenced directly in frames). @@ -1415,7 +1418,12 @@ def populate_source_cache(self, frames): op="JavaScriptStacktraceProcessor.populate_source_cache.cache_source" ) as span: span.set_data("filename", filename) - self._fetch_and_cache_sourceview(filename=filename) + # TODO: we want to implement the construction of the debug + # We first want to fetch the sourceview of the file. 
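+                # As a side effect this also records the sourceMappingURL of minified files.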
+ self.get_or_fetch_sourceview(url=filename) + # We then want to fetch the sourcemap of the file, but this call can fail in case filename points to a + # non minified file. + self.get_or_fetch_sourcemap(url=filename) def close(self): StacktraceProcessor.close(self) diff --git a/tests/relay_integration/lang/javascript/test_plugin.py b/tests/relay_integration/lang/javascript/test_plugin.py index 76488093a8806e..08b4991cf8c54c 100644 --- a/tests/relay_integration/lang/javascript/test_plugin.py +++ b/tests/relay_integration/lang/javascript/test_plugin.py @@ -177,9 +177,9 @@ def test_source_expansion(self, mock_fetch_file): # no source map means no raw_stacktrace assert exception.values[0].raw_stacktrace is None - @patch("sentry.lang.javascript.processor.fetch_file") + @patch("sentry.lang.javascript.processor.Fetcher.fetch_by_url") @patch("sentry.lang.javascript.processor.discover_sourcemap") - def test_inlined_sources(self, mock_discover_sourcemap, mock_fetch_file): + def test_inlined_sources(self, mock_discover_sourcemap, mock_fetch_by_url): data = { "timestamp": self.min_ago, "message": "hello", @@ -205,19 +205,13 @@ def test_inlined_sources(self, mock_discover_sourcemap, mock_fetch_file): mock_discover_sourcemap.return_value = BASE64_SOURCEMAP - mock_fetch_file.return_value.url = "http://example.com/test.min.js" - mock_fetch_file.return_value.body = force_bytes("\n".join("")) - mock_fetch_file.return_value.encoding = None + mock_fetch_by_url.return_value.url = "http://example.com/test.min.js" + mock_fetch_by_url.return_value.body = force_bytes("\n".join("")) + mock_fetch_by_url.return_value.encoding = None event = self.post_and_retrieve_event(data) - mock_fetch_file.assert_called_once_with( - "http://example.com/test.min.js", - project=self.project, - release=None, - dist=None, - allow_scraping=True, - ) + mock_fetch_by_url.assert_called_once_with("http://example.com/test.min.js") exception = event.interfaces["exception"] frame_list = exception.values[0].stacktrace.frames From 595ddd9d96e0062d39a46043db9ae707984fc986 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Thu, 23 Feb 2023 14:02:47 +0100 Subject: [PATCH 10/28] Improve --- src/sentry/lang/javascript/processor.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/sentry/lang/javascript/processor.py b/src/sentry/lang/javascript/processor.py index 6b3c5bb730e991..d37f05b9fc9efd 100644 --- a/src/sentry/lang/javascript/processor.py +++ b/src/sentry/lang/javascript/processor.py @@ -873,25 +873,24 @@ def __init__(self, *args, **kwargs): self.fetch_count = 0 self.sourcemaps_touched = set() - # cache holding mangled code, original code, and errors associated with - # each abs_path in the stacktrace - # self.cache = SourceCache() + # Cache holding the results of the fetching by url. self.fetch_by_url_result = {} self.fetch_by_url_errors = {} + # Cache holding the results of the fetching by debug id. self.fetch_by_debug_id_result = {} + # Cache holding the sourcemaps views indexed by either debug id or sourcemap url. self.debug_id_sourcemap_cache = {} self.sourcemap_url_sourcemap_cache = {} + # Additional caches for mapping purposes. self.url_aliases = {} self.minified_source_url_to_sourcemap_url = {} + # Component responsible for fetching the files. 
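        # It is instantiated in preprocess_step, once the event's release and dist are known.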
self.fetcher = None - # cache holding source URLs, corresponding source map URLs, and source map contents - self.sourcemaps = SourceMapCache() - self.release = None self.dist = None @@ -958,8 +957,8 @@ def process_frame(self, processable_frame, processing_task): ): return - errors = cache.get_errors(frame["abs_path"]) - if errors: + errors = self.fetch_by_url_errors.get(frame["abs_path"]) + if errors is not None: all_errors.extend(errors) # `source` is used for pre/post and `context_line` frame expansion. @@ -1031,7 +1030,7 @@ def process_frame(self, processable_frame, processing_task): sourceview = self.get_or_fetch_sourceview(url=abs_path) if sourceview is None: - errors = cache.get_errors(abs_path) + errors = self.fetch_by_url_errors[abs_path] if errors: all_errors.extend(errors) else: @@ -1260,7 +1259,8 @@ def _fetch_and_cache_sourceview(self, url, debug_id, source_file_type): result = self.fetcher.fetch_by_url(url) except http.BadSource as exc: if not fetch_error_should_be_silienced(exc.data, url): - self.fetch_by_url_errors[url] = exc.data + self.fetch_by_url_errors.setdefault(url, []) + self.fetch_by_url_errors[url].append(exc.data) # either way, there's no more for us to do here, since we don't have # a valid file to cache return None @@ -1354,7 +1354,8 @@ def _handle_url_sourcemap_lookup(self, url): # working, if that's the case). If they're not looking for it to be # mapped, then they shouldn't be uploading the source file in the # first place. - self.fetch_by_url_errors[url] = exc.data + self.fetch_by_url_errors.setdefault(url, []) + self.fetch_by_url_errors[url].append(exc.data) return None else: with sentry_sdk.start_span( From 6a6ff675989731667756b2ad575bfdc04a1dec2a Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Thu, 23 Feb 2023 14:07:09 +0100 Subject: [PATCH 11/28] Improve --- .../lang/javascript/test_plugin.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/tests/relay_integration/lang/javascript/test_plugin.py b/tests/relay_integration/lang/javascript/test_plugin.py index 08b4991cf8c54c..6dc0e8d3c1f17e 100644 --- a/tests/relay_integration/lang/javascript/test_plugin.py +++ b/tests/relay_integration/lang/javascript/test_plugin.py @@ -116,8 +116,8 @@ def test_adds_contexts_with_ps4_device(self): "brand": "Sony", } - @patch("sentry.lang.javascript.processor.fetch_file") - def test_source_expansion(self, mock_fetch_file): + @patch("sentry.lang.javascript.processor.Fetcher.fetch_by_url") + def test_source_expansion(self, mock_fetch_by_url): data = { "timestamp": self.min_ago, "message": "hello", @@ -147,19 +147,13 @@ def test_source_expansion(self, mock_fetch_file): }, } - mock_fetch_file.return_value.body = force_bytes("\n".join("hello world")) - mock_fetch_file.return_value.encoding = None - mock_fetch_file.return_value.headers = {} + mock_fetch_by_url.return_value.body = force_bytes("\n".join("hello world")) + mock_fetch_by_url.return_value.encoding = None + mock_fetch_by_url.return_value.headers = {} event = self.post_and_retrieve_event(data) - mock_fetch_file.assert_called_once_with( - "http://example.com/foo.js", - project=self.project, - release=None, - dist=None, - allow_scraping=True, - ) + mock_fetch_by_url.assert_called_once_with("http://example.com/foo.js") exception = event.interfaces["exception"] frame_list = exception.values[0].stacktrace.frames From 5a2db201021c0ad3ba11e48f7933d218565fec52 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Thu, 23 Feb 2023 14:37:49 +0100 Subject: [PATCH 12/28] Improve --- 
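The `fetch_by_debug_id` and `_fetch_sourcemap_by_debug_id` methods below are still left as TODO
stubs. For context, here is a minimal sketch of the lookup they will eventually have to perform
against the tables introduced in the earlier commits of this series; the helper name and the
"most recent upload wins" policy are illustrative assumptions, not something this patch
implements:

    from sentry.models.artifactbundle import DebugIdArtifactBundle

    def resolve_artifact_bundle(organization_id, debug_id, source_file_type):
        # source_file_type is a SourceFileType enum member; the column stores its integer value.
        # A debug_id may map to several bundles (re-uploads, build files shared across builds),
        # so prefer the most recently uploaded one.
        entry = (
            DebugIdArtifactBundle.objects.filter(
                organization_id=organization_id,
                debug_id=debug_id,
                source_file_type=source_file_type.value,
            )
            .select_related("artifact_bundle__file")
            .order_by("-date_added")
            .first()
        )
        return entry.artifact_bundle if entry is not None else None

On a successful lookup, `date_last_accessed` would presumably be bumped here as well, which is
what that column from the earlier commit is for.
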
src/sentry/lang/javascript/processor.py | 374 ++++++++++++------------ 1 file changed, 195 insertions(+), 179 deletions(-) diff --git a/src/sentry/lang/javascript/processor.py b/src/sentry/lang/javascript/processor.py index d37f05b9fc9efd..a2a88fd5076910 100644 --- a/src/sentry/lang/javascript/processor.py +++ b/src/sentry/lang/javascript/processor.py @@ -39,8 +39,6 @@ from sentry.utils.safe import get_path from sentry.utils.urls import non_standard_url_join -from .cache import SourceMapCache - __all__ = ["JavaScriptStacktraceProcessor"] @@ -577,16 +575,139 @@ def fetch_release_artifact(url, release, dist): return result +def is_html_response(result): + # Check if response is HTML by looking if the first non-whitespace character is an open tag ('<'). + # This cannot parse as valid JS/JSON. + # NOTE: not relying on Content-Type header because apps often don't set this correctly + # Discard leading whitespace (often found before doctype) + body_start = result.body[:20].lstrip() + + if body_start[:1] == b"<": + return True + return False + + +def get_max_age(headers): + cache_control = headers.get("cache-control") + max_age = CACHE_CONTROL_MIN + + if cache_control: + match = CACHE_CONTROL_RE.search(cache_control) + if match: + max_age = max(CACHE_CONTROL_MIN, int(match.group(1))) + return min(max_age, CACHE_CONTROL_MAX) + + +def is_data_uri(url): + return url[:BASE64_PREAMBLE_LENGTH] == BASE64_SOURCEMAP_PREAMBLE + + +def generate_module(src): + """ + Converts a url into a made-up module name by doing the following: + * Extract just the path name ignoring querystrings + * Trimming off the initial / + * Trimming off the file extension + * Removes off useless folder prefixes + + e.g. http://google.com/js/v1.0/foo/bar/baz.js -> foo/bar/baz + """ + if not src: + return UNKNOWN_MODULE + + filename, ext = splitext(urlsplit(src).path) + if filename.endswith(".min"): + filename = filename[:-4] + + # TODO(dcramer): replace CLEAN_MODULE_RE with tokenizer completely + tokens = filename.split("/") + for idx, token in enumerate(tokens): + # a SHA + if VERSION_RE.match(token): + return "/".join(tokens[idx + 1 :]) + + return CLEAN_MODULE_RE.sub("", filename) or UNKNOWN_MODULE + + +def is_valid_frame(frame): + return frame is not None and frame.get("lineno") is not None + + +def get_function_for_token(frame, token, previous_frame=None): + """ + Get function name for a given frame based on the token resolved by symbolic. + It tries following paths in order: + - return token function name if we have a usable value (filtered through `USELESS_FN_NAMES` list), + - return mapped name of the caller (previous frame) token if it had, + - return token function name, including filtered values if it mapped to anything in the first place, + - return current frames function name as a fallback + """ + + frame_function_name = frame.get("function") + token_function_name = token.function_name + + # Try to use the function name we got from sourcemap-cache, filtering useless names. + if token_function_name not in USELESS_FN_NAMES: + return token_function_name + + # If not found, ask the callsite (previous token) for function name if possible. + if previous_frame is not None: + last_token = previous_frame.data.get("token") + if last_token is not None and last_token.name not in ("", None): + return last_token.name + + # If there was no minified name at all, return even useless, filtered one from the original token. + if not frame_function_name: + return token_function_name + + # Otherwise fallback to the old, minified name. 
+ return frame_function_name + + +def fold_function_name(function_name): + """ + Fold multiple consecutive occurences of the same property name into a single group, excluding the last component. + + foo | foo + foo.foo | foo.foo + foo.foo.foo | {foo#2}.foo + bar.foo.foo | bar.foo.foo + bar.foo.foo.foo | bar.{foo#2}.foo + bar.foo.foo.onError | bar.{foo#2}.onError + bar.bar.bar.foo.foo.onError | {bar#3}.{foo#2}.onError + bar.foo.foo.bar.bar.onError | bar.{foo#2}.{bar#2}.onError + """ + + parts = function_name.split(".") + + if len(parts) == 1: + return function_name + + tail = parts.pop() + grouped = [list(g) for _, g in groupby(parts)] + + def format_groups(p): + if len(p) == 1: + return p[0] + return f"\u007b{p[0]}#{len(p)}\u007d" + + return f'{".".join(map(format_groups, grouped))}.{tail}' + + class Fetcher: + """ + Components responsible for fetching files either by url or debug_id that aims at hiding the complexity involved + in file fetching. + """ + def __init__(self, project, release, dist, allow_scraping): self.project = project self.release = release self.dist = dist self.allow_scraping = allow_scraping - self.sourcemap_cache = SourceMapCache() - def fetch_by_debug_id(self, debug_id, source_file_type): + # TODO: implement debug id lookup. pass def fetch_by_url(self, url): @@ -717,128 +838,10 @@ def fetch_by_url(self, url): return result def _fetch_sourcemap_by_debug_id(self): + # TODO: implement debug id lookup. pass -def is_html_response(result): - # Check if response is HTML by looking if the first non-whitespace character is an open tag ('<'). - # This cannot parse as valid JS/JSON. - # NOTE: not relying on Content-Type header because apps often don't set this correctly - # Discard leading whitespace (often found before doctype) - body_start = result.body[:20].lstrip() - - if body_start[:1] == b"<": - return True - return False - - -def get_max_age(headers): - cache_control = headers.get("cache-control") - max_age = CACHE_CONTROL_MIN - - if cache_control: - match = CACHE_CONTROL_RE.search(cache_control) - if match: - max_age = max(CACHE_CONTROL_MIN, int(match.group(1))) - return min(max_age, CACHE_CONTROL_MAX) - - -def is_data_uri(url): - return url[:BASE64_PREAMBLE_LENGTH] == BASE64_SOURCEMAP_PREAMBLE - - -def generate_module(src): - """ - Converts a url into a made-up module name by doing the following: - * Extract just the path name ignoring querystrings - * Trimming off the initial / - * Trimming off the file extension - * Removes off useless folder prefixes - - e.g. http://google.com/js/v1.0/foo/bar/baz.js -> foo/bar/baz - """ - if not src: - return UNKNOWN_MODULE - - filename, ext = splitext(urlsplit(src).path) - if filename.endswith(".min"): - filename = filename[:-4] - - # TODO(dcramer): replace CLEAN_MODULE_RE with tokenizer completely - tokens = filename.split("/") - for idx, token in enumerate(tokens): - # a SHA - if VERSION_RE.match(token): - return "/".join(tokens[idx + 1 :]) - - return CLEAN_MODULE_RE.sub("", filename) or UNKNOWN_MODULE - - -def is_valid_frame(frame): - return frame is not None and frame.get("lineno") is not None - - -def get_function_for_token(frame, token, previous_frame=None): - """ - Get function name for a given frame based on the token resolved by symbolic. 
- It tries following paths in order: - - return token function name if we have a usable value (filtered through `USELESS_FN_NAMES` list), - - return mapped name of the caller (previous frame) token if it had, - - return token function name, including filtered values if it mapped to anything in the first place, - - return current frames function name as a fallback - """ - - frame_function_name = frame.get("function") - token_function_name = token.function_name - - # Try to use the function name we got from sourcemap-cache, filtering useless names. - if token_function_name not in USELESS_FN_NAMES: - return token_function_name - - # If not found, ask the callsite (previous token) for function name if possible. - if previous_frame is not None: - last_token = previous_frame.data.get("token") - if last_token is not None and last_token.name not in ("", None): - return last_token.name - - # If there was no minified name at all, return even useless, filtered one from the original token. - if not frame_function_name: - return token_function_name - - # Otherwise fallback to the old, minified name. - return frame_function_name - - -def fold_function_name(function_name): - """ - Fold multiple consecutive occurences of the same property name into a single group, excluding the last component. - - foo | foo - foo.foo | foo.foo - foo.foo.foo | {foo#2}.foo - bar.foo.foo | bar.foo.foo - bar.foo.foo.foo | bar.{foo#2}.foo - bar.foo.foo.onError | bar.{foo#2}.onError - bar.bar.bar.foo.foo.onError | {bar#3}.{foo#2}.onError - bar.foo.foo.bar.bar.onError | bar.{foo#2}.{bar#2}.onError - """ - - parts = function_name.split(".") - - if len(parts) == 1: - return function_name - - tail = parts.pop() - grouped = [list(g) for _, g in groupby(parts)] - - def format_groups(p): - if len(p) == 1: - return p[0] - return f"\u007b{p[0]}#{len(p)}\u007d" - - return f'{".".join(map(format_groups, grouped))}.{tail}' - - class JavaScriptStacktraceProcessor(StacktraceProcessor): """ Modern SourceMap processor using symbolic-sourcemapcache. @@ -884,8 +887,11 @@ def __init__(self, *args, **kwargs): self.debug_id_sourcemap_cache = {} self.sourcemap_url_sourcemap_cache = {} - # Additional caches for mapping purposes. + # Contains a mapping between the 'result.url' of the request and the file url from 'abs_path'. self.url_aliases = {} + + # Contains a mapping between the minified source url ('abs_path' of the frame) and the sourcemap url + # which is discovered by looking at the minified source. self.minified_source_url_to_sourcemap_url = {} # Component responsible for fetching the files. @@ -961,14 +967,14 @@ def process_frame(self, processable_frame, processing_task): if errors is not None: all_errors.extend(errors) - # `source` is used for pre/post and `context_line` frame expansion. - # Here it's pointing to minified source, however the variable can be shadowed with the original sourceview + # `sourceview` is used for pre/post and `context_line` frame expansion. + # Here it's pointing to the minified source, however the variable can be shadowed with the original sourceview # (or `None` if the token doesnt provide us with the `context_line`) down the road. # TODO: add support for debug ids. - # We fetch the sourcemap url for the given source. 
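The two dictionaries documented above chain together: a frame's `abs_path` maps to the sourcemap URL discovered while fetching the minified source, and that URL in turn keys the cached sourcemap view. A sketch of the lookup with hypothetical entries:

    # abs_path of the minified source -> discovered sourcemap url
    minified_source_url_to_sourcemap_url = {
        "http://example.com/app.min.js": "http://example.com/app.min.js.map",
    }
    # sourcemap url -> parsed sourcemap view
    sourcemap_url_sourcemap_cache = {}

    def cached_sourcemap_view(abs_path):
        sourcemap_url = minified_source_url_to_sourcemap_url.get(abs_path)
        # an unknown abs_path yields sourcemap_url=None, which is simply a miss
        return sourcemap_url_sourcemap_cache.get(sourcemap_url)
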
sourceview = self.get_or_fetch_sourceview(url=frame["abs_path"]) - sourcemap_view = self.get_or_fetch_sourcemap(url=frame["abs_path"]) + sourcemap_view = self.get_or_fetch_sourcemap_view(url=frame["abs_path"]) + # We get the url that we discovered for the sourcemap which we use down in the processing pipeline. sourcemap_url = self.minified_source_url_to_sourcemap_url.get(frame["abs_path"]) self.sourcemaps_touched.add(sourcemap_url) @@ -1203,9 +1209,14 @@ def expand_frame(self, frame, source_context=None, source=None): return True - # If we return None it means that we weren't able to find the file. def get_or_fetch_sourceview(self, url=None, debug_id=None, source_file_type=None): + """ + Gets from the cache or fetches the file at 'url' or 'debug_id'. + + Returns None when a file with either 'url' or 'debug_id' cannot be found. + """ # We require that artifact bundles with debug ids used embedded source in the source map. + # TODO: use enum for source file type when available. if debug_id is not None and source_file_type != "source": return None @@ -1239,14 +1250,12 @@ def _fetch_and_cache_sourceview(self, url, debug_id, source_file_type): cache.add_error(url, {"type": EventError.JS_TOO_MANY_REMOTE_SOURCES}) return None - # TODO: respect cache-control/max-age headers to some extent - logger.debug("Attempting to cache source %r", url) - # this both looks in the database and tries to scrape the internet - with sentry_sdk.start_span( - op="JavaScriptStacktraceProcessor.cache_source.fetch_file" - ) as span: - span.set_data("filename", url) - if debug_id is not None: + if debug_id is not None: + logger.debug("Attempting to cache source with debug id %r", debug_id) + with sentry_sdk.start_span( + op="JavaScriptStacktraceProcessor._fetch_and_cache_sourceview.fetch_by_debug_id" + ) as span: + span.set_data("debug_id", debug_id) result = self.fetcher.fetch_by_debug_id(debug_id, source_file_type) if result is not None: # We temporarily store the result directly, in case we want to compute the sourceview on demand @@ -1255,44 +1264,51 @@ def _fetch_and_cache_sourceview(self, url, debug_id, source_file_type): sourceview = SourceView.from_bytes(result.body) return sourceview - try: + try: + logger.debug("Attempting to cache source with url %r", url) + with sentry_sdk.start_span( + op="JavaScriptStacktraceProcessor._fetch_and_cache_sourceview.fetch_by_url" + ) as span: + span.set_data("url", url) result = self.fetcher.fetch_by_url(url) - except http.BadSource as exc: - if not fetch_error_should_be_silienced(exc.data, url): - self.fetch_by_url_errors.setdefault(url, []) - self.fetch_by_url_errors[url].append(exc.data) - # either way, there's no more for us to do here, since we don't have - # a valid file to cache - return None - else: - # TODO: it should do something but we don't know. - if result.url != url: - self.url_aliases[result.url] = url + except http.BadSource as exc: + if not fetch_error_should_be_silienced(exc.data, url): + self.fetch_by_url_errors.setdefault(url, []) + self.fetch_by_url_errors[url].append(exc.data) + # either way, there's no more for us to do here, since we don't have + # a valid file to cache + return None + else: + # TODO: it should do something but we don't know. + if result.url != url: + self.url_aliases[result.url] = url - # We temporarily store the result directly, in case we want to compute the sourceview on demand - # we can delete the other dictionary. 
- self.fetch_by_url_result[url] = result - sourceview = SourceView.from_bytes(result.body) + # We temporarily store the result directly, in case we want to compute the sourceview on demand + # we can delete the other dictionary. + self.fetch_by_url_result[url] = result + sourceview = SourceView.from_bytes(result.body) - # TODO: the discovery of sourcemap urls here is very opaque, consider of putting it closer to the - # function's callsite. - sourcemap_url = discover_sourcemap(result) - if sourcemap_url: - self.minified_source_url_to_sourcemap_url[url] = sourcemap_url + # TODO: the discovery of sourcemap urls here is very opaque, consider of putting it closer to the + # function's callsite. + sourcemap_url = discover_sourcemap(result) + if sourcemap_url: + self.minified_source_url_to_sourcemap_url[url] = sourcemap_url - return sourceview + return sourceview + + def get_or_fetch_sourcemap_view(self, url=None, debug_id=None): + """ + Gets from the cache or fetches the sourcemap view of the file at 'url' or 'debug_id'. - # The url is of the minified file. - # If this function returns None, it means that we didn't find the sourcemap, maybe because the file at url isn't - # a minified file. - def get_or_fetch_sourcemap(self, url=None, debug_id=None): - rv = self._get_cached_sourcemap(url, debug_id) + Returns None when a sourcemap corresponding to the file at 'url' is not found. + """ + rv = self._get_cached_sourcemap_view(url, debug_id) if rv is not None: return rv - return self._fetch_and_cache_sourcemap(url, debug_id) + return self._fetch_and_cache_sourcemap_view(url, debug_id) - def _get_cached_sourcemap(self, url, debug_id): + def _get_cached_sourcemap_view(self, url, debug_id): if debug_id is not None: rv = self.debug_id_sourcemap_cache.get(debug_id) if rv is not None: @@ -1307,7 +1323,7 @@ def _get_cached_sourcemap(self, url, debug_id): return None - def _fetch_and_cache_sourcemap(self, url, debug_id): + def _fetch_and_cache_sourcemap_view(self, url, debug_id): if debug_id is not None: sourcemap_view = self._handle_debug_id_sourcemap_lookup(debug_id) if sourcemap_view is not None: @@ -1325,7 +1341,7 @@ def _handle_debug_id_sourcemap_lookup(self, debug_id): if minified_result is None: return None - # TODO: use enum for source file type. + # TODO: use enum for source file type when available. 
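For context, the shape of the symbolic API these lookups feed: a sourcemap cache is built from the minified source plus the sourcemap bytes, then queried per frame. A hedged sketch (file paths are hypothetical, and the import is an assumption based on the `SmCache` alias used throughout this module):

    from symbolic import SourceMapCache as SmCache

    with open("app.min.js", "rb") as f:
        minified = f.read()
    with open("app.min.js.map", "rb") as f:
        sourcemap = f.read()

    smcache = SmCache.from_bytes(minified, sourcemap)
    try:
        # lineno, colno, lines of context; the processor wraps this call in
        # try/except because a lookup over malformed input can fail
        token = smcache.lookup(1, 39, 5)
    except Exception:
        token = None
    if token is not None:
        print(token.src, token.line, token.col, token.function_name)
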
result = self.fetcher.fetch_by_debug_id(debug_id, "sourcemap") if result is not None: sourcemap_view = SmCache.from_bytes(minified_result.body, result.body) @@ -1343,10 +1359,13 @@ def _handle_url_sourcemap_lookup(self, url): try: with sentry_sdk.start_span( - op="JavaScriptStacktraceProcessor.cache_source.fetch_sourcemap" + op="JavaScriptStacktraceProcessor.handle_url_sourcemap_lookup.fetch_sourcemap_view_by_url" ) as span: + span.set_data("url", url) span.set_data("sourcemap_url", sourcemap_url) - sourcemap_view = self._fetch_sourcemap(sourcemap_url, source=minified_result.body) + sourcemap_view = self._fetch_sourcemap_view_by_url( + sourcemap_url, source=minified_result.body + ) except http.BadSource as exc: # we don't perform the same check here as above, because if someone has # uploaded a node_modules file, which has a sourceMappingURL, they @@ -1358,13 +1377,10 @@ def _handle_url_sourcemap_lookup(self, url): self.fetch_by_url_errors[url].append(exc.data) return None else: - with sentry_sdk.start_span( - op="JavaScriptStacktraceProcessor.cache_source.cache_sourcemap_view" - ): - self.sourcemap_url_sourcemap_cache[sourcemap_url] = sourcemap_view - return sourcemap_view + self.sourcemap_url_sourcemap_cache[sourcemap_url] = sourcemap_view + return sourcemap_view - def _fetch_sourcemap(self, url, source=b""): + def _fetch_sourcemap_view_by_url(self, url, source=b""): if is_data_uri(url): try: body = base64.b64decode( @@ -1376,7 +1392,7 @@ def _fetch_sourcemap(self, url, source=b""): else: # look in the database and, if not found, optionally try to scrape the web with sentry_sdk.start_span( - op="JavaScriptStacktraceProcessor.fetch_sourcemap.fetch_file" + op="JavaScriptStacktraceProcessor.fetch_sourcemap_view_by_url.fetch_by_url" ) as span: span.set_data("url", url) result = self.fetcher.fetch_by_url(url) @@ -1384,7 +1400,7 @@ def _fetch_sourcemap(self, url, source=b""): body = result.body try: with sentry_sdk.start_span( - op="JavaScriptStacktraceProcessor.fetch_sourcemap.SmCache.from_bytes" + op="JavaScriptStacktraceProcessor.fetch_sourcemap_view_by_url.SmCache.from_bytes" ): return SmCache.from_bytes(source, body) @@ -1424,7 +1440,7 @@ def populate_sources_cache(self, frames): self.get_or_fetch_sourceview(url=filename) # We then want to fetch the sourcemap of the file, but this call can fail in case filename points to a # non minified file. - self.get_or_fetch_sourcemap(url=filename) + self.get_or_fetch_sourcemap_view(url=filename) def close(self): StacktraceProcessor.close(self) From bca29af343d04657dc2e609bdb0eceaaf7612026 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Thu, 23 Feb 2023 15:20:27 +0100 Subject: [PATCH 13/28] Fix tests --- src/sentry/lang/javascript/processor.py | 20 ++-- .../sentry/lang/javascript/test_processor.py | 101 +++++++++++------- 2 files changed, 77 insertions(+), 44 deletions(-) diff --git a/src/sentry/lang/javascript/processor.py b/src/sentry/lang/javascript/processor.py index a2a88fd5076910..d0b9e82e1716ff 100644 --- a/src/sentry/lang/javascript/processor.py +++ b/src/sentry/lang/javascript/processor.py @@ -700,7 +700,7 @@ class Fetcher: in file fetching. 
""" - def __init__(self, project, release, dist, allow_scraping): + def __init__(self, project=None, release=None, dist=None, allow_scraping=True): self.project = project self.release = release self.dist = dist @@ -923,15 +923,21 @@ def preprocess_step(self, processing_task): date = timestamp and datetime.fromtimestamp(timestamp).replace(tzinfo=timezone.utc) self.dist = self.release.add_dist(self.data["dist"], date) - self.fetcher = Fetcher(self.project, self.release, self.dist, self.allow_scraping) + # We initialize the fetcher during preprocessing, so that we can start fetching files during source cache + # population. + self._initialize_fetcher() with sentry_sdk.start_span( op="JavaScriptStacktraceProcessor.preprocess_step.populate_source_cache" ): - self.populate_sources_cache(frames) + self.populate_source_cache(frames) return True + def _initialize_fetcher(self): + # We initialize the fetcher once all data has been loaded. + self.fetcher = Fetcher(self.project, self.release, self.dist, self.allow_scraping) + def handles_frame(self, frame, stacktrace_info): platform = frame.get("platform") or self.data.get("platform") return platform in ("javascript", "node") @@ -1273,8 +1279,7 @@ def _fetch_and_cache_sourceview(self, url, debug_id, source_file_type): result = self.fetcher.fetch_by_url(url) except http.BadSource as exc: if not fetch_error_should_be_silienced(exc.data, url): - self.fetch_by_url_errors.setdefault(url, []) - self.fetch_by_url_errors[url].append(exc.data) + self.fetch_by_url_errors.setdefault(url, []).append(exc.data) # either way, there's no more for us to do here, since we don't have # a valid file to cache return None @@ -1373,8 +1378,7 @@ def _handle_url_sourcemap_lookup(self, url): # working, if that's the case). If they're not looking for it to be # mapped, then they shouldn't be uploading the source file in the # first place. - self.fetch_by_url_errors.setdefault(url, []) - self.fetch_by_url_errors[url].append(exc.data) + self.fetch_by_url_errors.setdefault(url, []).append(exc.data) return None else: self.sourcemap_url_sourcemap_cache[sourcemap_url] = sourcemap_view @@ -1409,7 +1413,7 @@ def _fetch_sourcemap_view_by_url(self, url, source=b""): logger.debug(str(exc), exc_info=True) raise UnparseableSourcemap({"url": http.expose_url(url)}) - def populate_sources_cache(self, frames): + def populate_source_cache(self, frames): """ Fetch all sources that we know are required (being referenced directly in frames). 
diff --git a/tests/sentry/lang/javascript/test_processor.py b/tests/sentry/lang/javascript/test_processor.py index d29a0101204f9e..e5dcec82bc56bd 100644 --- a/tests/sentry/lang/javascript/test_processor.py +++ b/tests/sentry/lang/javascript/test_processor.py @@ -16,14 +16,13 @@ from sentry.lang.javascript.processor import ( CACHE_CONTROL_MAX, CACHE_CONTROL_MIN, + Fetcher, JavaScriptStacktraceProcessor, UnparseableSourcemap, cache, discover_sourcemap, - fetch_file, fetch_release_archive_for_url, fetch_release_file, - fetch_sourcemap, fold_function_name, generate_module, get_function_for_token, @@ -556,7 +555,7 @@ def test_simple(self): responses.GET, "http://example.com", body="foo bar", content_type="application/json" ) - result = fetch_file("http://example.com") + result = Fetcher().fetch_by_url("http://example.com") assert len(responses.calls) == 1 @@ -565,7 +564,7 @@ def test_simple(self): assert result.headers == {"content-type": "application/json"} # ensure we use the cached result - result2 = fetch_file("http://example.com") + result2 = Fetcher().fetch_by_url("http://example.com") assert len(responses.calls) == 1 @@ -594,7 +593,7 @@ def test_with_token(self): self.project.update_option("sentry:token_header", header_name_option_value) url = f"http://example.com/{i}/" - result = fetch_file(url, project=self.project) + result = Fetcher(project=self.project).fetch_by_url(url) assert result.url == url assert result.body == b"foo bar" @@ -608,20 +607,20 @@ def test_connection_failure(self): responses.add(responses.GET, "http://example.com", body=RequestException()) with pytest.raises(http.BadSource): - fetch_file("http://example.com") + Fetcher().fetch_by_url("http://example.com") assert len(responses.calls) == 1 # ensure we use the cached domain-wide failure for the second call with pytest.raises(http.BadSource): - fetch_file("http://example.com/foo/bar") + Fetcher().fetch_by_url("http://example.com/foo/bar") assert len(responses.calls) == 1 @responses.activate def test_non_url_without_release(self): with pytest.raises(http.BadSource): - fetch_file("/example.js") + Fetcher().fetch_by_url("/example.js") @responses.activate @patch("sentry.lang.javascript.processor.fetch_release_file") @@ -634,7 +633,7 @@ def test_non_url_with_release(self, mock_fetch_release_file): release = Release.objects.create(version="1", organization_id=self.project.organization_id) release.add_project(self.project) - result = fetch_file("/example.js", release=release) + result = Fetcher(release=release).fetch_by_url("/example.js") assert result.url == "/example.js" assert result.body == b"foo" assert isinstance(result.body, bytes) @@ -670,13 +669,13 @@ def test_non_url_with_release_archive(self): # Attempt to fetch nonexisting with pytest.raises(http.BadSource): - fetch_file("does-not-exist.js", release=release) + Fetcher(release=release).fetch_by_url("does-not-exist.js") # Attempt to fetch nonexsting again (to check if cache works) with pytest.raises(http.BadSource): - result = fetch_file("does-not-exist.js", release=release) + result = Fetcher(release=release).fetch_by_url("does-not-exist.js") - result = fetch_file("/example.js", release=release) + result = Fetcher(release=release).fetch_by_url("/example.js") assert result.url == "/example.js" assert result.body == b"foo" assert isinstance(result.body, bytes) @@ -684,7 +683,7 @@ def test_non_url_with_release_archive(self): assert result.encoding == "utf-8" # Make sure cache loading works: - result2 = fetch_file("/example.js", release=release) + result2 = 
Fetcher(release=release).fetch_by_url("/example.js") assert result2 == result def _create_archive(self, release, url): @@ -842,7 +841,7 @@ def test_unicode_body(self): content_type="application/json; charset=utf-8", ) - result = fetch_file("http://example.com") + result = Fetcher().fetch_by_url("http://example.com") assert len(responses.calls) == 1 @@ -852,7 +851,7 @@ def test_unicode_body(self): assert result.encoding == "utf-8" # ensure we use the cached result - result2 = fetch_file("http://example.com") + result2 = Fetcher().fetch_by_url("http://example.com") assert len(responses.calls) == 1 @@ -878,7 +877,7 @@ def cache_get(key): ) with pytest.raises(http.CannotFetch) as exc: - fetch_file("http://example.com") + Fetcher().fetch_by_url("http://example.com") assert exc.value.data["type"] == EventError.TOO_LARGE_FOR_CACHE @@ -891,7 +890,7 @@ def cache_get(key): def test_truncated(self): url = truncatechars("http://example.com", 3) with pytest.raises(http.CannotFetch) as exc: - fetch_file(url) + Fetcher().fetch_by_url(url) assert exc.value.data["type"] == EventError.JS_MISSING_SOURCE assert exc.value.data["url"] == url @@ -1158,7 +1157,11 @@ def test_dedupe_properties(self): class FetchSourcemapTest(TestCase): def test_simple_base64(self): - smap_view = fetch_sourcemap(base64_sourcemap) + processor = JavaScriptStacktraceProcessor( + data={}, stacktrace_infos=None, project=self.create_project() + ) + processor._initialize_fetcher() + smap_view = processor._fetch_sourcemap_view_by_url(base64_sourcemap) token = smap_view.lookup(1, 1, 0) assert token.src == "/test.js" @@ -1167,7 +1170,11 @@ def test_simple_base64(self): assert token.context_line == 'console.log("hello, World!")' def test_base64_without_padding(self): - smap_view = fetch_sourcemap(base64_sourcemap.rstrip("=")) + processor = JavaScriptStacktraceProcessor( + data={}, stacktrace_infos=None, project=self.create_project() + ) + processor._initialize_fetcher() + smap_view = processor._fetch_sourcemap_view_by_url(base64_sourcemap.rstrip("=")) token = smap_view.lookup(1, 1, 0) assert token.src == "/test.js" @@ -1177,7 +1184,11 @@ def test_base64_without_padding(self): def test_broken_base64(self): with pytest.raises(UnparseableSourcemap): - fetch_sourcemap("data:application/json;base64,xxx") + processor = JavaScriptStacktraceProcessor( + data={}, stacktrace_infos=None, project=self.create_project() + ) + processor._initialize_fetcher() + processor._fetch_sourcemap_view_by_url("data:application/json;base64,xxx") @responses.activate def test_garbage_json(self): @@ -1186,7 +1197,11 @@ def test_garbage_json(self): ) with pytest.raises(UnparseableSourcemap): - fetch_sourcemap("http://example.com") + processor = JavaScriptStacktraceProcessor( + data={}, stacktrace_infos=None, project=self.create_project() + ) + processor._initialize_fetcher() + processor._fetch_sourcemap_view_by_url("http://example.com") class TrimLineTest(unittest.TestCase): @@ -1443,6 +1458,7 @@ def test_file_no_source_records_error(self): project = self.create_project() processor = JavaScriptStacktraceProcessor(data={}, stacktrace_infos=None, project=project) + processor._initialize_fetcher() # no release on the event, so won't find file in database assert processor.release is None @@ -1451,13 +1467,16 @@ def test_file_no_source_records_error(self): abs_path = "app:///i/dont/exist.js" # before caching, no errors - assert len(processor.cache.get_errors(abs_path)) == 0 + assert len(processor.fetch_by_url_errors.get(abs_path, [])) == 0 - 
processor._fetch_and_cache_sourceview(abs_path) + processor.get_or_fetch_sourceview(url=abs_path) # now we have an error - assert len(processor.cache.get_errors(abs_path)) == 1 - assert processor.cache.get_errors(abs_path)[0] == {"url": abs_path, "type": "js_no_source"} + assert len(processor.fetch_by_url_errors.get(abs_path, [])) == 1 + assert processor.fetch_by_url_errors.get(abs_path, [])[0] == { + "url": abs_path, + "type": "js_no_source", + } def test_node_modules_file_no_source_no_error(self): """ @@ -1467,6 +1486,8 @@ def test_node_modules_file_no_source_no_error(self): project = self.create_project() processor = JavaScriptStacktraceProcessor(data={}, stacktrace_infos=None, project=project) + # We need to initialize the fetcher so that it can capture the necessary context to execute file fetching. + processor._initialize_fetcher() # no release on the event, so won't find file in database assert processor.release is None @@ -1474,10 +1495,10 @@ def test_node_modules_file_no_source_no_error(self): # not a real url, so won't find file on the internet abs_path = "app:///../node_modules/i/dont/exist.js" - processor._fetch_and_cache_sourceview(abs_path) + processor.get_or_fetch_sourceview(url=abs_path) # no errors, even though the file can't have been found - assert len(processor.cache.get_errors(abs_path)) == 0 + assert len(processor.fetch_by_url_errors.get(abs_path, [])) == 0 def test_node_modules_file_with_source_is_used(self): """ @@ -1498,12 +1519,14 @@ def test_node_modules_file_with_source_is_used(self): # dictionary passed to the JavaScriptStacktraceProcessor constructor, # but since this is just a unit test, we have to set it manually processor.release = release + # We need to initialize the fetcher so that it can capture the necessary context to execute file fetching. + processor._initialize_fetcher() - processor._fetch_and_cache_sourceview(abs_path) - - # file is cached, no errors are generated - assert processor.cache.get(abs_path) - assert len(processor.cache.get_errors(abs_path)) == 0 + # We found the source view. + assert processor.get_or_fetch_sourceview(url=abs_path) + # Source view exists in cache. + assert processor.fetch_by_url_result.get(abs_path) + assert len(processor.fetch_by_url_errors.get(abs_path, [])) == 0 @patch("sentry.lang.javascript.processor.discover_sourcemap") def test_node_modules_file_with_source_but_no_map_records_error(self, mock_discover_sourcemap): @@ -1528,12 +1551,18 @@ def test_node_modules_file_with_source_but_no_map_records_error(self, mock_disco # dictionary passed to the JavaScriptStacktraceProcessor constructor, # but since this is just a unit test, we have to set it manually processor.release = release + # We need to initialize the fetcher so that it can capture the necessary context to execute file fetching. 
+ processor._initialize_fetcher() # before caching, no errors - assert len(processor.cache.get_errors(abs_path)) == 0 + assert len(processor.fetch_by_url_errors.get(abs_path, [])) == 0 - processor._fetch_and_cache_sourceview(abs_path) + processor.get_or_fetch_sourceview(url=abs_path) + processor.get_or_fetch_sourcemap_view(url=abs_path) # now we have an error - assert len(processor.cache.get_errors(abs_path)) == 1 - assert processor.cache.get_errors(abs_path)[0] == {"url": map_url, "type": "js_no_source"} + assert len(processor.fetch_by_url_errors.get(abs_path, [])) == 1 + assert processor.fetch_by_url_errors.get(abs_path, [])[0] == { + "url": map_url, + "type": "js_no_source", + } From 6929724a0804c23b6e71cf230fb85c785ae7b893 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Thu, 23 Feb 2023 16:10:20 +0100 Subject: [PATCH 14/28] Fix tests --- src/sentry/lang/javascript/processor.py | 19 ++++++++++--------- .../sentry/lang/javascript/test_sourcemaps.py | 6 +++--- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/sentry/lang/javascript/processor.py b/src/sentry/lang/javascript/processor.py index d0b9e82e1716ff..d4fedfc2f06a41 100644 --- a/src/sentry/lang/javascript/processor.py +++ b/src/sentry/lang/javascript/processor.py @@ -876,6 +876,10 @@ def __init__(self, *args, **kwargs): self.fetch_count = 0 self.sourcemaps_touched = set() + # All the following dictionaries have been defined top level for simplicity reasons. Because this code will + # be ported to Symbolicator we wanted to keep it as simple and explicit as possible. This comment also + # applies to the many repetitions across the code. + # Cache holding the results of the fetching by url. self.fetch_by_url_result = {} self.fetch_by_url_errors = {} @@ -952,7 +956,6 @@ def process_frame(self, processable_frame, processing_task): Attempt to demangle the given frame. """ frame = processable_frame.frame - token = None all_errors = [] sourcemap_applied = False @@ -1036,6 +1039,7 @@ def process_frame(self, processable_frame, processing_task): "Mapping compressed source %r to mapping in %r", frame["abs_path"], abs_path ) + # This 'if' should always evaluate to True when debug_id is used. if token.context_line is not None: source_context = token.pre_context, token.context_line, token.post_context else: @@ -1253,13 +1257,15 @@ def _fetch_and_cache_sourceview(self, url, debug_id, source_file_type): # TODO: this branch should only apply to http fetches. 
if self.fetch_count > self.max_fetches: - cache.add_error(url, {"type": EventError.JS_TOO_MANY_REMOTE_SOURCES}) + self.fetch_by_url_errors.setdefault(url, []).append( + {"type": EventError.JS_TOO_MANY_REMOTE_SOURCES} + ) return None if debug_id is not None: logger.debug("Attempting to cache source with debug id %r", debug_id) with sentry_sdk.start_span( - op="JavaScriptStacktraceProcessor._fetch_and_cache_sourceview.fetch_by_debug_id" + op="JavaScriptStacktraceProcessor.fetch_and_cache_sourceview.fetch_by_debug_id" ) as span: span.set_data("debug_id", debug_id) result = self.fetcher.fetch_by_debug_id(debug_id, source_file_type) @@ -1273,7 +1279,7 @@ def _fetch_and_cache_sourceview(self, url, debug_id, source_file_type): try: logger.debug("Attempting to cache source with url %r", url) with sentry_sdk.start_span( - op="JavaScriptStacktraceProcessor._fetch_and_cache_sourceview.fetch_by_url" + op="JavaScriptStacktraceProcessor.fetch_and_cache_sourceview.fetch_by_url" ) as span: span.set_data("url", url) result = self.fetcher.fetch_by_url(url) @@ -1293,8 +1299,6 @@ def _fetch_and_cache_sourceview(self, url, debug_id, source_file_type): self.fetch_by_url_result[url] = result sourceview = SourceView.from_bytes(result.body) - # TODO: the discovery of sourcemap urls here is very opaque, consider of putting it closer to the - # function's callsite. sourcemap_url = discover_sourcemap(result) if sourcemap_url: self.minified_source_url_to_sourcemap_url[url] = sourcemap_url @@ -1442,9 +1446,6 @@ def populate_source_cache(self, frames): # TODO: we want to implement the construction of the debug # We first want to fetch the sourceview of the file. self.get_or_fetch_sourceview(url=filename) - # We then want to fetch the sourcemap of the file, but this call can fail in case filename points to a - # non minified file. 
- self.get_or_fetch_sourcemap_view(url=filename) def close(self): StacktraceProcessor.close(self) diff --git a/tests/sentry/lang/javascript/test_sourcemaps.py b/tests/sentry/lang/javascript/test_sourcemaps.py index 2d167db82421b3..a5b343ad8a0f8a 100644 --- a/tests/sentry/lang/javascript/test_sourcemaps.py +++ b/tests/sentry/lang/javascript/test_sourcemaps.py @@ -102,16 +102,16 @@ def test_no_inline(self): # basic sourcemap fixture has no inlined sources, so expect None smap_view = SourceMapView.from_json_bytes(sourcemap) - source = smap_view.get_or_fetch_sourceview(0) + source = smap_view.get_sourceview(0) assert source is None def test_indexed_inline(self): smap_view = SourceMapView.from_json_bytes(indexed_sourcemap_example) - assert smap_view.get_or_fetch_sourceview(0).get_source() == ( + assert smap_view.get_sourceview(0).get_source() == ( " ONE.foo = function (bar) {\n" + " return baz(bar);\n" + " };" ) - assert smap_view.get_or_fetch_sourceview(1).get_source() == ( + assert smap_view.get_sourceview(1).get_source() == ( " TWO.inc = function (n) {\n" + " return n + 1;\n" + " };" ) From d12a6d11d4514ee112ce3148778a1b1b6d7adf3e Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Feb 2023 09:25:10 +0100 Subject: [PATCH 15/28] Fix errors --- src/sentry/lang/javascript/processor.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/sentry/lang/javascript/processor.py b/src/sentry/lang/javascript/processor.py index d4fedfc2f06a41..30f2c4098c1a37 100644 --- a/src/sentry/lang/javascript/processor.py +++ b/src/sentry/lang/javascript/processor.py @@ -972,17 +972,19 @@ def process_frame(self, processable_frame, processing_task): ): return - errors = self.fetch_by_url_errors.get(frame["abs_path"]) - if errors is not None: - all_errors.extend(errors) - - # `sourceview` is used for pre/post and `context_line` frame expansion. + # The 'sourceview' is used for pre/post and 'context_line' frame expansion. # Here it's pointing to the minified source, however the variable can be shadowed with the original sourceview - # (or `None` if the token doesnt provide us with the `context_line`) down the road. + # (or 'None' if the token doesnt provide us with the 'context_line') down the road. # TODO: add support for debug ids. sourceview = self.get_or_fetch_sourceview(url=frame["abs_path"]) sourcemap_view = self.get_or_fetch_sourcemap_view(url=frame["abs_path"]) + # We check for errors once both the minified file and the corresponding sourcemaps are loaded. In case errors + # happen in one of the two we will detect them and add them to 'all_errors'. + errors = self.fetch_by_url_errors.get(frame["abs_path"]) + if errors is not None: + all_errors.extend(errors) + # We get the url that we discovered for the sourcemap which we use down in the processing pipeline. 
sourcemap_url = self.minified_source_url_to_sourcemap_url.get(frame["abs_path"]) self.sourcemaps_touched.add(sourcemap_url) From db55adeb67765716d5cf8f4cb5795e1e0f136c19 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Fri, 24 Feb 2023 11:14:17 +0100 Subject: [PATCH 16/28] ref: Small refactor for the fetcher --- src/sentry/lang/javascript/processor.py | 33 +++++++++--------- .../sentry/lang/javascript/test_processor.py | 34 +++++++------------ 2 files changed, 29 insertions(+), 38 deletions(-) diff --git a/src/sentry/lang/javascript/processor.py b/src/sentry/lang/javascript/processor.py index 30f2c4098c1a37..09f9fb7f09adf7 100644 --- a/src/sentry/lang/javascript/processor.py +++ b/src/sentry/lang/javascript/processor.py @@ -706,6 +706,11 @@ def __init__(self, project=None, release=None, dist=None, allow_scraping=True): self.dist = dist self.allow_scraping = allow_scraping + def bind_release(self, release=None, dist=None): + """Updates the fetcher with a project, release and dist.""" + self.release = release + self.dist = dist + def fetch_by_debug_id(self, debug_id, source_file_type): # TODO: implement debug id lookup. pass @@ -869,9 +874,6 @@ def __init__(self, *args, **kwargs): self.organization = organization self.max_fetches = MAX_RESOURCE_FETCHES - self.allow_scraping = organization.get_option( - "sentry:scrape_javascript", True - ) is not False and self.project.get_option("sentry:scrape_javascript", True) self.fetch_count = 0 self.sourcemaps_touched = set() @@ -899,10 +901,10 @@ def __init__(self, *args, **kwargs): self.minified_source_url_to_sourcemap_url = {} # Component responsible for fetching the files. - self.fetcher = None - - self.release = None - self.dist = None + self.fetcher = Fetcher( + allow_scraping=organization.get_option("sentry:scrape_javascript", True) is not False + and self.project.get_option("sentry:scrape_javascript", True) + ) def get_valid_frames(self): # build list of frames that we can actually grab source for @@ -920,16 +922,17 @@ def preprocess_step(self, processing_task): ) return False + release = None + dist = None + with sentry_sdk.start_span(op="JavaScriptStacktraceProcessor.preprocess_step.get_release"): - self.release = self.get_release(create=True) - if self.data.get("dist") and self.release: + release = self.get_release(create=True) + if self.data.get("dist") and release: timestamp = self.data.get("timestamp") date = timestamp and datetime.fromtimestamp(timestamp).replace(tzinfo=timezone.utc) - self.dist = self.release.add_dist(self.data["dist"], date) + dist = release.add_dist(self.data["dist"], date) - # We initialize the fetcher during preprocessing, so that we can start fetching files during source cache - # population. - self._initialize_fetcher() + self.fetcher.bind_release(release=release, dist=dist) with sentry_sdk.start_span( op="JavaScriptStacktraceProcessor.preprocess_step.populate_source_cache" @@ -938,10 +941,6 @@ def preprocess_step(self, processing_task): return True - def _initialize_fetcher(self): - # We initialize the fetcher once all data has been loaded. 
- self.fetcher = Fetcher(self.project, self.release, self.dist, self.allow_scraping) - def handles_frame(self, frame, stacktrace_info): platform = frame.get("platform") or self.data.get("platform") return platform in ("javascript", "node") diff --git a/tests/sentry/lang/javascript/test_processor.py b/tests/sentry/lang/javascript/test_processor.py index e5dcec82bc56bd..fcda647b89e30a 100644 --- a/tests/sentry/lang/javascript/test_processor.py +++ b/tests/sentry/lang/javascript/test_processor.py @@ -56,18 +56,18 @@ def test_infers_allow_scraping(self): project = self.create_project() r = JavaScriptStacktraceProcessor({}, None, project) # defaults - assert r.allow_scraping + assert r.fetcher.allow_scraping # disabled for project project.update_option("sentry:scrape_javascript", False) r = JavaScriptStacktraceProcessor({}, None, project) - assert not r.allow_scraping + assert not r.fetcher.allow_scraping # disabled for org project.delete_option("sentry:scrape_javascript") project.organization.update_option("sentry:scrape_javascript", False) r = JavaScriptStacktraceProcessor({}, None, project) - assert not r.allow_scraping + assert not r.fetcher.allow_scraping @patch( "sentry.lang.javascript.processor.JavaScriptStacktraceProcessor.get_valid_frames", @@ -87,15 +87,15 @@ def test_missing_dist(self, _1, _2): project=project, ) - assert processor.release is None - assert processor.dist is None + assert processor.fetcher.release is None + assert processor.fetcher.dist is None processor.preprocess_step(None) - assert processor.release == release - assert processor.dist is not None - assert processor.dist.name == "foo" - assert processor.dist.date_added.timestamp() == processor.data["timestamp"] + assert processor.fetcher.release == release + assert processor.fetcher.dist is not None + assert processor.fetcher.dist.name == "foo" + assert processor.fetcher.dist.date_added.timestamp() == processor.data["timestamp"] @with_feature("organizations:javascript-console-error-tag") def test_tag_suspected_console_error(self): @@ -1160,7 +1160,6 @@ def test_simple_base64(self): processor = JavaScriptStacktraceProcessor( data={}, stacktrace_infos=None, project=self.create_project() ) - processor._initialize_fetcher() smap_view = processor._fetch_sourcemap_view_by_url(base64_sourcemap) token = smap_view.lookup(1, 1, 0) @@ -1173,7 +1172,6 @@ def test_base64_without_padding(self): processor = JavaScriptStacktraceProcessor( data={}, stacktrace_infos=None, project=self.create_project() ) - processor._initialize_fetcher() smap_view = processor._fetch_sourcemap_view_by_url(base64_sourcemap.rstrip("=")) token = smap_view.lookup(1, 1, 0) @@ -1187,7 +1185,6 @@ def test_broken_base64(self): processor = JavaScriptStacktraceProcessor( data={}, stacktrace_infos=None, project=self.create_project() ) - processor._initialize_fetcher() processor._fetch_sourcemap_view_by_url("data:application/json;base64,xxx") @responses.activate @@ -1200,7 +1197,6 @@ def test_garbage_json(self): processor = JavaScriptStacktraceProcessor( data={}, stacktrace_infos=None, project=self.create_project() ) - processor._initialize_fetcher() processor._fetch_sourcemap_view_by_url("http://example.com") @@ -1458,10 +1454,9 @@ def test_file_no_source_records_error(self): project = self.create_project() processor = JavaScriptStacktraceProcessor(data={}, stacktrace_infos=None, project=project) - processor._initialize_fetcher() # no release on the event, so won't find file in database - assert processor.release is None + assert processor.fetcher.release is None 
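The tests can now assert on `processor.fetcher.release` because release state lives on the fetcher: the processor constructs it eagerly with only the scraping flag and binds release/dist later from `preprocess_step`. A minimal sketch of that two-phase binding (release value is hypothetical):

    class Fetcher:
        def __init__(self, allow_scraping=True):
            self.allow_scraping = allow_scraping
            self.release = None
            self.dist = None

        def bind_release(self, release=None, dist=None):
            # called once preprocess_step has resolved the release/dist
            self.release = release
            self.dist = dist

    fetcher = Fetcher()
    assert fetcher.release is None
    fetcher.bind_release(release="1.0")
    assert fetcher.release == "1.0"
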
 # not a real url, so won't find file on the internet
         abs_path = "app:///i/dont/exist.js"
 
@@ -1487,10 +1482,9 @@ def test_node_modules_file_no_source_no_error(self):
 
         project = self.create_project()
         processor = JavaScriptStacktraceProcessor(data={}, stacktrace_infos=None, project=project)
         # We need to initialize the fetcher so that it can capture the necessary context to execute file fetching.
-        processor._initialize_fetcher()
 
         # no release on the event, so won't find file in database
-        assert processor.release is None
+        assert processor.fetcher.release is None
 
         # not a real url, so won't find file on the internet
         abs_path = "app:///../node_modules/i/dont/exist.js"
 
@@ -1518,9 +1512,8 @@ def test_node_modules_file_with_source_is_used(self):
         # in real life the preprocess step will pull release out of the data
         # dictionary passed to the JavaScriptStacktraceProcessor constructor,
         # but since this is just a unit test, we have to set it manually
-        processor.release = release
+        processor.fetcher.bind_release(release=release)
         # We need to initialize the fetcher so that it can capture the necessary context to execute file fetching.
-        processor._initialize_fetcher()
 
@@ -1550,9 +1543,8 @@ def test_node_modules_file_with_source_but_no_map_records_error(self, mock_disco
         # in real life the preprocess step will pull release out of the data
         # dictionary passed to the JavaScriptStacktraceProcessor constructor,
         # but since this is just a unit test, we have to set it manually
-        processor.release = release
+        processor.fetcher.bind_release(release=release)
         # We need to initialize the fetcher so that it can capture the necessary context to execute file fetching.
-        processor._initialize_fetcher()
 
From f8f9305d036bfbf0ba06b7f61b536a46e3a051e0 Mon Sep 17 00:00:00 2001
From: Armin Ronacher
Date: Fri, 24 Feb 2023 11:41:52 +0100
Subject: [PATCH 17/28] test: Added dummy test to ensure debug_ids are
 properly ignored for now

---
 .../lang/javascript/test_plugin.py            | 101 ++++++++++++++++++
 1 file changed, 101 insertions(+)

diff --git a/tests/relay_integration/lang/javascript/test_plugin.py b/tests/relay_integration/lang/javascript/test_plugin.py
index 6dc0e8d3c1f17e..ad2d67c8460c2e 100644
--- a/tests/relay_integration/lang/javascript/test_plugin.py
+++ b/tests/relay_integration/lang/javascript/test_plugin.py
@@ -343,6 +343,107 @@ def test_sourcemap_source_expansion(self):
         # its raw and original form should be identical
         assert raw_frame_list[1] == frame_list[1]
 
+    @responses.activate
+    def test_sourcemap_ignore_debug_id(self):
+        # This tests the same scenario as the one above, but it ensures that the
+        # debug_id code, which is not yet implemented, does not cause any damage for now.
+ responses.add( + responses.GET, + "http://example.com/file.min.js", + body=load_fixture("file.min.js"), + content_type="application/javascript; charset=utf-8", + ) + responses.add( + responses.GET, + "http://example.com/file1.js", + body=load_fixture("file1.js"), + content_type="application/javascript; charset=utf-8", + ) + responses.add( + responses.GET, + "http://example.com/file2.js", + body=load_fixture("file2.js"), + content_type="application/javascript; charset=utf-8", + ) + responses.add( + responses.GET, + "http://example.com/file.sourcemap.js", + body=load_fixture("file.sourcemap.js"), + content_type="application/javascript; charset=utf-8", + ) + responses.add(responses.GET, "http://example.com/index.html", body="Not Found", status=404) + + data = { + "timestamp": self.min_ago, + "message": "hello", + "platform": "javascript", + "debug_meta": { + "images": [ + { + "type": "sourcemap", + "debug_id": "c941d872-af1f-4f0c-a7ff-ad3d295fe153", + "code_file": "http://example.com/file.min.js", + } + ] + }, + "exception": { + "values": [ + { + "type": "Error", + "stacktrace": { + "frames": [ + { + "abs_path": "http://example.com/file.min.js", + "filename": "file.min.js", + "lineno": 1, + "colno": 39, + }, + # NOTE: Intentionally source is not retrieved from this HTML file + { + "function": 'function: "HTMLDocument."', + "abs_path": "http//example.com/index.html", + "filename": "index.html", + "lineno": 283, + "colno": 17, + "in_app": False, + }, + ] + }, + } + ] + }, + } + + event = self.post_and_retrieve_event(data) + + assert event.data["errors"] == [ + {"type": "js_no_source", "url": "http//example.com/index.html"} + ] + + exception = event.interfaces["exception"] + frame_list = exception.values[0].stacktrace.frames + + frame = frame_list[0] + assert frame.pre_context == ["function add(a, b) {", '\t"use strict";'] + expected = "\treturn a + b; // fôo" + assert frame.context_line == expected + assert frame.post_context == ["}", ""] + + raw_frame_list = exception.values[0].raw_stacktrace.frames + raw_frame = raw_frame_list[0] + assert not raw_frame.pre_context + assert ( + raw_frame.context_line + == 'function add(a,b){"use strict";return a+b}function multiply(a,b){"use strict";return a*b}function ' + 'divide(a,b){"use strict";try{return multip {snip}' + ) + assert raw_frame.post_context == ["//@ sourceMappingURL=file.sourcemap.js", ""] + assert raw_frame.lineno == 1 + + # Since we couldn't expand source for the 2nd frame, both + # its raw and original form should be identical + assert raw_frame_list[1] == frame_list[1] + @responses.activate def test_sourcemap_embedded_source_expansion(self): responses.add( From 45cb57fa158071ffc855fdd01773d4fd932e211b Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Feb 2023 11:48:31 +0100 Subject: [PATCH 18/28] Add invalid base64 sourcemap test --- src/sentry/lang/javascript/processor.py | 3 +- .../lang/javascript/test_plugin.py | 48 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/sentry/lang/javascript/processor.py b/src/sentry/lang/javascript/processor.py index 09f9fb7f09adf7..ac44aa05df4347 100644 --- a/src/sentry/lang/javascript/processor.py +++ b/src/sentry/lang/javascript/processor.py @@ -1,4 +1,5 @@ import base64 +import binascii import errno import logging import re @@ -1396,7 +1397,7 @@ def _fetch_sourcemap_view_by_url(self, url, source=b""): force_bytes(url[BASE64_PREAMBLE_LENGTH:]) + (b"=" * (-(len(url) - BASE64_PREAMBLE_LENGTH) % 4)) ) - except TypeError as e: + except binascii.Error as e: 
raise UnparseableSourcemap({"url": "", "reason": str(e)}) else: # look in the database and, if not found, optionally try to scrape the web diff --git a/tests/relay_integration/lang/javascript/test_plugin.py b/tests/relay_integration/lang/javascript/test_plugin.py index ad2d67c8460c2e..e2769402fbbb2a 100644 --- a/tests/relay_integration/lang/javascript/test_plugin.py +++ b/tests/relay_integration/lang/javascript/test_plugin.py @@ -22,6 +22,8 @@ .replace("\n", "") ) +INVALID_BASE64_SOURCEMAP = "data:application/json;base64,A" + def get_fixture_path(name): return os.path.join(os.path.dirname(__file__), "fixtures", name) @@ -216,6 +218,52 @@ def test_inlined_sources(self, mock_discover_sourcemap, mock_fetch_by_url): assert not frame.post_context assert frame.data["sourcemap"] == "http://example.com/test.min.js" + @patch("sentry.lang.javascript.processor.Fetcher.fetch_by_url") + @patch("sentry.lang.javascript.processor.discover_sourcemap") + def test_invalid_base64_sourcemap_returns_an_error( + self, mock_discover_sourcemap, mock_fetch_by_url + ): + data = { + "timestamp": self.min_ago, + "message": "hello", + "platform": "javascript", + "exception": { + "values": [ + { + "type": "Error", + "stacktrace": { + "frames": [ + { + "abs_path": "http://example.com/test.min.js", + "filename": "test.js", + "lineno": 1, + "colno": 1, + } + ] + }, + } + ] + }, + } + + mock_discover_sourcemap.return_value = INVALID_BASE64_SOURCEMAP + + mock_fetch_by_url.return_value.url = "http://example.com/test.min.js" + mock_fetch_by_url.return_value.body = force_bytes("\n".join("")) + mock_fetch_by_url.return_value.encoding = None + + event = self.post_and_retrieve_event(data) + + mock_fetch_by_url.assert_called_once_with("http://example.com/test.min.js") + + assert len(event.data["errors"]) == 1 + assert event.data["errors"][0] == { + "url": "", + "reason": "Invalid base64-encoded string: " + "number of data characters (1) cannot be 1 more than a multiple of 4", + "type": "js_invalid_source", + } + @responses.activate def test_error_message_translations(self): data = { From daa99d7d82cc8fe415a8740863d1fcfd66380fa9 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Feb 2023 11:52:09 +0100 Subject: [PATCH 19/28] Remove models --- src/sentry/models/artifactbundle.py | 90 ----------------------------- 1 file changed, 90 deletions(-) delete mode 100644 src/sentry/models/artifactbundle.py diff --git a/src/sentry/models/artifactbundle.py b/src/sentry/models/artifactbundle.py deleted file mode 100644 index 1c5f0822d9d273..00000000000000 --- a/src/sentry/models/artifactbundle.py +++ /dev/null @@ -1,90 +0,0 @@ -from enum import Enum - -from django.db import models -from django.utils import timezone - -from sentry.db.models import ( - BoundedBigIntegerField, - BoundedPositiveIntegerField, - FlexibleForeignKey, - Model, - region_silo_only_model, -) -from sentry.models import DB_VERSION_LENGTH - - -class SourceFileType(Enum): - SOURCE = 1 - MINIFIED_SOURCE = 2 - SOURCE_MAP = 3 - INDEXED_RAM_BUNDLE = 4 - - @classmethod - def choices(cls): - return [(key.value, key.name) for key in cls] - - -@region_silo_only_model -class ArtifactBundle(Model): - __include_in_export__ = False - - organization_id = BoundedBigIntegerField(db_index=True) - bundle_id = models.UUIDField(null=True) - file = FlexibleForeignKey("sentry.File") - artifact_count = BoundedPositiveIntegerField() - - class Meta: - app_label = "sentry" - db_table = "sentry_artifactbundle" - - unique_together = (("organization_id", "bundle_id"),) - - 
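The models deleted in this patch outline the schema that a future `fetch_by_debug_id` would query: the tables relate a debug_id and a source file type to the artifact bundle holding the file. A purely hypothetical sketch of such a lookup against the models defined just below, not part of this series:

    # Hypothetical: find the bundle carrying the sourcemap for a debug_id.
    # Field names follow the models in this (removed) module; the import path
    # is an assumption.
    from sentry.models.artifactbundle import DebugIdArtifactBundle, SourceFileType

    def lookup_sourcemap_bundle(org_id, debug_id):
        return (
            DebugIdArtifactBundle.objects.filter(
                organization_id=org_id,
                debug_id=debug_id,
                source_file_type=SourceFileType.SOURCE_MAP.value,
            )
            .select_related("artifact_bundle")
            .first()
        )
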
-@region_silo_only_model -class ReleaseArtifactBundle(Model): - __include_in_export__ = False - - organization_id = BoundedBigIntegerField(db_index=True) - release_name = models.CharField(max_length=DB_VERSION_LENGTH) - dist_id = BoundedBigIntegerField(null=True) - artifact_bundle = FlexibleForeignKey("sentry.ArtifactBundle") - - class Meta: - app_label = "sentry" - db_table = "sentry_releaseartifactbundle" - - unique_together = (("organization_id", "release_name", "dist_id", "artifact_bundle"),) - - -@region_silo_only_model -class DebugIdArtifactBundle(Model): - __include_in_export__ = False - - organization_id = BoundedBigIntegerField(db_index=True) - debug_id = models.UUIDField() - artifact_bundle = FlexibleForeignKey("sentry.ArtifactBundle") - source_file_type = models.IntegerField(choices=SourceFileType.choices()) - date_added = models.DateTimeField(default=timezone.now) - date_last_accessed = models.DateTimeField(default=timezone.now) - - class Meta: - app_label = "sentry" - db_table = "sentry_debugidartifactbundle" - - # We can have the same debug_id pointing to different artifact_bundle(s) because the user might upload - # the same artifacts twice, or they might have certain build files that don't change across builds. - unique_together = (("debug_id", "artifact_bundle", "source_file_type"),) - - -@region_silo_only_model -class ProjectArtifactBundle(Model): - __include_in_export__ = False - - project_id = BoundedBigIntegerField(db_index=True) - artifact_bundle = FlexibleForeignKey("sentry.ArtifactBundle") - - class Meta: - app_label = "sentry" - db_table = "sentry_projectartifactbundle" - - unique_together = (("project_id", "artifact_bundle"),) From 0c21f128c87b17dc2de7c0ed5a4bea5a260bc6b4 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Feb 2023 12:03:53 +0100 Subject: [PATCH 20/28] Update comments --- src/sentry/lang/javascript/processor.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/sentry/lang/javascript/processor.py b/src/sentry/lang/javascript/processor.py index ac44aa05df4347..dd67c693042182 100644 --- a/src/sentry/lang/javascript/processor.py +++ b/src/sentry/lang/javascript/processor.py @@ -1292,7 +1292,8 @@ def _fetch_and_cache_sourceview(self, url, debug_id, source_file_type): # a valid file to cache return None else: - # TODO: it should do something but we don't know. + # TODO: aliasing is not used in the codebase as of now, we might want to remove it because we always call + # this function with url == frame.abs_path. if result.url != url: self.url_aliases[result.url] = url @@ -1445,8 +1446,8 @@ def populate_source_cache(self, frames): op="JavaScriptStacktraceProcessor.populate_source_cache.cache_source" ) as span: span.set_data("filename", filename) - # TODO: we want to implement the construction of the debug - # We first want to fetch the sourceview of the file. + # TODO: we want to implement the construction of the abs_path -> debug_id table. + # We first want to fetch the sourceview of the file but not the sourcemaps. 
self.get_or_fetch_sourceview(url=filename) def close(self): From 3e955cc8cfb794e2df01071f73fb47b793d5d30d Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Feb 2023 12:05:25 +0100 Subject: [PATCH 21/28] Use return None --- src/sentry/lang/javascript/processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sentry/lang/javascript/processor.py b/src/sentry/lang/javascript/processor.py index dd67c693042182..9848328f018317 100644 --- a/src/sentry/lang/javascript/processor.py +++ b/src/sentry/lang/javascript/processor.py @@ -714,7 +714,7 @@ def bind_release(self, release=None, dist=None): def fetch_by_debug_id(self, debug_id, source_file_type): # TODO: implement debug id lookup. - pass + return None def fetch_by_url(self, url): """ @@ -845,7 +845,7 @@ def fetch_by_url(self, url): def _fetch_sourcemap_by_debug_id(self): # TODO: implement debug id lookup. - pass + return None class JavaScriptStacktraceProcessor(StacktraceProcessor): From 6d2b0a1eac820998222acb36aa16e0174fc766e3 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 Feb 2023 14:03:44 +0100 Subject: [PATCH 22/28] Fix error lookup --- src/sentry/lang/javascript/processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/lang/javascript/processor.py b/src/sentry/lang/javascript/processor.py index 9848328f018317..aedba381d0479e 100644 --- a/src/sentry/lang/javascript/processor.py +++ b/src/sentry/lang/javascript/processor.py @@ -1048,7 +1048,7 @@ def process_frame(self, processable_frame, processing_task): sourceview = self.get_or_fetch_sourceview(url=abs_path) if sourceview is None: - errors = self.fetch_by_url_errors[abs_path] + errors = self.fetch_by_url_errors.get(abs_path) if errors: all_errors.extend(errors) else: From 4032847a3ce1ed6c794aad637b3d83d17dccf244 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 Feb 2023 15:50:27 +0100 Subject: [PATCH 23/28] Fix performance issues --- src/sentry/lang/javascript/processor.py | 87 +++++++++---------- .../lang/javascript/test_plugin.py | 56 +++++++++++- .../sentry/lang/javascript/test_processor.py | 10 +-- 3 files changed, 101 insertions(+), 52 deletions(-) diff --git a/src/sentry/lang/javascript/processor.py b/src/sentry/lang/javascript/processor.py index aedba381d0479e..26b9d78c07ac97 100644 --- a/src/sentry/lang/javascript/processor.py +++ b/src/sentry/lang/javascript/processor.py @@ -884,19 +884,17 @@ def __init__(self, *args, **kwargs): # applies to the many repetitions across the code. # Cache holding the results of the fetching by url. - self.fetch_by_url_result = {} + self.fetch_by_url_sourceviews = {} self.fetch_by_url_errors = {} + self.result_map = {} # Cache holding the results of the fetching by debug id. - self.fetch_by_debug_id_result = {} + self.fetch_by_debug_id_sourceviews = {} # Cache holding the sourcemaps views indexed by either debug id or sourcemap url. self.debug_id_sourcemap_cache = {} self.sourcemap_url_sourcemap_cache = {} - # Contains a mapping between the 'result.url' of the request and the file url from 'abs_path'. - self.url_aliases = {} - # Contains a mapping between the minified source url ('abs_path' of the frame) and the sourcemap url # which is discovered by looking at the minified source. self.minified_source_url_to_sourcemap_url = {} @@ -977,7 +975,7 @@ def process_frame(self, processable_frame, processing_task): # (or 'None' if the token doesnt provide us with the 'context_line') down the road. # TODO: add support for debug ids. 
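The renames in this patch are the core of the performance fix: the URL and debug-id caches now hold parsed `SourceView` objects (with the raw body kept separately in `result_map` for sourcemap construction), so the fetched bytes are parsed once per file instead of on every cache hit. A toy illustration with the parse step stubbed out:

    # Keep the parsed object, not the raw response: parse once per URL.
    parsed_cache = {}

    def get_parsed(url, raw_bytes):
        parsed = parsed_cache.get(url)
        if parsed is None:
            # stand-in for the expensive SourceView.from_bytes call
            parsed = raw_bytes.decode("utf-8").splitlines()
            parsed_cache[url] = parsed
        return parsed

    first = get_parsed("http://example.com/app.min.js", b"a;\nb;")
    # the second call reuses the parsed object instead of re-parsing
    assert get_parsed("http://example.com/app.min.js", b"a;\nb;") is first
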
sourceview = self.get_or_fetch_sourceview(url=frame["abs_path"]) - sourcemap_view = self.get_or_fetch_sourcemap_view(url=frame["abs_path"]) + sourcemap_cache = self.get_or_fetch_sourcemap_cache(url=frame["abs_path"]) # We check for errors once both the minified file and the corresponding sourcemaps are loaded. In case errors # happen in one of the two we will detect them and add them to 'all_errors'. @@ -995,11 +993,11 @@ def process_frame(self, processable_frame, processing_task): new_frame = dict(frame) raw_frame = dict(frame) - if sourcemap_view and frame.get("colno") is None: + if sourcemap_cache and frame.get("colno") is None: all_errors.append( {"type": EventError.JS_NO_COLUMN, "url": http.expose_url(frame["abs_path"])} ) - elif sourcemap_view: + elif sourcemap_cache: if is_data_uri(sourcemap_url): sourcemap_label = frame["abs_path"] else: @@ -1010,7 +1008,7 @@ def process_frame(self, processable_frame, processing_task): try: # Errors are 1-indexed in the frames. assert frame["lineno"] > 0, "line numbers are 1-indexed" - token = sourcemap_view.lookup(frame["lineno"], frame["colno"], LINES_OF_CONTEXT) + token = sourcemap_cache.lookup(frame["lineno"], frame["colno"], LINES_OF_CONTEXT) except Exception: token = None all_errors.append( @@ -1240,16 +1238,13 @@ def get_or_fetch_sourceview(self, url=None, debug_id=None, source_file_type=None def _get_cached_sourceview(self, url=None, debug_id=None, source_file_type=None): if debug_id is not None: - source_result = self.fetch_by_debug_id_result.get((debug_id, source_file_type)) - if source_result is not None: - sourceview = SourceView.from_bytes(source_result.body) + sourceview = self.fetch_by_debug_id_sourceviews.get((debug_id, source_file_type)) + if sourceview is not None: return sourceview if url is not None: - real_url = self.url_aliases.get(url, url) - source_result = self.fetch_by_url_result.get(real_url) - if source_result is not None: - sourceview = SourceView.from_bytes(source_result.body) + sourceview = self.fetch_by_url_sourceviews.get(url) + if sourceview is not None: return sourceview return None @@ -1272,10 +1267,8 @@ def _fetch_and_cache_sourceview(self, url, debug_id, source_file_type): span.set_data("debug_id", debug_id) result = self.fetcher.fetch_by_debug_id(debug_id, source_file_type) if result is not None: - # We temporarily store the result directly, in case we want to compute the sourceview on demand - # we can delete the other dictionary. - self.fetch_by_debug_id_result[debug_id, source_file_type] = result sourceview = SourceView.from_bytes(result.body) + self.fetch_by_debug_id_sourceviews[debug_id, source_file_type] = sourceview return sourceview try: @@ -1292,15 +1285,9 @@ def _fetch_and_cache_sourceview(self, url, debug_id, source_file_type): # a valid file to cache return None else: - # TODO: aliasing is not used in the codebase as of now, we might want to remove it because we always call - # this function with url == frame.abs_path. - if result.url != url: - self.url_aliases[result.url] = url - - # We temporarily store the result directly, in case we want to compute the sourceview on demand - # we can delete the other dictionary. 
- self.fetch_by_url_result[url] = result sourceview = SourceView.from_bytes(result.body) + self.fetch_by_url_sourceviews[url] = sourceview + self.result_map[url] = result.body sourcemap_url = discover_sourcemap(result) if sourcemap_url: @@ -1308,34 +1295,34 @@ def _fetch_and_cache_sourceview(self, url, debug_id, source_file_type): return sourceview - def get_or_fetch_sourcemap_view(self, url=None, debug_id=None): + def get_or_fetch_sourcemap_cache(self, url=None, debug_id=None): """ - Gets from the cache or fetches the sourcemap view of the file at 'url' or 'debug_id'. + Gets from the cache or fetches the SmCache of the file at 'url' or 'debug_id'. Returns None when a sourcemap corresponding to the file at 'url' is not found. """ - rv = self._get_cached_sourcemap_view(url, debug_id) + rv = self._get_cached_sourcemap_cache(url, debug_id) if rv is not None: return rv - return self._fetch_and_cache_sourcemap_view(url, debug_id) + return self._fetch_and_cache_sourcemap_cache(url, debug_id) - def _get_cached_sourcemap_view(self, url, debug_id): + def _get_cached_sourcemap_cache(self, url, debug_id): if debug_id is not None: rv = self.debug_id_sourcemap_cache.get(debug_id) if rv is not None: return rv if url is not None: - real_url = self.url_aliases.get(url, url) - sourcemap_url = self.minified_source_url_to_sourcemap_url.get(real_url) + # We get the url of the sourcemap from the url of the minified file. + sourcemap_url = self.minified_source_url_to_sourcemap_url.get(url) rv = self.sourcemap_url_sourcemap_cache.get(sourcemap_url) if rv is not None: return rv return None - def _fetch_and_cache_sourcemap_view(self, url, debug_id): + def _fetch_and_cache_sourcemap_cache(self, url, debug_id): if debug_id is not None: sourcemap_view = self._handle_debug_id_sourcemap_lookup(debug_id) if sourcemap_view is not None: @@ -1349,20 +1336,27 @@ def _fetch_and_cache_sourcemap_view(self, url, debug_id): return None def _handle_debug_id_sourcemap_lookup(self, debug_id): - minified_result = self.fetch_by_debug_id_result.get(debug_id) - if minified_result is None: + minified_sourceview = self.fetch_by_debug_id_sourceviews.get(debug_id) + if minified_sourceview is None: return None # TODO: use enum for source file type when available. result = self.fetcher.fetch_by_debug_id(debug_id, "sourcemap") if result is not None: - sourcemap_view = SmCache.from_bytes(minified_result.body, result.body) + sourcemap_view = SmCache.from_bytes( + minified_sourceview.get_source().encode(), result.body + ) self.debug_id_sourcemap_cache[debug_id] = sourcemap_view return sourcemap_view def _handle_url_sourcemap_lookup(self, url): - minified_result = self.fetch_by_url_result.get(url) - if minified_result is None: + # In case there are any fetching errors tied to the url of the minified file, we don't want to attempt the + # construction of the sourcemap cache. 
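+        # (These are the errors recorded when fetching the minified file itself
+        # failed; 'process_frame' reads them when the sourceview is missing.)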
+ if len(self.fetch_by_url_errors.get(url, [])) > 0: + return None + + minified_sourceview = self.fetch_by_url_sourceviews.get(url) + if minified_sourceview is None: return None sourcemap_url = self.minified_source_url_to_sourcemap_url.get(url) @@ -1375,8 +1369,8 @@ def _handle_url_sourcemap_lookup(self, url): ) as span: span.set_data("url", url) span.set_data("sourcemap_url", sourcemap_url) - sourcemap_view = self._fetch_sourcemap_view_by_url( - sourcemap_url, source=minified_result.body + sourcemap_cache = self._fetch_sourcemap_cache_by_url( + sourcemap_url, source=minified_sourceview.get_source().encode() ) except http.BadSource as exc: # we don't perform the same check here as above, because if someone has @@ -1388,10 +1382,10 @@ def _handle_url_sourcemap_lookup(self, url): self.fetch_by_url_errors.setdefault(url, []).append(exc.data) return None else: - self.sourcemap_url_sourcemap_cache[sourcemap_url] = sourcemap_view - return sourcemap_view + self.sourcemap_url_sourcemap_cache[sourcemap_url] = sourcemap_cache + return sourcemap_cache - def _fetch_sourcemap_view_by_url(self, url, source=b""): + def _fetch_sourcemap_cache_by_url(self, url, source=b""): if is_data_uri(url): try: body = base64.b64decode( @@ -1409,12 +1403,13 @@ def _fetch_sourcemap_view_by_url(self, url, source=b""): result = self.fetcher.fetch_by_url(url) body = result.body + try: with sentry_sdk.start_span( op="JavaScriptStacktraceProcessor.fetch_sourcemap_view_by_url.SmCache.from_bytes" ): + # This is an expensive operation that should be executed as few times as possible. return SmCache.from_bytes(source, body) - except Exception as exc: # This is in debug because the product shows an error already. logger.debug(str(exc), exc_info=True) diff --git a/tests/relay_integration/lang/javascript/test_plugin.py b/tests/relay_integration/lang/javascript/test_plugin.py index e2769402fbbb2a..3e8ad4cb6fbc65 100644 --- a/tests/relay_integration/lang/javascript/test_plugin.py +++ b/tests/relay_integration/lang/javascript/test_plugin.py @@ -238,7 +238,7 @@ def test_invalid_base64_sourcemap_returns_an_error( "filename": "test.js", "lineno": 1, "colno": 1, - } + }, ] }, } @@ -264,6 +264,60 @@ def test_invalid_base64_sourcemap_returns_an_error( "type": "js_invalid_source", } + @patch("sentry.lang.javascript.processor.SmCache.from_bytes") + @patch("sentry.lang.javascript.processor.Fetcher.fetch_by_url") + @patch("sentry.lang.javascript.processor.discover_sourcemap") + def test_sourcemap_cache_is_constructed_only_once_if_an_error_is_raised( + self, mock_discover_sourcemap, mock_fetch_by_url, mock_from_bytes + ): + data = { + "timestamp": self.min_ago, + "message": "hello", + "platform": "javascript", + "exception": { + "values": [ + { + "type": "Error", + "stacktrace": { + "frames": [ + { + "abs_path": "http://example.com/test.min.js", + "filename": "test.js", + "lineno": 1, + "colno": 1, + }, + { + "abs_path": "http://example.com/test.min.js", + "filename": "test.js", + "lineno": 1, + "colno": 1, + }, + { + "abs_path": "http://example.com/test.min.js", + "filename": "test.js", + "lineno": 1, + "colno": 1, + }, + ] + }, + } + ] + }, + } + + mock_discover_sourcemap.return_value = BASE64_SOURCEMAP + + mock_fetch_by_url.return_value.url = "http://example.com/test.min.js" + mock_fetch_by_url.return_value.body = force_bytes("\n".join("")) + mock_fetch_by_url.return_value.encoding = None + + mock_from_bytes.side_effect = Exception() + + self.post_and_retrieve_event(data) + + 
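+        # All three frames reference the same minified file, so the url fetch and
+        # the (expensive) SmCache construction must each run exactly once.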
mock_fetch_by_url.assert_called_once_with("http://example.com/test.min.js") + mock_from_bytes.assert_called_once() + @responses.activate def test_error_message_translations(self): data = { diff --git a/tests/sentry/lang/javascript/test_processor.py b/tests/sentry/lang/javascript/test_processor.py index fcda647b89e30a..bb24c87d528381 100644 --- a/tests/sentry/lang/javascript/test_processor.py +++ b/tests/sentry/lang/javascript/test_processor.py @@ -1160,7 +1160,7 @@ def test_simple_base64(self): processor = JavaScriptStacktraceProcessor( data={}, stacktrace_infos=None, project=self.create_project() ) - smap_view = processor._fetch_sourcemap_view_by_url(base64_sourcemap) + smap_view = processor._fetch_sourcemap_cache_by_url(base64_sourcemap) token = smap_view.lookup(1, 1, 0) assert token.src == "/test.js" @@ -1172,7 +1172,7 @@ def test_base64_without_padding(self): processor = JavaScriptStacktraceProcessor( data={}, stacktrace_infos=None, project=self.create_project() ) - smap_view = processor._fetch_sourcemap_view_by_url(base64_sourcemap.rstrip("=")) + smap_view = processor._fetch_sourcemap_cache_by_url(base64_sourcemap.rstrip("=")) token = smap_view.lookup(1, 1, 0) assert token.src == "/test.js" @@ -1185,7 +1185,7 @@ def test_broken_base64(self): processor = JavaScriptStacktraceProcessor( data={}, stacktrace_infos=None, project=self.create_project() ) - processor._fetch_sourcemap_view_by_url("data:application/json;base64,xxx") + processor._fetch_sourcemap_cache_by_url("data:application/json;base64,xxx") @responses.activate def test_garbage_json(self): @@ -1197,7 +1197,7 @@ def test_garbage_json(self): processor = JavaScriptStacktraceProcessor( data={}, stacktrace_infos=None, project=self.create_project() ) - processor._fetch_sourcemap_view_by_url("http://example.com") + processor._fetch_sourcemap_cache_by_url("http://example.com") class TrimLineTest(unittest.TestCase): @@ -1550,7 +1550,7 @@ def test_node_modules_file_with_source_but_no_map_records_error(self, mock_disco assert len(processor.fetch_by_url_errors.get(abs_path, [])) == 0 processor.get_or_fetch_sourceview(url=abs_path) - processor.get_or_fetch_sourcemap_view(url=abs_path) + processor.get_or_fetch_sourcemap_cache(url=abs_path) # now we have an error assert len(processor.fetch_by_url_errors.get(abs_path, [])) == 1 From f9fa5e05ab87daad47e586cefca06bf57164b705 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 Feb 2023 16:04:14 +0100 Subject: [PATCH 24/28] Improve --- src/sentry/lang/javascript/processor.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/sentry/lang/javascript/processor.py b/src/sentry/lang/javascript/processor.py index 26b9d78c07ac97..cb0ab8117df342 100644 --- a/src/sentry/lang/javascript/processor.py +++ b/src/sentry/lang/javascript/processor.py @@ -886,7 +886,6 @@ def __init__(self, *args, **kwargs): # Cache holding the results of the fetching by url. self.fetch_by_url_sourceviews = {} self.fetch_by_url_errors = {} - self.result_map = {} # Cache holding the results of the fetching by debug id. 
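+        # Keyed by '(debug_id, source_file_type)' tuples.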
self.fetch_by_debug_id_sourceviews = {} @@ -1287,7 +1286,6 @@ def _fetch_and_cache_sourceview(self, url, debug_id, source_file_type): else: sourceview = SourceView.from_bytes(result.body) self.fetch_by_url_sourceviews[url] = sourceview - self.result_map[url] = result.body sourcemap_url = discover_sourcemap(result) if sourcemap_url: From a2300490d377aff9dc55d70933484facd15f2845 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 Feb 2023 16:23:32 +0100 Subject: [PATCH 25/28] noop From 2499cad97021ff82a51fae2b659ff03b101656de Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 Feb 2023 16:25:14 +0100 Subject: [PATCH 26/28] Fix test --- tests/sentry/lang/javascript/test_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sentry/lang/javascript/test_processor.py b/tests/sentry/lang/javascript/test_processor.py index bb24c87d528381..d0bf1700153ebd 100644 --- a/tests/sentry/lang/javascript/test_processor.py +++ b/tests/sentry/lang/javascript/test_processor.py @@ -1518,7 +1518,7 @@ def test_node_modules_file_with_source_is_used(self): # We found the source view. assert processor.get_or_fetch_sourceview(url=abs_path) # Source view exists in cache. - assert processor.fetch_by_url_result.get(abs_path) + assert processor.fetch_by_url_sourceviews.get(abs_path) assert len(processor.fetch_by_url_errors.get(abs_path, [])) == 0 @patch("sentry.lang.javascript.processor.discover_sourcemap") From ffbf97b41a3a1d0599daa21a540524007258907b Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 28 Feb 2023 13:50:28 +0100 Subject: [PATCH 27/28] Fix typing --- src/sentry/lang/javascript/processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sentry/lang/javascript/processor.py b/src/sentry/lang/javascript/processor.py index cb0ab8117df342..9d3d76b400a3dd 100644 --- a/src/sentry/lang/javascript/processor.py +++ b/src/sentry/lang/javascript/processor.py @@ -1342,7 +1342,7 @@ def _handle_debug_id_sourcemap_lookup(self, debug_id): result = self.fetcher.fetch_by_debug_id(debug_id, "sourcemap") if result is not None: sourcemap_view = SmCache.from_bytes( - minified_sourceview.get_source().encode(), result.body + minified_sourceview.get_source().encode("utf-8"), result.body ) self.debug_id_sourcemap_cache[debug_id] = sourcemap_view return sourcemap_view @@ -1350,7 +1350,7 @@ def _handle_debug_id_sourcemap_lookup(self, debug_id): def _handle_url_sourcemap_lookup(self, url): # In case there are any fetching errors tied to the url of the minified file, we don't want to attempt the # construction of the sourcemap cache. - if len(self.fetch_by_url_errors.get(url, [])) > 0: + if url in self.fetch_by_url_errors: return None minified_sourceview = self.fetch_by_url_sourceviews.get(url) From bff6baf154e57344c8a0a3fd2fe656a46c7765c4 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 28 Feb 2023 15:11:20 +0100 Subject: [PATCH 28/28] Fix wrong condition --- src/sentry/lang/javascript/processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/lang/javascript/processor.py b/src/sentry/lang/javascript/processor.py index 9d3d76b400a3dd..1ee577821f0023 100644 --- a/src/sentry/lang/javascript/processor.py +++ b/src/sentry/lang/javascript/processor.py @@ -1226,7 +1226,7 @@ def get_or_fetch_sourceview(self, url=None, debug_id=None, source_file_type=None """ # We require that artifact bundles with debug ids used embedded source in the source map. # TODO: use enum for source file type when available. 
- if debug_id is not None and source_file_type != "source": + if debug_id is not None and source_file_type == "source": return None rv = self._get_cached_sourceview(url, debug_id, source_file_type)
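
Taken together, patches 23-28 converge on one caching shape: decoded
sourceviews and SmCache objects are memoized per url (or per debug id), a
recorded fetch error short-circuits sourcemap construction, and a plain
"source" lookup by debug id returns None because those sources are embedded
in the sourcemap. A condensed sketch of that flow, with the real
Fetcher/SourceView/SmCache machinery stood in by plain callables and strings
(the 'SourceLookup' name and both constructor arguments are illustrative,
not Sentry APIs):

    class SourceLookup:
        def __init__(self, fetch_source, build_sourcemap_cache):
            # Both callables return a value on success and raise on failure.
            self.fetch_source = fetch_source
            self.build_sourcemap_cache = build_sourcemap_cache
            self.fetch_by_url_sourceviews = {}  # url -> decoded minified source
            self.fetch_by_url_errors = {}       # url -> [error payloads]
            self.sourcemap_caches = {}          # url -> constructed "SmCache"

        def get_or_fetch_sourceview(self, url=None, debug_id=None, source_file_type=None):
            # Patch 28's guard: bundles with debug ids embed their sources in
            # the sourcemap, so a "source" lookup by debug id yields None.
            if debug_id is not None and source_file_type == "source":
                return None
            if url in self.fetch_by_url_sourceviews:
                return self.fetch_by_url_sourceviews[url]
            try:
                source = self.fetch_source(url)
            except Exception as exc:
                self.fetch_by_url_errors.setdefault(url, []).append(str(exc))
                return None
            # Patch 23: cache the decoded view, not the raw fetch result, so
            # decoding happens at most once per url.
            self.fetch_by_url_sourceviews[url] = source
            return source

        def get_or_fetch_sourcemap_cache(self, url):
            # Patches 23/27: a recorded fetch error for the minified file means
            # the expensive SmCache construction is never attempted for it.
            if url in self.fetch_by_url_errors:
                return None
            if url in self.sourcemap_caches:
                return self.sourcemap_caches[url]
            if url not in self.fetch_by_url_sourceviews:
                return None
            try:
                cache = self.build_sourcemap_cache(url)
            except Exception as exc:
                self.fetch_by_url_errors.setdefault(url, []).append(str(exc))
                return None
            self.sourcemap_caches[url] = cache
            return cache

Wired to in-memory fixtures, the single-construction behavior that the new
regression test asserts with mocks looks like this:

    lookup = SourceLookup(
        fetch_source={"app.min.js": "console.log(1)"}.__getitem__,
        build_sourcemap_cache=lambda url: "smcache-for-" + url,
    )
    for _ in range(3):  # three frames, same file
        lookup.get_or_fetch_sourceview(url="app.min.js")
        lookup.get_or_fetch_sourcemap_cache("app.min.js")
    assert lookup.fetch_by_url_sourceviews == {"app.min.js": "console.log(1)"}
    assert lookup.sourcemap_caches == {"app.min.js": "smcache-for-app.min.js"}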