Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
74e9c08
Strip sensitive data from URLs. refs 1742
antonpirker Nov 15, 2022
71d5e4a
Better function name
antonpirker Nov 15, 2022
743c3d1
Check send_default_pii before sanitizing url.
antonpirker Nov 15, 2022
0560891
Ignore typing on named tuples
antonpirker Nov 15, 2022
5d25063
Make it run in Python 2
antonpirker Nov 15, 2022
a16b5ab
Merge branch 'master' into antonpirker/1742-remove-sensitive-data-fro…
antonpirker Jan 30, 2023
63df676
Split url into url, query and fragment
antonpirker Jan 30, 2023
4db535f
Some type fixes
antonpirker Jan 30, 2023
ce56e93
Preventing circular import
antonpirker Jan 30, 2023
e418033
Fixed some tests
antonpirker Jan 30, 2023
5bbd781
Make url a string to fix tests
antonpirker Jan 30, 2023
fcbd8d7
Fixing httpx tests again
antonpirker Jan 30, 2023
2bd870c
Fixing tests
antonpirker Jan 30, 2023
72a4675
Fix tests for old Python versions
antonpirker Jan 30, 2023
e8e05e9
Merge branch 'master' into antonpirker/1742-remove-sensitive-data-fro…
antonpirker Jan 30, 2023
1639cc4
Fix tests with fragments in old Python versions
antonpirker Jan 30, 2023
11b9bf2
Merge branch 'antonpirker/1742-remove-sensitive-data-from-urls' of gi…
antonpirker Jan 30, 2023
c67be60
Merge branch 'master' into antonpirker/1742-remove-sensitive-data-fro…
antonpirker Jan 30, 2023
90eb4db
Fixed utf8 chars in Python 2.7
antonpirker Jan 30, 2023
9215f45
Cleanup
antonpirker Jan 30, 2023
68dda23
Merge branch 'master' into antonpirker/1742-remove-sensitive-data-fro…
antonpirker Jan 31, 2023
8a17864
Merge branch 'master' into antonpirker/1742-remove-sensitive-data-fro…
antonpirker Jan 31, 2023
72a9305
Merge branch 'master' into antonpirker/1742-remove-sensitive-data-fro…
antonpirker Feb 6, 2023
5c074d1
Merge branch 'master' into antonpirker/1742-remove-sensitive-data-fro…
antonpirker Feb 15, 2023
1482ac6
Moved import outside of function
antonpirker Feb 15, 2023
6a82959
Revert "Moved import outside of function"
antonpirker Feb 15, 2023
773ed80
Always remove authority, but for now do not filter query values
antonpirker Feb 15, 2023
51ab32d
Moved import to the bottom of file to prevent circular import
antonpirker Feb 15, 2023
4eaafc0
Revert "Moved import to the bottom of file to prevent circular import"
antonpirker Feb 15, 2023
3022143
Moved SENSITIVE_DATA_SUBSTITUTE to utils.py to prevent circular imports
antonpirker Feb 15, 2023
75bea04
Merge branch 'master' into antonpirker/1742-remove-sensitive-data-fro…
antonpirker Feb 16, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions sentry_sdk/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,6 @@
DEFAULT_QUEUE_SIZE = 100
DEFAULT_MAX_BREADCRUMBS = 100

SENSITIVE_DATA_SUBSTITUTE = "[Filtered]"


class INSTRUMENTER:
SENTRY = "sentry"
Expand Down
8 changes: 7 additions & 1 deletion sentry_sdk/integrations/boto3.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from sentry_sdk._functools import partial
from sentry_sdk._types import MYPY
from sentry_sdk.utils import parse_url

if MYPY:
from typing import Any
Expand Down Expand Up @@ -66,9 +67,14 @@ def _sentry_request_created(service_id, request, operation_name, **kwargs):
op=OP.HTTP_CLIENT,
description=description,
)

parsed_url = parse_url(request.url, sanitize=False)

span.set_tag("aws.service_id", service_id)
span.set_tag("aws.operation_name", operation_name)
span.set_data("aws.request.url", request.url)
span.set_data("aws.request.url", parsed_url.url)
span.set_data("http.query", parsed_url.query)
span.set_data("http.fragment", parsed_url.fragment)

# We do it in order for subsequent http calls/retries be
# attached to this span.
Expand Down
3 changes: 2 additions & 1 deletion sentry_sdk/integrations/django/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import weakref

from sentry_sdk._types import MYPY
from sentry_sdk.consts import OP, SENSITIVE_DATA_SUBSTITUTE
from sentry_sdk.consts import OP
from sentry_sdk.hub import Hub, _should_send_default_pii
from sentry_sdk.scope import add_global_event_processor
from sentry_sdk.serializer import add_global_repr_processor
Expand All @@ -16,6 +16,7 @@
AnnotatedValue,
HAS_REAL_CONTEXTVARS,
CONTEXTVARS_ERROR_MESSAGE,
SENSITIVE_DATA_SUBSTITUTE,
logger,
capture_internal_exceptions,
event_from_exception,
Expand Down
24 changes: 19 additions & 5 deletions sentry_sdk/integrations/httpx.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from sentry_sdk import Hub
from sentry_sdk.consts import OP
from sentry_sdk.integrations import Integration, DidNotEnable
from sentry_sdk.utils import logger
from sentry_sdk.utils import logger, parse_url

from sentry_sdk._types import MYPY

Expand Down Expand Up @@ -41,11 +41,17 @@ def send(self, request, **kwargs):
if hub.get_integration(HttpxIntegration) is None:
return real_send(self, request, **kwargs)

parsed_url = parse_url(str(request.url), sanitize=False)

with hub.start_span(
op=OP.HTTP_CLIENT, description="%s %s" % (request.method, request.url)
op=OP.HTTP_CLIENT,
description="%s %s" % (request.method, parsed_url.url),
) as span:
span.set_data("method", request.method)
span.set_data("url", str(request.url))
span.set_data("url", parsed_url.url)
span.set_data("http.query", parsed_url.query)
span.set_data("http.fragment", parsed_url.fragment)

for key, value in hub.iter_trace_propagation_headers():
logger.debug(
"[Tracing] Adding `{key}` header {value} to outgoing request to {url}.".format(
Expand All @@ -58,6 +64,7 @@ def send(self, request, **kwargs):
span.set_data("status_code", rv.status_code)
span.set_http_status(rv.status_code)
span.set_data("reason", rv.reason_phrase)

return rv

Client.send = send
Expand All @@ -73,11 +80,17 @@ async def send(self, request, **kwargs):
if hub.get_integration(HttpxIntegration) is None:
return await real_send(self, request, **kwargs)

parsed_url = parse_url(str(request.url), sanitize=False)

with hub.start_span(
op=OP.HTTP_CLIENT, description="%s %s" % (request.method, request.url)
op=OP.HTTP_CLIENT,
description="%s %s" % (request.method, parsed_url.url),
) as span:
span.set_data("method", request.method)
span.set_data("url", str(request.url))
span.set_data("url", parsed_url.url)
span.set_data("http.query", parsed_url.query)
span.set_data("http.fragment", parsed_url.fragment)

for key, value in hub.iter_trace_propagation_headers():
logger.debug(
"[Tracing] Adding `{key}` header {value} to outgoing request to {url}.".format(
Expand All @@ -90,6 +103,7 @@ async def send(self, request, **kwargs):
span.set_data("status_code", rv.status_code)
span.set_http_status(rv.status_code)
span.set_data("reason", rv.reason_phrase)

return rv

AsyncClient.send = send
8 changes: 6 additions & 2 deletions sentry_sdk/integrations/huey.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,15 @@
from sentry_sdk._compat import reraise
from sentry_sdk._types import MYPY
from sentry_sdk import Hub
from sentry_sdk.consts import OP, SENSITIVE_DATA_SUBSTITUTE
from sentry_sdk.consts import OP
from sentry_sdk.hub import _should_send_default_pii
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.tracing import Transaction, TRANSACTION_SOURCE_TASK
from sentry_sdk.utils import capture_internal_exceptions, event_from_exception
from sentry_sdk.utils import (
capture_internal_exceptions,
event_from_exception,
SENSITIVE_DATA_SUBSTITUTE,
)

if MYPY:
from typing import Any, Callable, Optional, Union, TypeVar
Expand Down
16 changes: 13 additions & 3 deletions sentry_sdk/integrations/stdlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,12 @@
from sentry_sdk.integrations import Integration
from sentry_sdk.scope import add_global_event_processor
from sentry_sdk.tracing_utils import EnvironHeaders
from sentry_sdk.utils import capture_internal_exceptions, logger, safe_repr
from sentry_sdk.utils import (
capture_internal_exceptions,
logger,
safe_repr,
parse_url,
)

from sentry_sdk._types import MYPY

Expand Down Expand Up @@ -79,12 +84,17 @@ def putrequest(self, method, url, *args, **kwargs):
url,
)

parsed_url = parse_url(real_url, sanitize=False)

span = hub.start_span(
op=OP.HTTP_CLIENT, description="%s %s" % (method, real_url)
op=OP.HTTP_CLIENT,
description="%s %s" % (method, parsed_url.url),
)

span.set_data("method", method)
span.set_data("url", real_url)
span.set_data("url", parsed_url.url)
span.set_data("http.query", parsed_url.query)
span.set_data("http.fragment", parsed_url.fragment)

rv = real_putrequest(self, method, url, *args, **kwargs)

Expand Down
97 changes: 94 additions & 3 deletions sentry_sdk/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,25 @@
import sys
import threading
import time
from collections import namedtuple

try:
# Python 3
from urllib.parse import parse_qs
from urllib.parse import unquote
from urllib.parse import urlencode
from urllib.parse import urlsplit
from urllib.parse import urlunsplit

except ImportError:
# Python 2
from cgi import parse_qs # type: ignore
from urllib import unquote # type: ignore
from urllib import urlencode # type: ignore
from urlparse import urlsplit # type: ignore
from urlparse import urlunsplit # type: ignore


from datetime import datetime
from functools import partial

Expand Down Expand Up @@ -43,13 +62,14 @@

epoch = datetime(1970, 1, 1)


# The logger is created here but initialized in the debug support module
logger = logging.getLogger("sentry_sdk.errors")

MAX_STRING_LENGTH = 1024
BASE64_ALPHABET = re.compile(r"^[a-zA-Z0-9/+=]*$")

SENSITIVE_DATA_SUBSTITUTE = "[Filtered]"


def json_dumps(data):
# type: (Any) -> bytes
Expand Down Expand Up @@ -374,8 +394,6 @@ def removed_because_over_size_limit(cls):
def substituted_because_contains_sensitive_data(cls):
# type: () -> AnnotatedValue
"""The actual value was removed because it contained sensitive information."""
from sentry_sdk.consts import SENSITIVE_DATA_SUBSTITUTE

return AnnotatedValue(
value=SENSITIVE_DATA_SUBSTITUTE,
metadata={
Expand Down Expand Up @@ -1163,6 +1181,79 @@ def from_base64(base64_string):
return utf8_string


Components = namedtuple("Components", ["scheme", "netloc", "path", "query", "fragment"])


def sanitize_url(url, remove_authority=True, remove_query_values=True):
    # type: (str, bool, bool) -> str
    """
    Returns the given URL with the user info and/or the query parameter values
    replaced by SENSITIVE_DATA_SUBSTITUTE.

    :param url: The URL to clean up.
    :param remove_authority: If True and the network location contains an "@",
        everything before the last "@" is replaced by a substituted
        ``user:password`` pair. The host (and port) is kept.
    :param remove_query_values: If True, every query parameter value is replaced
        by the substitute. Repeated keys are collapsed into one and keys with
        blank values are kept. If False, the query string is passed through
        untouched.
    :returns: The reassembled URL. Scheme, path and fragment are never changed.
    """
    parsed_url = urlsplit(url)

    # strip username:password (netloc can be usr:pwd@example.com)
    netloc = parsed_url.netloc
    if remove_authority and "@" in netloc:
        netloc = "%s:%s@%s" % (
            SENSITIVE_DATA_SUBSTITUTE,
            SENSITIVE_DATA_SUBSTITUTE,
            # The host is whatever follows the last "@".
            netloc.rpartition("@")[2],
        )

    # strip values from query string
    if remove_query_values:
        # The query is only parsed when it is actually going to be rewritten.
        query_params = parse_qs(parsed_url.query, keep_blank_values=True)

        # urlencode() percent-encodes the substitute, so the result is unquoted
        # again to keep it readable.
        # NOTE(review): unquote() also decodes percent-escapes inside the keys,
        # so a key containing an encoded "&", "=" or "#" changes the structure
        # of the resulting query string -- confirm whether that matters.
        query_string = unquote(
            urlencode({key: SENSITIVE_DATA_SUBSTITUTE for key in query_params})
        )
    else:
        query_string = parsed_url.query

    return urlunsplit(
        Components(
            scheme=parsed_url.scheme,
            netloc=netloc,
            path=parsed_url.path,
            query=query_string,
            fragment=parsed_url.fragment,
        )
    )


ParsedUrl = namedtuple("ParsedUrl", ["url", "query", "fragment"])


def parse_url(url, sanitize=True):
    # type: (str, bool) -> ParsedUrl
    """
    Splits a URL into three strings: the URL without query and fragment (scheme,
    network location and path), the query string, and the fragment.

    The URL is passed through sanitize_url() first. The authority (username and
    password) is always filtered; the query parameter values are only filtered
    if sanitize is True.
    """
    cleaned_url = sanitize_url(
        url, remove_authority=True, remove_query_values=sanitize
    )

    scheme, netloc, path, query, fragment = urlsplit(cleaned_url)

    # Reassemble the URL with empty query and fragment parts; those two are
    # reported separately.
    url_without_query_and_fragment = urlunsplit((scheme, netloc, path, "", ""))

    return ParsedUrl(
        url=url_without_query_and_fragment,
        query=query,
        fragment=fragment,
    )


if PY37:

def nanosecond_time():
Expand Down
2 changes: 2 additions & 0 deletions tests/integrations/httpx/test_httpx.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ def before_breadcrumb(crumb, hint):
assert crumb["data"] == {
"url": url,
"method": "GET",
"http.fragment": "",
"http.query": "",
"status_code": 200,
"reason": "OK",
"extra": "foo",
Expand Down
2 changes: 2 additions & 0 deletions tests/integrations/requests/test_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ def test_crumb_capture(sentry_init, capture_events):
assert crumb["data"] == {
"url": "https://httpbin.org/status/418",
"method": "GET",
"http.fragment": "",
"http.query": "",
"status_code": response.status_code,
"reason": response.reason,
}
Loading