From f8c5a6ff6db5f69b47e376314249c3397c95b545 Mon Sep 17 00:00:00 2001 From: Alden Quimby Date: Thu, 16 Feb 2023 10:58:42 -0500 Subject: [PATCH 1/6] feat(pii) Sanitize URLs in Span descriptions and breadcrumbs --- packages/node/src/integrations/http.ts | 30 +++++++++++++++--- packages/node/src/integrations/utils/http.ts | 3 +- packages/node/test/integrations/http.test.ts | 33 ++++++++++++++++++++ 3 files changed, 60 insertions(+), 6 deletions(-) diff --git a/packages/node/src/integrations/http.ts b/packages/node/src/integrations/http.ts index ea7a9eae173d..8eadada93009 100644 --- a/packages/node/src/integrations/http.ts +++ b/packages/node/src/integrations/http.ts @@ -192,13 +192,28 @@ function _createWrappedRequestMethodFactory( const scope = getCurrentHub().getScope(); + const spanData: Record = { + url: requestUrl, + method: requestOptions.method || 'GET', + }; + if (requestOptions.hash) { + // strip leading "#" + spanData['http.fragment'] = requestOptions.hash.substring(1); + } + if (requestOptions.search) { + // strip leading "?" + spanData['http.query'] = requestOptions.search.substring(1); + } + + // TODO potential breaking change... shouldCreateSpanForRequest no longer has access to query + fragment, is that a problem? if (scope && tracingOptions && shouldCreateSpan(requestUrl)) { parentSpan = scope.getSpan(); if (parentSpan) { requestSpan = parentSpan.startChild({ - description: `${requestOptions.method || 'GET'} ${requestUrl}`, + description: `${spanData.method} ${spanData.url}`, op: 'http.client', + data: spanData, }); if (shouldAttachTraceData(requestUrl)) { @@ -253,7 +268,7 @@ function _createWrappedRequestMethodFactory( // eslint-disable-next-line @typescript-eslint/no-this-alias const req = this; if (breadcrumbsEnabled) { - addRequestBreadcrumb('response', requestUrl, req, res); + addRequestBreadcrumb('response', spanData, req, res); } if (requestSpan) { if (res.statusCode) { @@ -268,7 +283,7 @@ function _createWrappedRequestMethodFactory( const req = this; if (breadcrumbsEnabled) { - addRequestBreadcrumb('error', requestUrl, req); + addRequestBreadcrumb('error', spanData, req); } if (requestSpan) { requestSpan.setHttpStatus(500); @@ -283,7 +298,12 @@ function _createWrappedRequestMethodFactory( /** * Captures Breadcrumb based on provided request/response pair */ -function addRequestBreadcrumb(event: string, url: string, req: http.ClientRequest, res?: http.IncomingMessage): void { +function addRequestBreadcrumb( + event: string, + spanData: Record, + req: http.ClientRequest, + res?: http.IncomingMessage, +): void { if (!getCurrentHub().getIntegration(Http)) { return; } @@ -294,7 +314,7 @@ function addRequestBreadcrumb(event: string, url: string, req: http.ClientReques data: { method: req.method, status_code: res && res.statusCode, - url, + ...spanData, }, type: 'http', }, diff --git a/packages/node/src/integrations/utils/http.ts b/packages/node/src/integrations/utils/http.ts index 1281f9d6329d..1bbe23a4455a 100644 --- a/packages/node/src/integrations/utils/http.ts +++ b/packages/node/src/integrations/utils/http.ts @@ -27,7 +27,8 @@ export function extractUrl(requestOptions: RequestOptions): string { // Don't log standard :80 (http) and :443 (https) ports to reduce the noise const port = !requestOptions.port || requestOptions.port === 80 || requestOptions.port === 443 ? '' : `:${requestOptions.port}`; - const path = requestOptions.path ? requestOptions.path : '/'; + // do not include search or hash in span descriptions, per https://github.com/getsentry/develop/pull/773 + const path = requestOptions.pathname || '/'; return `${protocol}//${hostname}${port}${path}`; } diff --git a/packages/node/test/integrations/http.test.ts b/packages/node/test/integrations/http.test.ts index 446aa4ec0f82..6f85dadc5767 100644 --- a/packages/node/test/integrations/http.test.ts +++ b/packages/node/test/integrations/http.test.ts @@ -197,6 +197,39 @@ describe('tracing', () => { expect(loggerLogSpy).toBeCalledWith('HTTP Integration is skipped because of instrumenter configuration.'); }); + it('omits query and fragment from description and adds to span data instead', () => { + nock('http://dogs.are.great').get('/spaniel?tail=wag&cute=true#learn-more').reply(200); + + const transaction = createTransactionOnScope(); + const spans = (transaction as unknown as Span).spanRecorder?.spans as Span[]; + + http.get('http://dogs.are.great/spaniel?tail=wag&cute=true#learn-more'); + + expect(spans.length).toEqual(2); + + // our span is at index 1 because the transaction itself is at index 0 + expect(spans[1].description).toEqual('GET http://dogs.are.great/spaniel'); + expect(spans[1].op).toEqual('http.client'); + expect(spans[1].data.method).toEqual('GET'); + expect(spans[1].data.url).toEqual('http://dogs.are.great/spaniel'); + expect(spans[1].data['http.query']).toEqual('tail=wag&cute=true'); + expect(spans[1].data['http.fragment']).toEqual('learn-more'); + }); + + it('omits username and password entirely from span', () => { + nock('http://username:password@dogs.are.great').get('/').reply(200); + + const transaction = createTransactionOnScope(); + const spans = (transaction as unknown as Span).spanRecorder?.spans as Span[]; + + http.get('http://username:password@dogs.are.great/'); + + expect(spans.length).toEqual(2); + + // our span is at index 1 because the transaction itself is at index 0 + expect(spans[1].description).toEqual('GET http://dogs.are.great/'); + }); + describe('Tracing options', () => { beforeEach(() => { // hacky way of restoring monkey patched functions From e57ff257f0c20c13d7b05a31d44aed49c4236777 Mon Sep 17 00:00:00 2001 From: Alden Quimby Date: Fri, 17 Feb 2023 07:20:13 -0500 Subject: [PATCH 2/6] Reviewer suggestion Co-authored-by: Michi Hoffmann --- packages/node/test/integrations/http.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/node/test/integrations/http.test.ts b/packages/node/test/integrations/http.test.ts index 6f85dadc5767..38116e9988be 100644 --- a/packages/node/test/integrations/http.test.ts +++ b/packages/node/test/integrations/http.test.ts @@ -216,7 +216,7 @@ describe('tracing', () => { expect(spans[1].data['http.fragment']).toEqual('learn-more'); }); - it('omits username and password entirely from span', () => { + it('omits the authority (username and password) entirely from span', () => { nock('http://username:password@dogs.are.great').get('/').reply(200); const transaction = createTransactionOnScope(); From ad7285575fe412c46e2c9983457600190dc9c1d5 Mon Sep 17 00:00:00 2001 From: Alden Quimby Date: Fri, 17 Feb 2023 07:20:50 -0500 Subject: [PATCH 3/6] Refer to official docs, not a PR Co-authored-by: Michi Hoffmann --- packages/node/src/integrations/utils/http.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/node/src/integrations/utils/http.ts b/packages/node/src/integrations/utils/http.ts index 1bbe23a4455a..2b630c89c30a 100644 --- a/packages/node/src/integrations/utils/http.ts +++ b/packages/node/src/integrations/utils/http.ts @@ -27,7 +27,7 @@ export function extractUrl(requestOptions: RequestOptions): string { // Don't log standard :80 (http) and :443 (https) ports to reduce the noise const port = !requestOptions.port || requestOptions.port === 80 || requestOptions.port === 443 ? '' : `:${requestOptions.port}`; - // do not include search or hash in span descriptions, per https://github.com/getsentry/develop/pull/773 + // do not include search or hash in span descriptions, per https://develop.sentry.dev/sdk/data-handling/#structuring-data const path = requestOptions.pathname || '/'; return `${protocol}//${hostname}${port}${path}`; From a1126b4b50a9f058058cb27cc9a72ef564c65712 Mon Sep 17 00:00:00 2001 From: Alden Quimby Date: Fri, 17 Feb 2023 07:36:19 -0500 Subject: [PATCH 4/6] Filter authority --- packages/node/src/integrations/utils/http.ts | 6 ++++-- packages/node/test/integrations/http.test.ts | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/packages/node/src/integrations/utils/http.ts b/packages/node/src/integrations/utils/http.ts index 2b630c89c30a..f708a2b0eb72 100644 --- a/packages/node/src/integrations/utils/http.ts +++ b/packages/node/src/integrations/utils/http.ts @@ -29,8 +29,9 @@ export function extractUrl(requestOptions: RequestOptions): string { !requestOptions.port || requestOptions.port === 80 || requestOptions.port === 443 ? '' : `:${requestOptions.port}`; // do not include search or hash in span descriptions, per https://develop.sentry.dev/sdk/data-handling/#structuring-data const path = requestOptions.pathname || '/'; + const authority = requestOptions.auth ? `${requestOptions.auth}@` : ''; - return `${protocol}//${hostname}${port}${path}`; + return `${protocol}//${authority}${hostname}${port}${path}`; } /** @@ -102,7 +103,8 @@ export function urlToOptions(url: URL): RequestOptions { options.port = Number(url.port); } if (url.username || url.password) { - options.auth = `${url.username}:${url.password}`; + // always filter authority, see https://develop.sentry.dev/sdk/data-handling/#structuring-data + options.auth = '[Filtered]:[Filtered]'; } return options; } diff --git a/packages/node/test/integrations/http.test.ts b/packages/node/test/integrations/http.test.ts index 38116e9988be..db159b7212c6 100644 --- a/packages/node/test/integrations/http.test.ts +++ b/packages/node/test/integrations/http.test.ts @@ -216,7 +216,7 @@ describe('tracing', () => { expect(spans[1].data['http.fragment']).toEqual('learn-more'); }); - it('omits the authority (username and password) entirely from span', () => { + it('filters the authority (username and password) in span description', () => { nock('http://username:password@dogs.are.great').get('/').reply(200); const transaction = createTransactionOnScope(); @@ -227,7 +227,7 @@ describe('tracing', () => { expect(spans.length).toEqual(2); // our span is at index 1 because the transaction itself is at index 0 - expect(spans[1].description).toEqual('GET http://dogs.are.great/'); + expect(spans[1].description).toEqual('GET http://[Filtered]:[Filtered]@dogs.are.great/'); }); describe('Tracing options', () => { From 98c3a5891f152181bb3859a9274f549f87e4e2c8 Mon Sep 17 00:00:00 2001 From: Alden Quimby Date: Wed, 15 Mar 2023 07:31:02 -0400 Subject: [PATCH 5/6] Updates from code review --- packages/node/src/integrations/http.ts | 37 +++++++++++++------- packages/node/src/integrations/utils/http.ts | 13 +++++++ 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/packages/node/src/integrations/http.ts b/packages/node/src/integrations/http.ts index 8eadada93009..aaa6581add9d 100644 --- a/packages/node/src/integrations/http.ts +++ b/packages/node/src/integrations/http.ts @@ -14,7 +14,7 @@ import { LRUMap } from 'lru_map'; import type { NodeClient } from '../client'; import type { RequestMethod, RequestMethodArgs } from './utils/http'; -import { cleanSpanDescription, extractUrl, isSentryRequest, normalizeRequestArgs } from './utils/http'; +import { cleanSpanDescription, extractRawUrl, extractUrl, isSentryRequest, normalizeRequestArgs } from './utils/http'; const NODE_VERSION = parseSemver(process.versions.node); @@ -129,6 +129,16 @@ type OriginalRequestMethod = RequestMethod; type WrappedRequestMethod = RequestMethod; type WrappedRequestMethodFactory = (original: OriginalRequestMethod) => WrappedRequestMethod; +/** + * See https://develop.sentry.dev/sdk/data-handling/#structuring-data + */ +type RequestSpanData = { + url: string; + method: string; + 'http.fragment'?: string; + 'http.query'?: string; +}; + /** * Function which creates a function which creates wrapped versions of internal `request` and `get` calls within `http` * and `https` modules. (NB: Not a typo - this is a creator^2!) @@ -180,6 +190,8 @@ function _createWrappedRequestMethodFactory( return function wrappedMethod(this: unknown, ...args: RequestMethodArgs): http.ClientRequest { const requestArgs = normalizeRequestArgs(httpModule, args); const requestOptions = requestArgs[0]; + // eslint-disable-next-line deprecation/deprecation + const rawRequestUrl = extractRawUrl(requestOptions); const requestUrl = extractUrl(requestOptions); // we don't want to record requests to Sentry as either breadcrumbs or spans, so just use the original method @@ -192,31 +204,30 @@ function _createWrappedRequestMethodFactory( const scope = getCurrentHub().getScope(); - const spanData: Record = { + const requestSpanData: RequestSpanData = { url: requestUrl, method: requestOptions.method || 'GET', }; if (requestOptions.hash) { // strip leading "#" - spanData['http.fragment'] = requestOptions.hash.substring(1); + requestSpanData['http.fragment'] = requestOptions.hash.substring(1); } if (requestOptions.search) { // strip leading "?" - spanData['http.query'] = requestOptions.search.substring(1); + requestSpanData['http.query'] = requestOptions.search.substring(1); } - // TODO potential breaking change... shouldCreateSpanForRequest no longer has access to query + fragment, is that a problem? - if (scope && tracingOptions && shouldCreateSpan(requestUrl)) { + if (scope && tracingOptions && shouldCreateSpan(rawRequestUrl)) { parentSpan = scope.getSpan(); if (parentSpan) { requestSpan = parentSpan.startChild({ - description: `${spanData.method} ${spanData.url}`, + description: `${requestSpanData.method} ${requestSpanData.url}`, op: 'http.client', - data: spanData, + data: requestSpanData, }); - if (shouldAttachTraceData(requestUrl)) { + if (shouldAttachTraceData(rawRequestUrl)) { const sentryTraceHeader = requestSpan.toTraceparent(); __DEBUG_BUILD__ && logger.log( @@ -268,7 +279,7 @@ function _createWrappedRequestMethodFactory( // eslint-disable-next-line @typescript-eslint/no-this-alias const req = this; if (breadcrumbsEnabled) { - addRequestBreadcrumb('response', spanData, req, res); + addRequestBreadcrumb('response', requestSpanData, req, res); } if (requestSpan) { if (res.statusCode) { @@ -283,7 +294,7 @@ function _createWrappedRequestMethodFactory( const req = this; if (breadcrumbsEnabled) { - addRequestBreadcrumb('error', spanData, req); + addRequestBreadcrumb('error', requestSpanData, req); } if (requestSpan) { requestSpan.setHttpStatus(500); @@ -300,7 +311,7 @@ function _createWrappedRequestMethodFactory( */ function addRequestBreadcrumb( event: string, - spanData: Record, + requestSpanData: RequestSpanData, req: http.ClientRequest, res?: http.IncomingMessage, ): void { @@ -314,7 +325,7 @@ function addRequestBreadcrumb( data: { method: req.method, status_code: res && res.statusCode, - ...spanData, + ...requestSpanData, }, type: 'http', }, diff --git a/packages/node/src/integrations/utils/http.ts b/packages/node/src/integrations/utils/http.ts index f708a2b0eb72..9acf1fb0a36b 100644 --- a/packages/node/src/integrations/utils/http.ts +++ b/packages/node/src/integrations/utils/http.ts @@ -15,6 +15,19 @@ export function isSentryRequest(url: string): boolean { return dsn ? url.includes(dsn.host) : false; } +/** + * @deprecated Please use extractUrl instead. Only exists for backwards compatability as we sanitize spans and breadcrumbs. + */ +export function extractRawUrl(requestOptions: RequestOptions): string { + const protocol = requestOptions.protocol || ''; + const hostname = requestOptions.hostname || requestOptions.host || ''; + // Don't log standard :80 (http) and :443 (https) ports to reduce the noise + const port = + !requestOptions.port || requestOptions.port === 80 || requestOptions.port === 443 ? '' : `:${requestOptions.port}`; + const path = requestOptions.path ? requestOptions.path : '/'; + return `${protocol}//${hostname}${port}${path}`; +} + /** * Assemble a URL to be used for breadcrumbs and spans. * From 63a7decd93e39c6006249cdbda74edc018938b78 Mon Sep 17 00:00:00 2001 From: Lukas Stracke Date: Wed, 29 Mar 2023 17:15:16 +0200 Subject: [PATCH 6/6] Update packages/node/src/integrations/utils/http.ts --- packages/node/src/integrations/utils/http.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/node/src/integrations/utils/http.ts b/packages/node/src/integrations/utils/http.ts index 9acf1fb0a36b..ec1d18606f8c 100644 --- a/packages/node/src/integrations/utils/http.ts +++ b/packages/node/src/integrations/utils/http.ts @@ -15,9 +15,6 @@ export function isSentryRequest(url: string): boolean { return dsn ? url.includes(dsn.host) : false; } -/** - * @deprecated Please use extractUrl instead. Only exists for backwards compatability as we sanitize spans and breadcrumbs. - */ export function extractRawUrl(requestOptions: RequestOptions): string { const protocol = requestOptions.protocol || ''; const hostname = requestOptions.hostname || requestOptions.host || '';