1- type PartialURL = {
2- host ?: string ;
3- path ?: string ;
4- protocol ?: string ;
5- relative ?: string ;
6- search ?: string ;
7- hash ?: string ;
8- } ;
/**
 * Loosely-typed pieces of a parsed URL. Every member is optional, so an
 * empty object is a valid value.
 */
type PartialURL = {
  host?: string;
  path?: string;
  protocol?: string;
  relative?: string;
  search?: string;
  hash?: string;
  /** The WHATWG `URL` object, present when the input was parsed with `new URL()`. */
  urlInstance?: URL;
};
10+
/**
 * Splits a URL-like string into its components (the pattern given in
 * RFC 3986, Appendix B). Every component is optional, so protocol-less
 * input such as `example.com/path` still matches.
 *
 * Capture groups:
 *   2 - scheme, without the trailing `:`
 *   4 - authority, without the leading `//`
 *   5 - path
 *   6 - query including the leading `?` (7 - without it)
 *   8 - fragment including the leading `#` (9 - without it)
 *
 * Whitespace is significant inside a regex literal, so the pattern must be
 * written without any padding between its tokens.
 */
const urlRegex = /^(([^:/?#]+):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$/;
912
1013/**
1114 * Parses string form of URL into an object
@@ -19,7 +22,31 @@ export function parseUrl(url: string): PartialURL {
1922 return { } ;
2023 }
2124
22- const match = url . match ( / ^ ( ( [ ^ : / ? # ] + ) : ) ? ( \/ \/ ( [ ^ / ? # ] * ) ) ? ( [ ^ ? # ] * ) ( \? ( [ ^ # ] * ) ) ? ( # ( .* ) ) ? $ / ) ;
// Prefer the WHATWG URL API whenever the runtime exposes a global `URL`.
// `typeof` always yields a string, so the check has to compare against the
// string 'undefined'; comparing against the value `undefined` is always true.
if (typeof URL !== 'undefined') {
  try {
    const parsed = new URL(url);
    const { pathname, search, hash } = parsed;

    return {
      host: parsed.host,
      // WHATWG URL API includes the leading slash in the pathname
      // Example: Returns `/` for `https://sentry.io`
      path: pathname.length === 1 ? '' : pathname,
      // WHATWG URL API includes the trailing colon in the protocol
      // Example: Returns `https:` for `https://sentry.io`
      protocol: parsed.protocol.slice(0, -1),
      search,
      hash,
      relative: pathname + search + hash,
      urlInstance: parsed,
    };
  } catch {
    // `new URL()` throws on input it cannot parse (for example a URL without
    // a protocol); fall through to the regex-based parsing that follows.
  }
}
48+
49+ const match = url . match ( urlRegex ) ;
2350
2451 if ( ! match ) {
2552 return { } ;
@@ -62,15 +89,53 @@ export function getNumberOfUrlSegments(url: string): number {
6289 * see: https://develop.sentry.dev/sdk/data-handling/#structuring-data
6390 */
6491export function getSanitizedUrlString ( url : PartialURL ) : string {
65- const { protocol, host, path } = url ;
92+ const { protocol, host, path, urlInstance } = url ;
93+
94+ // This means that the environment supports WHATWG URL API.
95+ // This case will not be executed if URL does not have a protocol
96+ // since WHATWG URL specification requires protocol to be present.
97+ if ( urlInstance !== undefined ) {
98+ const { port, username, password, hostname, pathname, protocol } = urlInstance ;
99+ const hasAuthority = username . length > 0 || password . length > 0 ;
100+ let output = `${ protocol } //` ;
101+
102+ if ( hasAuthority ) {
103+ if ( username ) {
104+ output += '[filtered]' ;
105+
106+ if ( password ) {
107+ output += ':' ;
108+ }
109+ }
110+
111+ if ( password ) {
112+ output += '[filtered]' ;
113+ }
114+
115+ output += '@' ;
116+ }
117+
118+ output += hostname ;
119+
120+ if ( port && port !== '80' && port !== '443' ) {
121+ output += `:${ port } ` ;
122+ }
123+
124+ // Do not append pathname if it is empty.
125+ // For example: Pathname is `/` for `https://sentry.io`
126+ if ( pathname . length > 1 ) {
127+ output += pathname ;
128+ }
129+
130+ return output ;
131+ }
66132
67133 const filteredHost =
68134 ( host &&
69135 host
70136 // Always filter out authority
71137 . replace ( / ^ .* @ / , '[filtered]:[filtered]@' )
72138 // Don't show standard :80 (http) and :443 (https) ports to reduce the noise
73- // TODO: Use new URL global if it exists
74139 . replace ( / ( : 8 0 ) $ / , '' )
75140 . replace ( / ( : 4 4 3 ) $ / , '' ) ) ||
76141 '' ;
0 commit comments