diff --git a/packages/browser/test/unit/string.test.ts b/packages/browser/test/unit/string.test.ts new file mode 100644 index 000000000000..56119c19a906 --- /dev/null +++ b/packages/browser/test/unit/string.test.ts @@ -0,0 +1,64 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ +import { base64ToUnicode, unicodeToBase64 } from '@sentry/utils'; +import { expect } from 'chai'; + +// See https://tools.ietf.org/html/rfc4648#section-4 for base64 spec +// eslint-disable-next-line no-useless-escape +const BASE64_REGEX = /([a-zA-Z0-9+/]{4})*(|([a-zA-Z0-9+/]{3}=)|([a-zA-Z0-9+/]{2}==))/; + +// NOTE: These tests are copied (and adapted for chai syntax) from `string.test.ts` in `@sentry/utils`. The +// base64-conversion functions have a different implementation in browser and node, so they're copied here to prove they +// work in a real live browser. If you make changes here, make sure to also port them over to that copy. +describe('base64ToUnicode/unicodeToBase64', () => { + const unicodeString = 'Dogs are great!'; + const base64String = 'RG9ncyBhcmUgZ3JlYXQh'; + + it('converts to valid base64', () => { + expect(BASE64_REGEX.test(unicodeToBase64(unicodeString))).to.be.true; + }); + + it('works as expected', () => { + expect(unicodeToBase64(unicodeString)).to.equal(base64String); + expect(base64ToUnicode(base64String)).to.equal(unicodeString); + }); + + it('conversion functions are inverses', () => { + expect(base64ToUnicode(unicodeToBase64(unicodeString))).to.equal(unicodeString); + expect(unicodeToBase64(base64ToUnicode(base64String))).to.equal(base64String); + }); + + it('can handle and preserve multi-byte characters in original string', () => { + ['🐶', 'Καλό κορίτσι, Μάιζεϊ!', 'Of margir hundar! Ég geri ráð fyrir að ég þurfi stærra rúm.'].forEach(orig => { + expect(() => { + unicodeToBase64(orig); + }).not.to.throw; + expect(base64ToUnicode(unicodeToBase64(orig))).to.equal(orig); + }); + }); + + it('throws an error when given invalid input', () => { + expect(() => { + unicodeToBase64(null as any); + }).to.throw('Unable to convert to base64'); + expect(() => { + unicodeToBase64(undefined as any); + }).to.throw('Unable to convert to base64'); + expect(() => { + unicodeToBase64({} as any); + }).to.throw('Unable to convert to base64'); + + expect(() => { + base64ToUnicode(null as any); + }).to.throw('Unable to convert from base64'); + expect(() => { + base64ToUnicode(undefined as any); + }).to.throw('Unable to convert from base64'); + expect(() => { + base64ToUnicode({} as any); + }).to.throw('Unable to convert from base64'); + + // Note that by design, in node base64 encoding and decoding will accept any string, whether or not it's valid + // base64, by ignoring all invalid characters, including whitespace. Therefore, no wacky strings have been included + // here because they don't actually error. + }); +}); diff --git a/packages/utils/src/string.ts b/packages/utils/src/string.ts index 39d1fd251094..12e11941e1a8 100644 --- a/packages/utils/src/string.ts +++ b/packages/utils/src/string.ts @@ -104,6 +104,14 @@ export function isMatchingPattern(value: string, pattern: RegExp | string): bool return false; } +type GlobalWithBase64Helpers = { + // browser + atob?: (base64String: string) => string; + btoa?: (utf8String: string) => string; + // Node + Buffer?: { from: (input: string, encoding: string) => { toString: (encoding: string) => string } }; +}; + /** * Convert a Unicode string to a base64 string. * @@ -112,42 +120,49 @@ export function isMatchingPattern(value: string, pattern: RegExp | string): bool * @returns A base64-encoded version of the string */ export function unicodeToBase64(plaintext: string): string { - const global = getGlobalObject(); - - // Cast to a string just in case we're given something else - const stringifiedInput = String(plaintext); - const errMsg = `Unable to convert to base64: ${ - stringifiedInput.length > 256 ? `${stringifiedInput.slice(0, 256)}...` : stringifiedInput - }`; + const globalObject = getGlobalObject(); // To account for the fact that different platforms use different character encodings natively, our `tracestate` // spec calls for all jsonified data to be encoded in UTF-8 bytes before being passed to the base64 encoder. try { + if (typeof plaintext !== 'string') { + throw new Error(`Input must be a string. Received input of type '${typeof plaintext}'.`); + } + // browser - if ('btoa' in global) { + if ('btoa' in globalObject) { // encode using UTF-8 const bytes = new TextEncoder().encode(plaintext); // decode using UTF-16 (JS's native encoding) since `btoa` requires string input const bytesAsString = String.fromCharCode(...bytes); - return btoa(bytesAsString); + // TODO: if TS ever learns about "in", we can get rid of the non-null assertion + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + return globalObject.btoa!(bytesAsString); } // Node - if ('Buffer' in global) { + if ('Buffer' in globalObject) { // encode using UTF-8 - const bytes = Buffer.from(plaintext, 'utf-8'); + // TODO: if TS ever learns about "in", we can get rid of the non-null assertion + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + const bytes = globalObject.Buffer!.from(plaintext, 'utf-8'); // unlike the browser, Node can go straight from bytes to base64 return bytes.toString('base64'); } + + // we shouldn't ever get here, because one of `btoa` and `Buffer` should exist, but just in case... + throw new SentryError('Neither `window.btoa` nor `global.Buffer` is defined.'); } catch (err) { + // Cast to a string just in case we're given something else + const stringifiedInput = JSON.stringify(plaintext); + const errMsg = `Unable to convert to base64: ${ + stringifiedInput?.length > 256 ? `${stringifiedInput.slice(0, 256)}...` : stringifiedInput + }.`; throw new SentryError(`${errMsg}\nGot error: ${err}`); } - - // we shouldn't ever get here, because one of `btoa` and `Buffer` should exist, but just in case... - throw new SentryError(errMsg); } /** @@ -158,22 +173,22 @@ export function unicodeToBase64(plaintext: string): string { * @returns A Unicode string */ export function base64ToUnicode(base64String: string): string { - const globalObject = getGlobalObject(); - - // we cast to a string just in case we're given something else - const stringifiedInput = String(base64String); - const errMsg = `Unable to convert from base64: ${ - stringifiedInput.length > 256 ? `${stringifiedInput.slice(0, 256)}...` : stringifiedInput - }`; + const globalObject = getGlobalObject(); // To account for the fact that different platforms use different character encodings natively, our `tracestate` spec // calls for all jsonified data to be encoded in UTF-8 bytes before being passed to the base64 encoder. So to reverse // the process, decode from base64 to bytes, then feed those bytes to a UTF-8 decoder. try { + if (typeof base64String !== 'string') { + throw new Error(`Input must be a string. Received input of type '${typeof base64String}'.`); + } + // browser if ('atob' in globalObject) { // `atob` returns a string rather than bytes, so we first need to encode using the native encoding (UTF-16) - const bytesAsString = atob(base64String); + // TODO: if TS ever learns about "in", we can get rid of the non-null assertion + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + const bytesAsString = globalObject.atob!(base64String); const bytes = [...bytesAsString].map(char => char.charCodeAt(0)); // decode using UTF-8 (cast the `bytes` arry to a Uint8Array just because that's the format `decode()` expects) @@ -183,15 +198,22 @@ export function base64ToUnicode(base64String: string): string { // Node if ('Buffer' in globalObject) { // unlike the browser, Node can go straight from base64 to bytes - const bytes = Buffer.from(base64String, 'base64'); + // TODO: if TS ever learns about "in", we can get rid of the non-null assertion + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + const bytes = globalObject.Buffer!.from(base64String, 'base64'); // decode using UTF-8 return bytes.toString('utf-8'); } + + // we shouldn't ever get here, because one of `atob` and `Buffer` should exist, but just in case... + throw new SentryError('Neither `window.atob` nor `global.Buffer` is defined.'); } catch (err) { + // we cast to a string just in case we're given something else + const stringifiedInput = JSON.stringify(base64String); + const errMsg = `Unable to convert from base64: ${ + stringifiedInput?.length > 256 ? `${stringifiedInput.slice(0, 256)}...` : stringifiedInput + }.`; throw new SentryError(`${errMsg}\nGot error: ${err}`); } - - // we shouldn't ever get here, because one of `atob` and `Buffer` should exist, but just in case... - throw new SentryError(errMsg); } diff --git a/packages/utils/test/string.test.ts b/packages/utils/test/string.test.ts index 1ed7a2d23264..fe8bfda35481 100644 --- a/packages/utils/test/string.test.ts +++ b/packages/utils/test/string.test.ts @@ -50,6 +50,9 @@ describe('isMatchingPattern()', () => { }); }); +// NOTE: These tests are copied (and adapted for chai syntax) to `string.test.ts` in `@sentry/browser`. The +// base64-conversion functions have a different implementation in browser and node, so they're copied there to prove +// they work in a real live browser. If you make changes here, make sure to also port them over to that copy. describe('base64ToUnicode/unicodeToBase64', () => { const unicodeString = 'Dogs are great!'; const base64String = 'RG9ncyBhcmUgZ3JlYXQh';