Skip to content

Commit f16251a

Browse files
author
Brian Vaughn
committed
Switched UTF string encoding approach to handle multibyte characters
Also removed LRU cache since the caching approach seems to slow things down overall, based on benchmarking.
1 parent 8464d69 commit f16251a

File tree

4 files changed

+85
-68
lines changed

4 files changed

+85
-68
lines changed

packages/react-devtools-shared/src/__tests__/setupEnv.js

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,3 @@ global.process.env.DARK_MODE_DIMMED_LOG_COLOR = DARK_MODE_DIMMED_LOG_COLOR;
2424
global.process.env.LIGHT_MODE_DIMMED_WARNING_COLOR = LIGHT_MODE_DIMMED_WARNING_COLOR;
2525
global.process.env.LIGHT_MODE_DIMMED_ERROR_COLOR = LIGHT_MODE_DIMMED_ERROR_COLOR;
2626
global.process.env.LIGHT_MODE_DIMMED_LOG_COLOR = LIGHT_MODE_DIMMED_LOG_COLOR;
27-
28-
global.TextEncoder = require('util').TextEncoder;
29-
global.TextDecoder = require('util').TextDecoder;

packages/react-devtools-shared/src/__tests__/store-test.js

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,19 @@ describe('Store', () => {
101101
`);
102102
});
103103

104+
it('should handle multibyte character strings', () => {
105+
const Component = () => null;
106+
Component.displayName = '🟩💜🔵';
107+
108+
const container = document.createElement('div');
109+
110+
act(() => legacyRender(<Component />, container));
111+
expect(store).toMatchInlineSnapshot(`
112+
[root]
113+
<🟩💜🔵>
114+
`);
115+
});
116+
104117
describe('collapseNodesByDefault:false', () => {
105118
beforeEach(() => {
106119
store.collapseNodesByDefault = false;

packages/react-devtools-shared/src/backend/renderer.js

Lines changed: 41 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1513,11 +1513,16 @@ export function attach(
15131513

15141514
type OperationsArray = Array<number>;
15151515

1516+
// Value stored in pendingStringTable for each distinct string queued for the
// next operations flush.
type StringTableEntry = {|
1517+
// The string as an array of numbers, produced once by utfEncodeString() in
// getStringID(). Its length (not the JS string length) is what gets added to
// pendingStringTableLength and written ahead of the string's data.
encodedString: Array<number>,
1518+
// ID returned by getStringID() for this string; assigned as
// (pendingStringTable.size + 1) at insertion time, so IDs start at 1
// (getStringID returns 0 for null).
id: number,
1519+
|};
1520+
15161521
const pendingOperations: OperationsArray = [];
15171522
const pendingRealUnmountedIDs: Array<number> = [];
15181523
const pendingSimulatedUnmountedIDs: Array<number> = [];
15191524
let pendingOperationsQueue: Array<OperationsArray> | null = [];
1520-
const pendingStringTable: Map<string, number> = new Map();
1525+
const pendingStringTable: Map<string, StringTableEntry> = new Map();
15211526
let pendingStringTableLength: number = 0;
15221527
let pendingUnmountedRootID: number | null = null;
15231528

@@ -1735,13 +1740,19 @@ export function attach(
17351740
// Now fill in the string table.
17361741
// [stringTableLength, str1Length, ...str1, str2Length, ...str2, ...]
17371742
operations[i++] = pendingStringTableLength;
1738-
pendingStringTable.forEach((value, key) => {
1739-
operations[i++] = key.length;
1740-
const encodedKey = utfEncodeString(key);
1741-
for (let j = 0; j < encodedKey.length; j++) {
1742-
operations[i + j] = encodedKey[j];
1743+
pendingStringTable.forEach((entry, stringKey) => {
1744+
const encodedString = entry.encodedString;
1745+
1746+
// Don't use the string length.
1747+
// It won't work for multibyte characters (like emoji).
1748+
const length = encodedString.length;
1749+
1750+
operations[i++] = length;
1751+
for (let j = 0; j < encodedString.length; j++) {
1752+
operations[i + j] = encodedString[j];
17431753
}
1744-
i += key.length;
1754+
1755+
i += length;
17451756
});
17461757

17471758
if (numUnmountIDs > 0) {
@@ -1788,21 +1799,31 @@ export function attach(
17881799
pendingStringTableLength = 0;
17891800
}
17901801

1791-
function getStringID(str: string | null): number {
1792-
if (str === null) {
1802+
function getStringID(string: string | null): number {
1803+
if (string === null) {
17931804
return 0;
17941805
}
1795-
const existingID = pendingStringTable.get(str);
1796-
if (existingID !== undefined) {
1797-
return existingID;
1798-
}
1799-
const stringID = pendingStringTable.size + 1;
1800-
pendingStringTable.set(str, stringID);
1801-
// The string table total length needs to account
1802-
// both for the string length, and for the array item
1803-
// that contains the length itself. Hence + 1.
1804-
pendingStringTableLength += str.length + 1;
1805-
return stringID;
1806+
const existingEntry = pendingStringTable.get(string);
1807+
if (existingEntry !== undefined) {
1808+
return existingEntry.id;
1809+
}
1810+
1811+
const id = pendingStringTable.size + 1;
1812+
const encodedString = utfEncodeString(string);
1813+
1814+
pendingStringTable.set(string, {
1815+
encodedString,
1816+
id,
1817+
});
1818+
1819+
// The string table total length needs to account both for the string length,
1820+
// and for the array item that contains the length itself.
1821+
//
1822+
// Don't use string length for this table.
1823+
// It won't work for multibyte characters (like emoji).
1824+
pendingStringTableLength += encodedString.length + 1;
1825+
1826+
return id;
18061827
}
18071828

18081829
function recordMount(fiber: Fiber, parentFiber: Fiber | null) {

packages/react-devtools-shared/src/utils.js

Lines changed: 31 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
* @flow
88
*/
99

10-
import LRU from 'lru-cache';
1110
import {
1211
isElement,
1312
typeOf,
@@ -50,19 +49,9 @@ import {localStorageGetItem, localStorageSetItem} from './storage';
5049
import {meta} from './hydration';
5150

5251
import type {ComponentFilter, ElementType} from './types';
53-
import type {LRUCache} from 'react-devtools-shared/src/types';
5452

5553
const cachedDisplayNames: WeakMap<Function, string> = new WeakMap();
5654

57-
// On large trees, encoding takes significant time.
58-
// Try to reuse the already encoded strings.
59-
const encodedStringCache: LRUCache<
60-
string,
61-
Array<number> | Uint8Array,
62-
> = new LRU({
63-
max: 1000,
64-
});
65-
6655
export function alphaSortKeys(
6756
a: string | number | Symbol,
6857
b: string | number | Symbol,
@@ -128,47 +117,44 @@ export function getUID(): number {
128117
return ++uidCounter;
129118
}
130119

131-
const isTextEncoderSupported =
132-
typeof TextDecoder === 'function' && typeof TextEncoder === 'function';
133-
134120
export function utfDecodeString(array: Array<number>): string {
135-
if (isTextEncoderSupported) {
136-
// Handles multi-byte characters; use if available.
137-
return new TextDecoder().decode(new Uint8Array(array));
138-
} else {
139-
// Avoid spreading the array (e.g. String.fromCodePoint(...array))
140-
// Functions arguments are first placed on the stack before the function is called
141-
// which throws a RangeError for large arrays.
142-
// See github.com/facebook/react/issues/22293
143-
let string = '';
144-
for (let i = 0; i < array.length; i++) {
145-
const char = array[i];
146-
string += String.fromCodePoint(char);
147-
}
148-
return string;
121+
// Avoid spreading the array (e.g. String.fromCodePoint(...array))
122+
// Functions arguments are first placed on the stack before the function is called
123+
// which throws a RangeError for large arrays.
124+
// See github.com/facebook/react/issues/22293
125+
let string = '';
126+
for (let i = 0; i < array.length; i++) {
127+
const char = array[i];
128+
string += String.fromCodePoint(char);
149129
}
130+
return string;
150131
}
151132

152-
export function utfEncodeString(string: string): Array<number> | Uint8Array {
153-
const cached = encodedStringCache.get(string);
154-
if (cached !== undefined) {
155-
return cached;
156-
}
133+
function surrogatePairToCodePoint(
134+
charCode1: number,
135+
charCode2: number,
136+
): number {
137+
return ((charCode1 & 0x3ff) << 10) + (charCode2 & 0x3ff) + 0x10000;
138+
}
157139

158-
let encoded;
159-
if (isTextEncoderSupported) {
160-
// Handles multi-byte characters; use if available.
161-
encoded = new TextEncoder().encode(string);
162-
} else {
163-
encoded = new Array(string.length);
164-
for (let i = 0; i < string.length; i++) {
165-
encoded[i] = string.codePointAt(i);
140+
// Credit for this encoding approach goes to Tim Down:
141+
// https://stackoverflow.com/questions/4877326/how-can-i-tell-if-a-string-contains-multibyte-characters-in-javascript
142+
export function utfEncodeString(string: string): Array<number> {
143+
const codePoints = [];
144+
let i = 0;
145+
let charCode;
146+
while (i < string.length) {
147+
charCode = string.charCodeAt(i);
148+
if ((charCode & 0xf800) === 0xd800) {
149+
codePoints.push(
150+
surrogatePairToCodePoint(charCode, string.charCodeAt(++i)),
151+
);
152+
} else {
153+
codePoints.push(charCode);
166154
}
155+
++i;
167156
}
168-
169-
encodedStringCache.set(string, encoded);
170-
171-
return encoded;
157+
return codePoints;
172158
}
173159

174160
export function printOperationsArray(operations: Array<number>) {

0 commit comments

Comments
 (0)