Skip to content

Commit a5300ad

Browse files
committed
feat(NODE-5909): optimize writing basic latin strings
1 parent 3242587 commit a5300ad

File tree

4 files changed

+122
-17
lines changed

4 files changed

+122
-17
lines changed

src/utils/latin.ts

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,42 @@ export function tryLatin(uint8array: Uint8Array, start: number, end: number): st
5959

6060
return String.fromCharCode(...latinBytes);
6161
}
62+
63+
/**
 * This function is an optimization for writing small basic latin strings.
 * It copies each UTF-16 code unit straight into the destination, which is only
 * valid UTF-8 when every code unit is in the range 0-127.
 * @internal
 * @remarks
 * ### Important characteristics:
 * - If the string length is 0 return 0, do not perform any work
 * - If a string is longer than 25 code units return null
 * - If any code unit exceeds 127 this function returns null
 * - If the destination does not have room for the whole string starting at offset return null
 * - If offset is negative or not an integer return null, so the caller's general
 *   purpose encoder decides how to handle it instead of bytes being silently dropped
 * - When null is returned because of a code unit above 127, the code units that
 *   preceded it have already been written to destination
 *
 * @param destination - The uint8array to serialize the string to
 * @param source - The string to turn into UTF-8 bytes if it fits in the basic latin range
 * @param offset - The position in the destination to begin writing bytes to
 * @returns the number of bytes written to destination if all code units are below 128, otherwise null
 */
export function tryWriteLatin(
  destination: Uint8Array,
  source: string,
  offset: number
): number | null {
  if (source.length === 0) return 0;

  if (source.length > 25) return null;

  // Typed arrays ignore writes to negative or fractional indices, so without this
  // guard the loop below would report success without storing every byte.
  if (offset < 0 || !Number.isInteger(offset)) return null;

  if (destination.length - offset < source.length) return null;

  for (
    let charOffset = 0, destinationOffset = offset;
    charOffset < source.length;
    charOffset++, destinationOffset++
  ) {
    const char = source.charCodeAt(charOffset);
    // Anything above 127 needs more than one byte in UTF-8, bail out to the caller.
    if (char > 127) return null;

    destination[destinationOffset] = char;
  }

  // One byte is written per code unit, so the byte count equals the string length.
  return source.length;
}

src/utils/node_byte_utils.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { BSONError } from '../error';
22
import { validateUtf8 } from '../validate_utf8';
3-
import { tryLatin } from './latin';
3+
import { tryLatin, tryWriteLatin } from './latin';
44

55
type NodeJsEncoding = 'base64' | 'hex' | 'utf8' | 'binary';
66
type NodeJsBuffer = ArrayBufferView &
@@ -149,6 +149,11 @@ export const nodeJsByteUtils = {
149149
},
150150

151151
/**
 * Writes `source` into `buffer` as UTF-8 starting at `byteOffset`.
 *
 * The basic latin fast path (`tryWriteLatin`) is attempted first; when it
 * declines by returning null the string is written through the `write`
 * method of whatever `toLocalBufferType` returns (presumably a Node.js
 * Buffer over the same memory; confirm against that helper).
 *
 * @returns the value reported by whichever path performed the write
 */
encodeUTF8Into(buffer: Uint8Array, source: string, byteOffset: number): number {
  const fastPathResult = tryWriteLatin(buffer, source, byteOffset);
  if (fastPathResult == null) {
    // Fast path declined: fall back to the general purpose encoder.
    const localBuffer = nodeJsByteUtils.toLocalBufferType(buffer);
    return localBuffer.write(source, byteOffset, undefined, 'utf8');
  }

  return fastPathResult;
},
154159

test/node/byte_utils.test.ts

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -365,33 +365,35 @@ const toISO88591Tests: ByteUtilTest<'toISO88591'>[] = [
365365
}
366366
}
367367
];
368-
const fromUTF8Tests: ByteUtilTest<'fromUTF8'>[] = [
368+
const fromUTF8Tests: ByteUtilTest<'encodeUTF8Into'>[] = [
369369
{
370-
name: 'should create buffer from utf8 input',
371-
inputs: [Buffer.from('abc\u{1f913}', 'utf8').toString('utf8')],
370+
name: 'should insert utf8 bytes into buffer',
371+
inputs: [Buffer.alloc(7), 'abc\u{1f913}', 0],
372372
expectation({ output, error }) {
373373
expect(error).to.be.null;
374-
expect(output).to.deep.equal(Buffer.from('abc\u{1f913}', 'utf8'));
374+
expect(output).to.equal(7);
375+
expect(this.inputs[0]).to.deep.equal(Buffer.from('abc\u{1f913}', 'utf8'));
375376
}
376377
},
377378
{
378-
name: 'should return empty buffer for empty string input',
379-
inputs: [''],
379+
name: 'should return 0 and not modify input buffer',
380+
inputs: [Uint8Array.from([2, 2]), '', 0],
380381
expectation({ output, error }) {
381382
expect(error).to.be.null;
382-
expect(output).to.have.property('byteLength', 0);
383+
expect(output).to.equal(0);
384+
expect(this.inputs[0]).to.deep.equal(Uint8Array.from([2, 2]));
383385
}
384386
},
385387
{
386-
name: 'should return bytes with replacement character if string is not encodable',
387-
inputs: ['\u{1f913}'.slice(0, 1)],
388+
name: 'should insert replacement character bytes if string is not encodable',
389+
inputs: [Uint8Array.from({ length: 10 }, () => 2), '\u{1f913}'.slice(0, 1), 2],
388390
expectation({ output, error }) {
389391
expect(error).to.be.null;
390-
expect(output).to.have.property('byteLength', 3);
391-
expect(output).to.have.property('0', 0xef);
392-
expect(output).to.have.property('1', 0xbf);
393-
expect(output).to.have.property('2', 0xbd);
394-
const backToString = Buffer.from(output!).toString('utf8');
392+
expect(output).to.equal(3);
393+
expect(this.inputs[0]).to.have.property('2', 0xef);
394+
expect(this.inputs[0]).to.have.property('3', 0xbf);
395+
expect(this.inputs[0]).to.have.property('4', 0xbd);
396+
const backToString = Buffer.from(this.inputs[0].subarray(2, 5)).toString('utf8');
395397
const replacementCharacter = '\u{fffd}';
396398
expect(backToString).to.equal(replacementCharacter);
397399
}
@@ -507,7 +509,7 @@ const table = new Map<keyof ByteUtils, ByteUtilTest<keyof ByteUtils>[]>([
507509
['toHex', toHexTests],
508510
['fromISO88591', fromISO88591Tests],
509511
['toISO88591', toISO88591Tests],
510-
['fromUTF8', fromUTF8Tests],
512+
['encodeUTF8Into', fromUTF8Tests],
511513
['toUTF8', toUTF8Tests],
512514
['utf8ByteLength', utf8ByteLengthTests],
513515
['randomBytes', randomBytesTests]

test/node/utils/latin.test.ts

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { expect } from 'chai';
2-
import { tryLatin } from '../../../src/utils/latin';
2+
import { tryLatin, tryWriteLatin } from '../../../src/utils/latin';
33
import * as sinon from 'sinon';
44

55
describe('tryLatin()', () => {
@@ -116,3 +116,62 @@ describe('tryLatin()', () => {
116116
});
117117
});
118118
});
119+
120+
// Unit tests for tryWriteLatin(): covers the empty string, insufficient
// destination space, the 25 code unit limit, code units above 127, and the
// bytes produced on the success path.
describe('tryWriteLatin()', () => {
  // Spy installed around every test so the number of code units inspected can be asserted.
  let charCodeAtSpy: sinon.SinonSpy;

  beforeEach(() => {
    charCodeAtSpy = sinon.spy(String.prototype, 'charCodeAt');
  });

  afterEach(() => {
    sinon.restore();
  });

  context('when given a string of length 0', () => {
    it('returns 0 and does not modify the destination', () => {
      const input = Uint8Array.from({ length: 10 }, () => 1);
      expect(tryWriteLatin(input, '', 2)).to.equal(0);
      expect(input).to.deep.equal(Uint8Array.from({ length: 10 }, () => 1));
    });
  });

  context('when given a string with a length larger than the buffer', () => {
    it('returns null', () => {
      const input = Uint8Array.from({ length: 10 }, () => 1);
      expect(tryWriteLatin(input, 'a'.repeat(11), 0)).to.be.null;
      expect(tryWriteLatin(input, 'a'.repeat(13), 2)).to.be.null;
    });
  });

  context('when given a basic latin string that fits in the destination', () => {
    it('writes one byte per code unit at the offset and returns the string length', () => {
      const input = Uint8Array.from({ length: 6 }, () => 1);
      expect(tryWriteLatin(input, 'abc', 2)).to.equal(3);
      // Bytes before and after the written range must be left alone.
      expect(input).to.deep.equal(Uint8Array.from([1, 1, 0x61, 0x62, 0x63, 1]));
    });
  });

  for (let stringLength = 1; stringLength <= 25; stringLength++) {
    context(`when there is ${stringLength} bytes`, () => {
      context('that exceed 127', () => {
        it('returns null', () => {
          // The final code unit (0x80) is the first value outside the basic latin range.
          expect(
            tryWriteLatin(
              new Uint8Array(stringLength * 3),
              'a'.repeat(stringLength - 1) + '\x80',
              0
            )
          ).to.be.null;
        });
      });

      it(`calls charCodeAt ${stringLength}`, () => {
        tryWriteLatin(
          new Uint8Array(stringLength * 3),
          String.fromCharCode(127).repeat(stringLength),
          stringLength
        );
        expect(charCodeAtSpy).to.have.callCount(stringLength);
      });
    });
  }

  context('when there is >25 characters', () => {
    it('returns null', () => {
      expect(tryWriteLatin(new Uint8Array(75), 'a'.repeat(26), 0)).to.be.null;
    });
  });
});

0 commit comments

Comments
 (0)