From 9823808807d965777743cf1810d595ab9eae33c8 Mon Sep 17 00:00:00 2001 From: milaGGL <107142260+milaGGL@users.noreply.github.com> Date: Mon, 10 Feb 2025 13:21:31 -0500 Subject: [PATCH 1/3] Revert the UTF-8 encoding in string sorting --- .../local/indexeddb_remote_document_cache.ts | 4 - packages/firestore/src/model/path.ts | 11 +- packages/firestore/src/model/values.ts | 10 +- packages/firestore/src/util/misc.ts | 17 -- .../test/integration/api/database.test.ts | 195 ------------------ 5 files changed, 11 insertions(+), 226 deletions(-) diff --git a/packages/firestore/src/local/indexeddb_remote_document_cache.ts b/packages/firestore/src/local/indexeddb_remote_document_cache.ts index 9b23c64fcf5..b3d4658d53d 100644 --- a/packages/firestore/src/local/indexeddb_remote_document_cache.ts +++ b/packages/firestore/src/local/indexeddb_remote_document_cache.ts @@ -655,9 +655,5 @@ export function dbKeyComparator(l: DocumentKey, r: DocumentKey): number { return cmp; } - // TODO(b/329441702): Document IDs should be sorted by UTF-8 encoded byte - // order, but IndexedDB sorts strings lexicographically. Document ID - // comparison here still relies on primitive comparison to avoid mismatches - // observed in snapshot listeners with Unicode characters in documentIds return primitiveComparator(left[left.length - 1], right[right.length - 1]); } diff --git a/packages/firestore/src/model/path.ts b/packages/firestore/src/model/path.ts index e7aeb6f61cc..64cb0376a0e 100644 --- a/packages/firestore/src/model/path.ts +++ b/packages/firestore/src/model/path.ts @@ -19,7 +19,6 @@ import { Integer } from '@firebase/webchannel-wrapper/bloom-blob'; import { debugAssert, fail } from '../util/assert'; import { Code, FirestoreError } from '../util/error'; -import { primitiveComparator, compareUtf8Strings } from '../util/misc'; export const DOCUMENT_KEY_NAME = '__name__'; @@ -182,7 +181,7 @@ abstract class BasePath> { return comparison; } } - return primitiveComparator(p1.length, p2.length); + return Math.sign(p1.length - p2.length); } private static compareSegments(lhs: string, rhs: string): number { @@ -202,7 +201,13 @@ abstract class BasePath> { ); } else { // both non-numeric - return compareUtf8Strings(lhs, rhs); + if (lhs < rhs) { + return -1; + } + if (lhs > rhs) { + return 1; + } + return 0; } } diff --git a/packages/firestore/src/model/values.ts b/packages/firestore/src/model/values.ts index 2e3417e674f..1977767515e 100644 --- a/packages/firestore/src/model/values.ts +++ b/packages/firestore/src/model/values.ts @@ -25,11 +25,7 @@ import { Value } from '../protos/firestore_proto_api'; import { fail } from '../util/assert'; -import { - arrayEquals, - primitiveComparator, - compareUtf8Strings -} from '../util/misc'; +import { arrayEquals, primitiveComparator } from '../util/misc'; import { forEach, objectSize } from '../util/obj'; import { isNegativeZero } from '../util/types'; @@ -255,7 +251,7 @@ export function valueCompare(left: Value, right: Value): number { getLocalWriteTime(right) ); case TypeOrder.StringValue: - return compareUtf8Strings(left.stringValue!, right.stringValue!); + return primitiveComparator(left.stringValue!, right.stringValue!); case TypeOrder.BlobValue: return compareBlobs(left.bytesValue!, right.bytesValue!); case TypeOrder.RefValue: @@ -404,7 +400,7 @@ function compareMaps(left: MapValue, right: MapValue): number { rightKeys.sort(); for (let i = 0; i < leftKeys.length && i < rightKeys.length; ++i) { - const keyCompare = compareUtf8Strings(leftKeys[i], rightKeys[i]); + const keyCompare = primitiveComparator(leftKeys[i], rightKeys[i]); if (keyCompare !== 0) { return keyCompare; } diff --git a/packages/firestore/src/util/misc.ts b/packages/firestore/src/util/misc.ts index 6af1238398e..acaff77abb6 100644 --- a/packages/firestore/src/util/misc.ts +++ b/packages/firestore/src/util/misc.ts @@ -16,7 +16,6 @@ */ import { randomBytes } from '../platform/random_bytes'; -import { newTextEncoder } from '../platform/text_serializer'; import { debugAssert } from './assert'; @@ -75,22 +74,6 @@ export interface Equatable { isEqual(other: T): boolean; } -/** Compare strings in UTF-8 encoded byte order */ -export function compareUtf8Strings(left: string, right: string): number { - // Convert the string to UTF-8 encoded bytes - const encodedLeft = newTextEncoder().encode(left); - const encodedRight = newTextEncoder().encode(right); - - for (let i = 0; i < Math.min(encodedLeft.length, encodedRight.length); i++) { - const comparison = primitiveComparator(encodedLeft[i], encodedRight[i]); - if (comparison !== 0) { - return comparison; - } - } - - return primitiveComparator(encodedLeft.length, encodedRight.length); -} - export interface Iterable { forEach: (cb: (v: V) => void) => void; } diff --git a/packages/firestore/test/integration/api/database.test.ts b/packages/firestore/test/integration/api/database.test.ts index 1304b7b4aba..1cda49d9229 100644 --- a/packages/firestore/test/integration/api/database.test.ts +++ b/packages/firestore/test/integration/api/database.test.ts @@ -2424,199 +2424,4 @@ apiDescribe('Database', persistence => { }); }); }); - - describe('Sort unicode strings', () => { - const expectedDocs = ['b', 'a', 'c', 'f', 'e', 'd', 'g']; - it('snapshot listener sorts unicode strings the same as server', async () => { - const testDocs = { - 'a': { value: 'Łukasiewicz' }, - 'b': { value: 'Sierpiński' }, - 'c': { value: '岩澤' }, - 'd': { value: '🄟' }, - 'e': { value: 'P' }, - 'f': { value: '︒' }, - 'g': { value: '🐵' } - }; - - return withTestCollection(persistence, testDocs, async collectionRef => { - const orderedQuery = query(collectionRef, orderBy('value')); - - const getSnapshot = await getDocsFromServer(orderedQuery); - expect(toIds(getSnapshot)).to.deep.equal(expectedDocs); - - const storeEvent = new EventsAccumulator(); - const unsubscribe = onSnapshot(orderedQuery, storeEvent.storeEvent); - const watchSnapshot = await storeEvent.awaitEvent(); - expect(toIds(watchSnapshot)).to.deep.equal(toIds(getSnapshot)); - - unsubscribe(); - - await checkOnlineAndOfflineResultsMatch(orderedQuery, ...expectedDocs); - }); - }); - - it('snapshot listener sorts unicode strings in array the same as server', async () => { - const testDocs = { - 'a': { value: ['Łukasiewicz'] }, - 'b': { value: ['Sierpiński'] }, - 'c': { value: ['岩澤'] }, - 'd': { value: ['🄟'] }, - 'e': { value: ['P'] }, - 'f': { value: ['︒'] }, - 'g': { value: ['🐵'] } - }; - - return withTestCollection(persistence, testDocs, async collectionRef => { - const orderedQuery = query(collectionRef, orderBy('value')); - - const getSnapshot = await getDocsFromServer(orderedQuery); - expect(toIds(getSnapshot)).to.deep.equal(expectedDocs); - - const storeEvent = new EventsAccumulator(); - const unsubscribe = onSnapshot(orderedQuery, storeEvent.storeEvent); - const watchSnapshot = await storeEvent.awaitEvent(); - expect(toIds(watchSnapshot)).to.deep.equal(toIds(getSnapshot)); - - unsubscribe(); - - await checkOnlineAndOfflineResultsMatch(orderedQuery, ...expectedDocs); - }); - }); - - it('snapshot listener sorts unicode strings in map the same as server', async () => { - const testDocs = { - 'a': { value: { foo: 'Łukasiewicz' } }, - 'b': { value: { foo: 'Sierpiński' } }, - 'c': { value: { foo: '岩澤' } }, - 'd': { value: { foo: '🄟' } }, - 'e': { value: { foo: 'P' } }, - 'f': { value: { foo: '︒' } }, - 'g': { value: { foo: '🐵' } } - }; - - return withTestCollection(persistence, testDocs, async collectionRef => { - const orderedQuery = query(collectionRef, orderBy('value')); - - const getSnapshot = await getDocsFromServer(orderedQuery); - expect(toIds(getSnapshot)).to.deep.equal(expectedDocs); - - const storeEvent = new EventsAccumulator(); - const unsubscribe = onSnapshot(orderedQuery, storeEvent.storeEvent); - const watchSnapshot = await storeEvent.awaitEvent(); - expect(toIds(watchSnapshot)).to.deep.equal(toIds(getSnapshot)); - - unsubscribe(); - - await checkOnlineAndOfflineResultsMatch(orderedQuery, ...expectedDocs); - }); - }); - - it('snapshot listener sorts unicode strings in map key the same as server', async () => { - const testDocs = { - 'a': { value: { 'Łukasiewicz': true } }, - 'b': { value: { 'Sierpiński': true } }, - 'c': { value: { '岩澤': true } }, - 'd': { value: { '🄟': true } }, - 'e': { value: { 'P': true } }, - 'f': { value: { '︒': true } }, - 'g': { value: { '🐵': true } } - }; - - return withTestCollection(persistence, testDocs, async collectionRef => { - const orderedQuery = query(collectionRef, orderBy('value')); - - const getSnapshot = await getDocsFromServer(orderedQuery); - expect(toIds(getSnapshot)).to.deep.equal(expectedDocs); - - const storeEvent = new EventsAccumulator(); - const unsubscribe = onSnapshot(orderedQuery, storeEvent.storeEvent); - const watchSnapshot = await storeEvent.awaitEvent(); - expect(toIds(watchSnapshot)).to.deep.equal(toIds(getSnapshot)); - - unsubscribe(); - - await checkOnlineAndOfflineResultsMatch(orderedQuery, ...expectedDocs); - }); - }); - - it('snapshot listener sorts unicode strings in document key the same as server', async () => { - const testDocs = { - 'Łukasiewicz': { value: true }, - 'Sierpiński': { value: true }, - '岩澤': { value: true }, - '🄟': { value: true }, - 'P': { value: true }, - '︒': { value: true }, - '🐵': { value: true } - }; - - return withTestCollection(persistence, testDocs, async collectionRef => { - const orderedQuery = query(collectionRef, orderBy(documentId())); - - const getSnapshot = await getDocsFromServer(orderedQuery); - const expectedDocs = [ - 'Sierpiński', - 'Łukasiewicz', - '岩澤', - '︒', - 'P', - '🄟', - '🐵' - ]; - expect(toIds(getSnapshot)).to.deep.equal(expectedDocs); - - const storeEvent = new EventsAccumulator(); - const unsubscribe = onSnapshot(orderedQuery, storeEvent.storeEvent); - const watchSnapshot = await storeEvent.awaitEvent(); - expect(toIds(watchSnapshot)).to.deep.equal(toIds(getSnapshot)); - - unsubscribe(); - - await checkOnlineAndOfflineResultsMatch(orderedQuery, ...expectedDocs); - }); - }); - - // eslint-disable-next-line no-restricted-properties - (persistence.storage === 'indexeddb' ? it.skip : it)( - 'snapshot listener sorts unicode strings in document key the same as server with persistence', - async () => { - const testDocs = { - 'Łukasiewicz': { value: true }, - 'Sierpiński': { value: true }, - '岩澤': { value: true }, - '🄟': { value: true }, - 'P': { value: true }, - '︒': { value: true }, - '🐵': { value: true } - }; - - return withTestCollection( - persistence, - testDocs, - async collectionRef => { - const orderedQuery = query(collectionRef, orderBy('value')); - - const getSnapshot = await getDocsFromServer(orderedQuery); - expect(toIds(getSnapshot)).to.deep.equal([ - 'Sierpiński', - 'Łukasiewicz', - '岩澤', - '︒', - 'P', - '🄟', - '🐵' - ]); - - const storeEvent = new EventsAccumulator(); - const unsubscribe = onSnapshot(orderedQuery, storeEvent.storeEvent); - const watchSnapshot = await storeEvent.awaitEvent(); - // TODO: IndexedDB sorts string lexicographically, and misses the document with ID '🄟','🐵' - expect(toIds(watchSnapshot)).to.deep.equal(toIds(getSnapshot)); - - unsubscribe(); - } - ); - } - ); - }); }); From 064b68b71b61e7f9424dc395f2dbebc628c46cde Mon Sep 17 00:00:00 2001 From: milaGGL <107142260+milaGGL@users.noreply.github.com> Date: Mon, 10 Feb 2025 14:53:07 -0500 Subject: [PATCH 2/3] add changeset --- .changeset/slimy-chicken-mix.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changeset/slimy-chicken-mix.md diff --git a/.changeset/slimy-chicken-mix.md b/.changeset/slimy-chicken-mix.md new file mode 100644 index 00000000000..4469c6e01f9 --- /dev/null +++ b/.changeset/slimy-chicken-mix.md @@ -0,0 +1,6 @@ +--- +'@firebase/firestore': patch +'firebase': patch +--- + +Reverted a change to use UTF-8 encoding in string comparisons which caused a performance issue. From 1d426072648851c8c6b1b957203f3223449cd9d8 Mon Sep 17 00:00:00 2001 From: milaGGL <107142260+milaGGL@users.noreply.github.com> Date: Mon, 10 Feb 2025 15:51:10 -0500 Subject: [PATCH 3/3] update changeset --- .changeset/slimy-chicken-mix.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changeset/slimy-chicken-mix.md b/.changeset/slimy-chicken-mix.md index 4469c6e01f9..3b0de94b287 100644 --- a/.changeset/slimy-chicken-mix.md +++ b/.changeset/slimy-chicken-mix.md @@ -3,4 +3,4 @@ 'firebase': patch --- -Reverted a change to use UTF-8 encoding in string comparisons which caused a performance issue. +Reverted a change to use UTF-8 encoding in string comparisons which caused a performance issue. See [GitHub issue #8778](https://github.com/firebase/firebase-js-sdk/issues/8778)