Skip to content

Commit 743c571

Browse files
committed
fix(firestore-bigquery-export): comment out unused role for now and use logging
1 parent 0f638c2 commit 743c571

File tree

4 files changed

+139
-29
lines changed

4 files changed

+139
-29
lines changed

firestore-bigquery-export/extension.yaml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -48,8 +48,8 @@ roles:
4848
- role: datastore.user
4949
reason: Allows the extension to write updates to the database.
5050

51-
- role: storage.objectAdmin
52-
reason: Allows the extension to create objects in the storage bucket.
51+
# - role: storage.objectAdmin
52+
# reason: Allows the extension to create objects in the storage bucket.
5353

5454
resources:
5555
- name: fsexportbigquery
@@ -207,7 +207,7 @@ params:
207207
value: yes
208208
- label: No
209209
value: no
210-
required: true
210+
default: yes
211211

212212
- param: WILDCARD_IDS
213213
label: Enable Wildcard Column field with Parent Firestore Document IDs

firestore-bigquery-export/functions/src/index.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -310,7 +310,7 @@ async function attemptToEnqueue(
310310
await events.recordErrorEvent(enqueueErr as Error);
311311

312312
// Log the error if it has not been logged already.
313-
if (!enqueueErr.logged) {
313+
if (!enqueueErr.logged && config.logFailedExportData) {
314314
logs.error(
315315
true,
316316
"Failed to enqueue task to syncBigQuery",

firestore-bigquery-export/functions/stress_test/main.js

Lines changed: 98 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,29 +1,57 @@
11
const { Worker } = require("worker_threads");
22
const { performance } = require("perf_hooks");
33
const path = require("path");
4+
const admin = require("firebase-admin");
45

5-
const totalDocs = 10000000; // Total number of documents to write
6+
// Initialize Firebase Admin SDK
7+
admin.initializeApp({
8+
projectId: "vertex-testing-1efc3",
9+
});
10+
11+
// Get a reference to the Firestore service
12+
const db = admin.firestore();
13+
14+
const totalDocs = 1000000; // Total number of documents to write
615
const maxThreads = 20; // Maximum number of worker threads
716
const batchSize = 500; // Documents per batch
8-
const rampUpDelay = 2000; // 5 seconds delay between ramp-ups
9-
const rampUps = 20; // Number of ramp-ups (planned)
10-
17+
const targetRate = 500; // Target docs per second
18+
const rampUpDelay = 1000; // Delay between ramp-ups
19+
const rampUps = 5; // Number of ramp-ups
1120
const docsPerRampUp = Math.ceil(totalDocs / rampUps); // Documents per ramp-up
1221

22+
// Calculate the delay needed to meet the target rate (in milliseconds)
23+
const delayBetweenBatches = Math.max(1000 / (targetRate / batchSize), 0); // Delay between batches in ms
24+
25+
// Hardcoded collection paths with the form: A/{aid}/B/{bid}/C/{cid}/D/{did}/E/{eid}/F/{fid}/G
26+
const collectionPaths = [
27+
"A/aid1/B/bid1/C/cid1/D/did1/E/eid1/F/fid1/G",
28+
"A/aid2/B/bid2/C/cid2/D/did2/E/eid2/F/fid2/G",
29+
"A/aid3/B/bid3/C/cid3/D/did3/E/eid3/F/fid3/G",
30+
"A/aid4/B/bid4/C/cid4/D/did4/E/eid4/F/fid4/G",
31+
"A/aid5/B/bid5/C/cid5/D/did5/E/eid5/F/fid5/G",
32+
];
33+
1334
// Start measuring total execution time
1435
const totalStartTime = performance.now();
1536

1637
const workerJsPath = path.resolve(__dirname, "worker.js");
1738

1839
// Function to spawn worker threads for a specific ramp-up
19-
const spawnWorkers = async (activeThreads, startDoc, docsPerRampUp) => {
20-
console.log(`Spawning ${activeThreads} worker(s)...`);
40+
const spawnWorkers = async (
41+
activeThreads,
42+
startDoc,
43+
docsPerRampUp,
44+
collectionPath
45+
) => {
46+
console.log(
47+
`Spawning ${activeThreads} worker(s) for collection ${collectionPath}...`
48+
);
2149
let promises = [];
2250
const docsPerThread = Math.ceil(docsPerRampUp / activeThreads);
2351

2452
for (let i = 0; i < activeThreads; i++) {
2553
const docsForThisThread = Math.min(docsPerThread, docsPerRampUp);
26-
const start = startDoc + i * docsPerThread;
54+
const start = startDoc + i * docsForThisThread;
2755
const end = Math.min(start + docsForThisThread, startDoc + docsPerRampUp);
2856

2957
promises.push(
@@ -33,6 +61,8 @@ const spawnWorkers = async (activeThreads, startDoc, docsPerRampUp) => {
3361
start,
3462
end,
3563
batchSize,
64+
collectionPath, // Pass the collection path to the worker
65+
delayBetweenBatches, // Pass the delay to the worker
3666
},
3767
});
3868

@@ -64,13 +94,44 @@ const spawnWorkers = async (activeThreads, startDoc, docsPerRampUp) => {
6494
}
6595
};
6696

97+
// Function to query Firestore for the total document count using count() aggregation
98+
const getCollectionCounts = async () => {
99+
let counts = {};
100+
101+
for (const collectionPath of collectionPaths) {
102+
const collectionRef = db.collection(collectionPath);
103+
const snapshot = await collectionRef.count().get(); // Use the count aggregation query
104+
const count = snapshot.data().count;
105+
counts[collectionPath] = count;
106+
console.log(`Collection ${collectionPath} has ${count} documents.`);
107+
}
108+
109+
return counts;
110+
};
111+
112+
// Function to calculate the difference between two count objects
113+
const calculateCountDifference = (beforeCounts, afterCounts) => {
114+
let totalDifference = 0;
115+
116+
for (const collectionPath in beforeCounts) {
117+
const beforeCount = beforeCounts[collectionPath] || 0;
118+
const afterCount = afterCounts[collectionPath] || 0;
119+
const difference = afterCount - beforeCount;
120+
console.log(`Collection ${collectionPath} difference: ${difference}`);
121+
totalDifference += difference;
122+
}
123+
124+
return totalDifference;
125+
};
126+
67127
// Function to execute ramp-ups
68128
const executeRampUps = async () => {
69129
let activeThreads = 1;
70130
let startDoc = 0;
71131

72132
for (let i = 0; i < rampUps; i++) {
73-
await spawnWorkers(activeThreads, startDoc, docsPerRampUp);
133+
const collectionPath = collectionPaths[i % collectionPaths.length]; // Rotate through collections
134+
await spawnWorkers(activeThreads, startDoc, docsPerRampUp, collectionPath);
74135
startDoc += docsPerRampUp;
75136

76137
if (activeThreads < maxThreads) {
@@ -88,17 +149,38 @@ const executeRampUps = async () => {
88149
}
89150
};
90151

91-
// Run the ramp-ups
92-
executeRampUps()
93-
.then(() => {
152+
// Main execution flow
153+
const main = async () => {
154+
try {
155+
// Count documents before writing
156+
console.log("Counting documents before the operation...");
157+
const beforeCounts = await getCollectionCounts();
158+
159+
// Perform the writing operation
160+
await executeRampUps();
161+
162+
// Count documents after writing
163+
console.log("Counting documents after the operation...");
164+
const afterCounts = await getCollectionCounts();
165+
166+
// Calculate and log the difference
167+
const totalDocsWritten = calculateCountDifference(
168+
beforeCounts,
169+
afterCounts
170+
);
171+
console.log(`Total documents written: ${totalDocsWritten}`);
172+
94173
const totalEndTime = performance.now();
95174
const totalDuration = (totalEndTime - totalStartTime) / 1000; // Convert to seconds
96175
console.log(
97-
`Successfully written ${totalDocs} documents to the collection in ${totalDuration.toFixed(
176+
`Successfully written ${totalDocsWritten} documents in ${totalDuration.toFixed(
98177
2
99178
)} seconds.`
100179
);
101-
})
102-
.catch((error) => {
103-
console.error("Error in worker threads: ", error);
104-
});
180+
} catch (error) {
181+
console.error("Error during execution: ", error);
182+
}
183+
};
184+
185+
// Run the main function
186+
main();

firestore-bigquery-export/functions/stress_test/worker.js

Lines changed: 37 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -10,29 +10,48 @@ admin.initializeApp({
1010

1111
// Get a reference to the Firestore service
1212
const db = admin.firestore();
13-
const collectionName = "posts_2";
1413

15-
// Generate a random document
14+
// Generate a large random document closer to 1MB
1615
const generateRandomDocument = () => {
16+
// const largeString = "x".repeat(300000); // A string of 300,000 characters (~300 KB)
17+
// const largeArray = new Array(5000).fill().map((_, i) => ({
18+
// index: i,
19+
// value: `Value_${Math.random().toString(36).substring(7)}`,
20+
// }));
21+
1722
return {
1823
id: uuidv4(),
1924
name: `Name_${Math.random().toString(36).substring(7)}`,
2025
age: Math.floor(Math.random() * 60) + 18, // Random age between 18 and 78
2126
email: `user_${Math.random().toString(36).substring(7)}@example.com`,
2227
isActive: Math.random() > 0.5, // Random boolean value
2328
createdAt: admin.firestore.Timestamp.now(),
29+
// largeString, // Large string field
30+
// largeArray, // Large array field
2431
};
2532
};
2633

27-
// Write a batch of documents to Firestore
28-
const writeBatch = async (start, end, batchSize) => {
34+
// Delay function for rate control
35+
const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
36+
37+
// Write a batch of documents to a specific collection in Firestore
38+
const writeBatch = async (
39+
start,
40+
end,
41+
batchSize,
42+
collectionPath,
43+
delayBetweenBatches
44+
) => {
2945
let count = start;
3046
while (count < end) {
3147
const batchStartTime = performance.now();
3248

3349
let batch = db.batch();
34-
for (let i = 0; i < batchSize && count < end; i++) {
35-
let docRef = db.collection(collectionName).doc();
50+
const remainingDocs = end - count;
51+
const adjustedBatchSize = Math.min(batchSize, remainingDocs); // Adjust batch size if remaining docs < batchSize
52+
53+
for (let i = 0; i < adjustedBatchSize && count < end; i++) {
54+
let docRef = db.collection(collectionPath).doc();
3655
batch.set(docRef, generateRandomDocument());
3756
count++;
3857
}
@@ -42,15 +61,24 @@ const writeBatch = async (start, end, batchSize) => {
4261
const batchEndTime = performance.now();
4362
const batchDuration = (batchEndTime - batchStartTime) / 1000; // Convert to seconds
4463
parentPort.postMessage(
45-
`Batch of ${batchSize} documents written in ${batchDuration.toFixed(
64+
`Batch of ${adjustedBatchSize} documents written in ${batchDuration.toFixed(
4665
2
47-
)} seconds.`
66+
)} seconds to ${collectionPath}.`
4867
);
68+
69+
// Introduce delay between batches to meet target rate
70+
await delay(delayBetweenBatches);
4971
}
5072
};
5173

5274
// Start writing in batches
53-
writeBatch(workerData.start, workerData.end, workerData.batchSize)
75+
writeBatch(
76+
workerData.start,
77+
workerData.end,
78+
workerData.batchSize,
79+
workerData.collectionPath,
80+
workerData.delayBetweenBatches // Pass the delay for rate control
81+
)
5482
.then(() => {
5583
parentPort.postMessage("Completed writing documents.");
5684
})

0 commit comments

Comments
 (0)