Skip to content

Commit ad56a07

Browse files
Merge branch 'main' into ghes-review-agent
2 parents c4c4d10 + 1dff20d commit ad56a07

File tree

53 files changed

+941
-304
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+941
-304
lines changed

CHANGELOG.md

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,26 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Fixed
11+
- Fixed spurious infinite loads with explore panel, file tree, and file search command. [#617](https://github.com/sourcebot-dev/sourcebot/pull/617)
12+
- Wipe search context on init if entitlement no longer exists [#618](https://github.com/sourcebot-dev/sourcebot/pull/618)
13+
- Fixed review agent so that it works with GHES instances [#611](https://github.com/sourcebot-dev/sourcebot/pull/611)
14+
15+
## [4.9.2] - 2025-11-13
16+
17+
### Changed
18+
- Bumped the default requested search result count from 5k to 10k after optimization pass. [#615](https://github.com/sourcebot-dev/sourcebot/pull/615)
19+
1020
### Fixed
1121
- Fixed incorrect shutdown of PostHog SDK in the worker. [#609](https://github.com/sourcebot-dev/sourcebot/pull/609)
1222
- Fixed race condition in job schedulers. [#607](https://github.com/sourcebot-dev/sourcebot/pull/607)
13-
- Fixed review agent so that it works with GHES instances [#611](https://github.com/sourcebot-dev/sourcebot/pull/611)
23+
- Fixed connection sync jobs getting stuck in pending or in progress after restarting the worker. [#612](https://github.com/sourcebot-dev/sourcebot/pull/612)
24+
- Fixed issue where connections would always sync on startup, regardless of whether they had changed. [#613](https://github.com/sourcebot-dev/sourcebot/pull/613)
25+
- Fixed performance bottleneck in search api. Result is an order of magnitude improvement to average search time according to benchmarks. [#615](https://github.com/sourcebot-dev/sourcebot/pull/615)
26+
27+
### Added
28+
- Added force resync buttons for connections and repositories. [#610](https://github.com/sourcebot-dev/sourcebot/pull/610)
29+
- Added environment variable to configure default search result count. [#616](https://github.com/sourcebot-dev/sourcebot/pull/616)
1430

1531
## [4.9.1] - 2025-11-07
1632

docs/docs/configuration/environment-variables.mdx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ The following environment variables allow you to configure your Sourcebot deploy
3434
| `SOURCEBOT_STRUCTURED_LOGGING_ENABLED` | `false` | <p>Enables/disables structured JSON logging. See [this doc](/docs/configuration/structured-logging) for more info.</p> |
3535
| `SOURCEBOT_STRUCTURED_LOGGING_FILE` | - | <p>Optional file to log to if structured logging is enabled</p> |
3636
| `SOURCEBOT_TELEMETRY_DISABLED` | `false` | <p>Enables/disables telemetry collection in Sourcebot. See [this doc](/docs/overview.mdx#telemetry) for more info.</p> |
37+
| `DEFAULT_MAX_MATCH_COUNT` | `10000` | <p>The default maximum number of search results to return when using search in the web app.</p> |
3738

3839
### Enterprise Environment Variables
3940
| Variable | Default | Description |

packages/backend/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@
4040
"cross-fetch": "^4.0.0",
4141
"dotenv": "^16.4.5",
4242
"express": "^4.21.2",
43+
"express-async-errors": "^3.1.1",
44+
"fast-deep-equal": "^3.1.3",
4345
"git-url-parse": "^16.1.0",
4446
"gitea-js": "^1.22.0",
4547
"glob": "^11.0.0",

packages/backend/src/api.ts

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import { PrismaClient, RepoIndexingJobType } from '@sourcebot/db';
2+
import { createLogger } from '@sourcebot/shared';
3+
import express, { Request, Response } from 'express';
4+
import 'express-async-errors';
5+
import * as http from "http";
6+
import z from 'zod';
7+
import { ConnectionManager } from './connectionManager.js';
8+
import { PromClient } from './promClient.js';
9+
import { RepoIndexManager } from './repoIndexManager.js';
10+
11+
const logger = createLogger('api');
12+
const PORT = 3060;
13+
14+
export class Api {
15+
private server: http.Server;
16+
17+
constructor(
18+
promClient: PromClient,
19+
private prisma: PrismaClient,
20+
private connectionManager: ConnectionManager,
21+
private repoIndexManager: RepoIndexManager,
22+
) {
23+
const app = express();
24+
app.use(express.json());
25+
app.use(express.urlencoded({ extended: true }));
26+
27+
// Prometheus metrics endpoint
28+
app.use('/metrics', async (_req: Request, res: Response) => {
29+
res.set('Content-Type', promClient.registry.contentType);
30+
const metrics = await promClient.registry.metrics();
31+
res.end(metrics);
32+
});
33+
34+
app.post('/api/sync-connection', this.syncConnection.bind(this));
35+
app.post('/api/index-repo', this.indexRepo.bind(this));
36+
37+
this.server = app.listen(PORT, () => {
38+
logger.info(`API server is running on port ${PORT}`);
39+
});
40+
}
41+
42+
private async syncConnection(req: Request, res: Response) {
43+
const schema = z.object({
44+
connectionId: z.number(),
45+
}).strict();
46+
47+
const parsed = schema.safeParse(req.body);
48+
if (!parsed.success) {
49+
res.status(400).json({ error: parsed.error.message });
50+
return;
51+
}
52+
53+
const { connectionId } = parsed.data;
54+
const connection = await this.prisma.connection.findUnique({
55+
where: {
56+
id: connectionId,
57+
}
58+
});
59+
60+
if (!connection) {
61+
res.status(404).json({ error: 'Connection not found' });
62+
return;
63+
}
64+
65+
const [jobId] = await this.connectionManager.createJobs([connection]);
66+
67+
res.status(200).json({ jobId });
68+
}
69+
70+
private async indexRepo(req: Request, res: Response) {
71+
const schema = z.object({
72+
repoId: z.number(),
73+
}).strict();
74+
75+
const parsed = schema.safeParse(req.body);
76+
if (!parsed.success) {
77+
res.status(400).json({ error: parsed.error.message });
78+
return;
79+
}
80+
81+
const { repoId } = parsed.data;
82+
const repo = await this.prisma.repo.findUnique({
83+
where: { id: repoId },
84+
});
85+
86+
if (!repo) {
87+
res.status(404).json({ error: 'Repo not found' });
88+
return;
89+
}
90+
91+
const [jobId] = await this.repoIndexManager.createJobs([repo], RepoIndexingJobType.INDEX);
92+
res.status(200).json({ jobId });
93+
}
94+
95+
public async dispose() {
96+
return new Promise<void>((resolve, reject) => {
97+
this.server.close((err) => {
98+
if (err) reject(err);
99+
else resolve(undefined);
100+
});
101+
});
102+
}
103+
}

packages/backend/src/configManager.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import chokidar, { FSWatcher } from 'chokidar';
66
import { ConnectionManager } from "./connectionManager.js";
77
import { SINGLE_TENANT_ORG_ID } from "./constants.js";
88
import { syncSearchContexts } from "./ee/syncSearchContexts.js";
9+
import isEqual from 'fast-deep-equal';
910

1011
const logger = createLogger('config-manager');
1112

@@ -64,8 +65,8 @@ export class ConfigManager {
6465

6566
const existingConnectionConfig = existingConnection ? existingConnection.config as unknown as ConnectionConfig : undefined;
6667
const connectionNeedsSyncing =
67-
!existingConnection ||
68-
(JSON.stringify(existingConnectionConfig) !== JSON.stringify(newConnectionConfig));
68+
!existingConnectionConfig ||
69+
!isEqual(existingConnectionConfig, newConnectionConfig);
6970

7071
// Either update the existing connection or create a new one.
7172
const connection = existingConnection ?
@@ -93,8 +94,8 @@ export class ConfigManager {
9394
});
9495

9596
if (connectionNeedsSyncing) {
96-
const [jobId] = await this.connectionManager.createJobs([connection]);
97-
logger.info(`Change detected for connection '${key}' (id: ${connection.id}). Created sync job ${jobId}.`);
97+
logger.info(`Change detected for connection '${key}' (id: ${connection.id}). Creating sync job.`);
98+
await this.connectionManager.createJobs([connection]);
9899
}
99100
}
100101
}

packages/backend/src/connectionManager.ts

Lines changed: 77 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,12 @@ import { groupmqLifecycleExceptionWrapper, setIntervalAsync } from "./utils.js";
1111
import { syncSearchContexts } from "./ee/syncSearchContexts.js";
1212
import { captureEvent } from "./posthog.js";
1313
import { PromClient } from "./promClient.js";
14+
import { GROUPMQ_WORKER_STOP_GRACEFUL_TIMEOUT_MS } from "./constants.js";
1415

1516
const LOG_TAG = 'connection-manager';
1617
const logger = createLogger(LOG_TAG);
1718
const createJobLogger = (jobId: string) => createLogger(`${LOG_TAG}:job:${jobId}`);
19+
const QUEUE_NAME = 'connection-sync-queue';
1820

1921
type JobPayload = {
2022
jobId: string,
@@ -30,19 +32,19 @@ type JobResult = {
3032
const JOB_TIMEOUT_MS = 1000 * 60 * 60 * 2; // 2 hour timeout
3133

3234
export class ConnectionManager {
33-
private worker: Worker;
35+
private worker: Worker<JobPayload>;
3436
private queue: Queue<JobPayload>;
3537
private interval?: NodeJS.Timeout;
3638

3739
constructor(
3840
private db: PrismaClient,
3941
private settings: Settings,
40-
redis: Redis,
42+
private redis: Redis,
4143
private promClient: PromClient,
4244
) {
4345
this.queue = new Queue<JobPayload>({
4446
redis,
45-
namespace: 'connection-sync-queue',
47+
namespace: QUEUE_NAME,
4648
jobTimeoutMs: JOB_TIMEOUT_MS,
4749
maxAttempts: 3,
4850
logger: env.DEBUG_ENABLE_GROUPMQ_LOGGING === 'true',
@@ -62,6 +64,10 @@ export class ConnectionManager {
6264
this.worker.on('failed', this.onJobFailed.bind(this));
6365
this.worker.on('stalled', this.onJobStalled.bind(this));
6466
this.worker.on('error', this.onWorkerError.bind(this));
67+
// graceful-timeout is triggered when a job is still processing after
68+
// worker.close() is called and the timeout period has elapsed. In this case,
69+
// we fail the job with no retry.
70+
this.worker.on('graceful-timeout', this.onJobGracefulTimeout.bind(this));
6571
}
6672

6773
public startScheduler() {
@@ -128,6 +134,7 @@ export class ConnectionManager {
128134
});
129135

130136
for (const job of jobs) {
137+
logger.info(`Scheduling job ${job.id} for connection ${job.connection.name} (id: ${job.connectionId})`);
131138
await this.queue.add({
132139
groupId: `connection:${job.connectionId}`,
133140
data: {
@@ -150,6 +157,22 @@ export class ConnectionManager {
150157
const logger = createJobLogger(jobId);
151158
logger.info(`Running connection sync job ${jobId} for connection ${connectionName} (id: ${job.data.connectionId}) (attempt ${job.attempts + 1} / ${job.maxAttempts})`);
152159

160+
const currentStatus = await this.db.connectionSyncJob.findUniqueOrThrow({
161+
where: {
162+
id: jobId,
163+
},
164+
select: {
165+
status: true,
166+
}
167+
});
168+
169+
// Fail safe: if the job is not PENDING (first run) or IN_PROGRESS (retry), it indicates the job
170+
// is in an invalid state and should be skipped.
171+
if (currentStatus.status !== ConnectionSyncJobStatus.PENDING && currentStatus.status !== ConnectionSyncJobStatus.IN_PROGRESS) {
172+
throw new Error(`Job ${jobId} is not in a valid state. Expected: ${ConnectionSyncJobStatus.PENDING} or ${ConnectionSyncJobStatus.IN_PROGRESS}. Actual: ${currentStatus.status}. Skipping.`);
173+
}
174+
175+
153176
this.promClient.pendingConnectionSyncJobs.dec({ connection: connectionName });
154177
this.promClient.activeConnectionSyncJobs.inc({ connection: connectionName });
155178

@@ -178,7 +201,7 @@ export class ConnectionManager {
178201
const result = await (async () => {
179202
switch (config.type) {
180203
case 'github': {
181-
return await compileGithubConfig(config, job.data.connectionId, abortController);
204+
return await compileGithubConfig(config, job.data.connectionId, abortController.signal);
182205
}
183206
case 'gitlab': {
184207
return await compileGitlabConfig(config, job.data.connectionId);
@@ -200,7 +223,7 @@ export class ConnectionManager {
200223
}
201224
}
202225
})();
203-
226+
204227
let { repoData, warnings } = result;
205228

206229
await this.db.connectionSyncJob.update({
@@ -383,6 +406,33 @@ export class ConnectionManager {
383406
});
384407
});
385408

409+
private onJobGracefulTimeout = async (job: Job<JobPayload>) =>
410+
groupmqLifecycleExceptionWrapper('onJobGracefulTimeout', logger, async () => {
411+
const logger = createJobLogger(job.id);
412+
413+
const { connection } = await this.db.connectionSyncJob.update({
414+
where: { id: job.id },
415+
data: {
416+
status: ConnectionSyncJobStatus.FAILED,
417+
completedAt: new Date(),
418+
errorMessage: 'Job timed out',
419+
},
420+
select: {
421+
connection: true,
422+
}
423+
});
424+
425+
this.promClient.activeConnectionSyncJobs.dec({ connection: connection.name });
426+
this.promClient.connectionSyncJobFailTotal.inc({ connection: connection.name });
427+
428+
logger.error(`Job ${job.id} timed out for connection ${connection.name} (id: ${connection.id})`);
429+
430+
captureEvent('backend_connection_sync_job_failed', {
431+
connectionId: connection.id,
432+
error: 'Job timed out',
433+
});
434+
});
435+
386436
private async onWorkerError(error: Error) {
387437
Sentry.captureException(error);
388438
logger.error(`Connection syncer worker error.`, error);
@@ -392,8 +442,28 @@ export class ConnectionManager {
392442
if (this.interval) {
393443
clearInterval(this.interval);
394444
}
395-
await this.worker.close();
396-
await this.queue.close();
445+
446+
const inProgressJobs = this.worker.getCurrentJobs();
447+
await this.worker.close(GROUPMQ_WORKER_STOP_GRACEFUL_TIMEOUT_MS);
448+
449+
// Manually release group locks for in progress jobs to prevent deadlocks.
450+
// @see: https://github.com/Openpanel-dev/groupmq/issues/8
451+
for (const { job } of inProgressJobs) {
452+
const lockKey = `groupmq:${QUEUE_NAME}:lock:${job.groupId}`;
453+
logger.debug(`Releasing group lock ${lockKey} for in progress job ${job.id}`);
454+
try {
455+
await this.redis.del(lockKey);
456+
} catch (error) {
457+
Sentry.captureException(error);
458+
logger.error(`Failed to release group lock ${lockKey} for in progress job ${job.id}. Error: `, error);
459+
}
460+
}
461+
462+
// @note: As of groupmq v1.0.0, queue.close() will just close the underlying
463+
// redis connection. Since the same redis client is shared, skip this
464+
// step and close the redis client directly in index.ts.
465+
// @see: https://github.com/Openpanel-dev/groupmq/blob/main/src/queue.ts#L1900
466+
// await this.queue.close();
397467
}
398468
}
399469

packages/backend/src/constants.ts

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,24 @@ export const PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES: CodeHostType[] = [
1010
];
1111

1212
export const REPOS_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'repos');
13-
export const INDEX_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'index');
13+
export const INDEX_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'index');
14+
15+
// Maximum time to wait for current job to finish
16+
export const GROUPMQ_WORKER_STOP_GRACEFUL_TIMEOUT_MS = 5 * 1000; // 5 seconds
17+
18+
// List of shutdown signals
19+
export const SHUTDOWN_SIGNALS: string[] = [
20+
'SIGHUP',
21+
'SIGINT',
22+
'SIGQUIT',
23+
'SIGILL',
24+
'SIGTRAP',
25+
'SIGABRT',
26+
'SIGBUS',
27+
'SIGFPE',
28+
'SIGSEGV',
29+
'SIGUSR2',
30+
'SIGTERM',
31+
// @note: SIGKILL and SIGSTOP cannot have listeners installed.
32+
// @see: https://nodejs.org/api/process.html#signal-events
33+
];

0 commit comments

Comments
 (0)