Skip to content

Commit c3fae1a

Browse files
feat(web): Improved search performance on unbounded searches (#555)
1 parent 18ba1d2 commit c3fae1a

File tree

12 files changed

+310
-152
lines changed

12 files changed

+310
-152
lines changed

.env.development

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@ DATABASE_URL="postgresql://postgres:postgres@localhost:5432/postgres"
44

55
# Zoekt
66
ZOEKT_WEBSERVER_URL="http://localhost:6070"
7-
# SHARD_MAX_MATCH_COUNT=10000
8-
# TOTAL_MAX_MATCH_COUNT=100000
97
# The command to use for generating ctags.
108
CTAGS_COMMAND=ctags
119
# logging, strict

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
<!-- @NOTE: On next release, please bump the MCP package as there are breaking changes in this! -->
11+
1012
### Fixed
1113
- Fixed "dubious ownership" errors when cloning / fetching repos. [#553](https://github.com/sourcebot-dev/sourcebot/pull/553)
1214

1315
### Changed
1416
- Remove spam "login page loaded" log. [#552](https://github.com/sourcebot-dev/sourcebot/pull/552)
17+
- Improved search performance for unbounded search queries. [#555](https://github.com/sourcebot-dev/sourcebot/pull/555)
1518

1619
### Added
1720
- Added support for passing db connection url as separate `DATABASE_HOST`, `DATABASE_USERNAME`, `DATABASE_PASSWORD`, `DATABASE_NAME`, and `DATABASE_ARGS` env vars. [#545](https://github.com/sourcebot-dev/sourcebot/pull/545)

docs/docs/configuration/environment-variables.mdx

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,13 @@ The following environment variables allow you to configure your Sourcebot deploy
2828
| `REDIS_REMOVE_ON_FAIL` | `100` | <p>Controls how many failed jobs are allowed to remain in Redis queues</p> |
2929
| `REPO_SYNC_RETRY_BASE_SLEEP_SECONDS` | `60` | <p>The base sleep duration (in seconds) for exponential backoff when retrying repository sync operations that fail</p> |
3030
| `GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS` | `600` | <p>The timeout duration (in seconds) for GitLab client queries</p> |
31-
| `SHARD_MAX_MATCH_COUNT` | `10000` | <p>The maximum shard count per query</p> |
3231
| `SMTP_CONNECTION_URL` | `-` | <p>The url to the SMTP service used for sending transactional emails. See [this doc](/docs/configuration/transactional-emails) for more info.</p> |
3332
| `SOURCEBOT_ENCRYPTION_KEY` | Automatically generated at startup if no value is provided. Generated using `openssl rand -base64 24` | <p>Used to encrypt connection secrets and generate API keys.</p> |
3433
| `SOURCEBOT_PUBLIC_KEY_PATH` | `/app/public.pem` | <p>Sourcebot's public key that's used to verify encrypted license key signatures.</p> |
3534
| `SOURCEBOT_LOG_LEVEL` | `info` | <p>The Sourcebot logging level. Valid values are `debug`, `info`, `warn`, `error`, in order of severity.</p> |
3635
| `SOURCEBOT_STRUCTURED_LOGGING_ENABLED` | `false` | <p>Enables/disables structured JSON logging. See [this doc](/docs/configuration/structured-logging) for more info.</p> |
3736
| `SOURCEBOT_STRUCTURED_LOGGING_FILE` | - | <p>Optional file to log to if structured logging is enabled</p> |
3837
| `SOURCEBOT_TELEMETRY_DISABLED` | `false` | <p>Enables/disables telemetry collection in Sourcebot. See [this doc](/docs/overview.mdx#telemetry) for more info.</p> |
39-
| `TOTAL_MAX_MATCH_COUNT` | `100000` | <p>The maximum number of matches per query</p> |
40-
| `ZOEKT_MAX_WALL_TIME_MS` | `10000` | <p>The maximum real world duration (in milliseconds) per zoekt query</p> |
4138

4239
### Enterprise Environment Variables
4340
| Variable | Default | Description |

packages/mcp/src/schemas.ts

Lines changed: 75 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -38,32 +38,82 @@ export const repositoryInfoSchema = z.object({
3838
name: z.string(),
3939
displayName: z.string().optional(),
4040
webUrl: z.string().optional(),
41-
})
41+
});
42+
43+
// Many of these fields are defined in zoekt/api.go.
44+
export const searchStatsSchema = z.object({
45+
// The actual number of matches returned by the search.
46+
// This will always be less than or equal to `totalMatchCount`.
47+
actualMatchCount: z.number(),
48+
49+
// The total number of matches found during the search.
50+
totalMatchCount: z.number(),
51+
52+
// The duration (in nanoseconds) of the search.
53+
duration: z.number(),
54+
55+
// Number of files containing a match.
56+
fileCount: z.number(),
57+
58+
// Candidate files whose contents weren't examined because we
59+
// gathered enough matches.
60+
filesSkipped: z.number(),
61+
62+
// Amount of I/O for reading contents.
63+
contentBytesLoaded: z.number(),
64+
65+
// Amount of I/O for reading from index.
66+
indexBytesLoaded: z.number(),
67+
68+
// Number of search shards that had a crash.
69+
crashes: z.number(),
70+
71+
// Number of files in shards that we considered.
72+
shardFilesConsidered: z.number(),
73+
74+
// Files that we evaluated. Equivalent to files for which all
75+
// atom matches (including negations) evaluated to true.
76+
filesConsidered: z.number(),
77+
78+
// Files for which we loaded file content to verify substring matches
79+
filesLoaded: z.number(),
80+
81+
// Shards that we scanned to find matches.
82+
shardsScanned: z.number(),
83+
84+
// Shards that we did not process because a query was canceled.
85+
shardsSkipped: z.number(),
86+
87+
// Shards that we did not process because the query was rejected by the
88+
// ngram filter indicating it had no matches.
89+
shardsSkippedFilter: z.number(),
90+
91+
// Number of candidate matches as a result of searching ngrams.
92+
ngramMatches: z.number(),
93+
94+
// NgramLookups is the number of times we accessed an ngram in the index.
95+
ngramLookups: z.number(),
96+
97+
// Wall clock time for queued search.
98+
wait: z.number(),
99+
100+
// Aggregate wall clock time spent constructing and pruning the match tree.
101+
// This accounts for time such as lookups in the trigram index.
102+
matchTreeConstruction: z.number(),
103+
104+
// Aggregate wall clock time spent searching the match tree. This accounts
105+
// for the bulk of search work done looking for matches.
106+
matchTreeSearch: z.number(),
107+
108+
// Number of times regexp was called on files that we evaluated.
109+
regexpsConsidered: z.number(),
110+
111+
// FlushReason explains why results were flushed.
112+
flushReason: z.number(),
113+
});
42114

43115
export const searchResponseSchema = z.object({
44-
zoektStats: z.object({
45-
// The duration (in nanoseconds) of the search.
46-
duration: z.number(),
47-
fileCount: z.number(),
48-
matchCount: z.number(),
49-
filesSkipped: z.number(),
50-
contentBytesLoaded: z.number(),
51-
indexBytesLoaded: z.number(),
52-
crashes: z.number(),
53-
shardFilesConsidered: z.number(),
54-
filesConsidered: z.number(),
55-
filesLoaded: z.number(),
56-
shardsScanned: z.number(),
57-
shardsSkipped: z.number(),
58-
shardsSkippedFilter: z.number(),
59-
ngramMatches: z.number(),
60-
ngramLookups: z.number(),
61-
wait: z.number(),
62-
matchTreeConstruction: z.number(),
63-
matchTreeSearch: z.number(),
64-
regexpsConsidered: z.number(),
65-
flushReason: z.number(),
66-
}),
116+
stats: searchStatsSchema,
67117
files: z.array(z.object({
68118
fileName: z.object({
69119
// The name of the file
@@ -90,6 +140,7 @@ export const searchResponseSchema = z.object({
90140
})),
91141
repositoryInfo: z.array(repositoryInfoSchema),
92142
isBranchFilteringEnabled: z.boolean(),
143+
isSearchExhaustive: z.boolean(),
93144
});
94145

95146
enum RepoIndexingStatus {

packages/web/src/app/[domain]/search/page.tsx

Lines changed: 85 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,21 @@ import { FilterPanel } from "./components/filterPanel";
2121
import { SearchResultsPanel } from "./components/searchResultsPanel";
2222
import { useDomain } from "@/hooks/useDomain";
2323
import { useToast } from "@/components/hooks/use-toast";
24-
import { RepositoryInfo, SearchResultFile } from "@/features/search/types";
24+
import { RepositoryInfo, SearchResultFile, SearchStats } from "@/features/search/types";
2525
import { AnimatedResizableHandle } from "@/components/ui/animatedResizableHandle";
2626
import { useFilteredMatches } from "./components/filterPanel/useFilterMatches";
2727
import { Button } from "@/components/ui/button";
2828
import { ImperativePanelHandle } from "react-resizable-panels";
29-
import { FilterIcon } from "lucide-react";
29+
import { AlertTriangleIcon, BugIcon, FilterIcon } from "lucide-react";
3030
import { useHotkeys } from "react-hotkeys-hook";
3131
import { useLocalStorage } from "@uidotdev/usehooks";
3232
import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
3333
import { KeyboardShortcutHint } from "@/app/components/keyboardShortcutHint";
3434
import { SearchBar } from "../components/searchBar";
35+
import { CodeSnippet } from "@/app/components/codeSnippet";
36+
import { CopyIconButton } from "../components/copyIconButton";
3537

36-
const DEFAULT_MAX_MATCH_COUNT = 10000;
38+
const DEFAULT_MAX_MATCH_COUNT = 500;
3739

3840
export default function SearchPage() {
3941
// We need a suspense boundary here since we are accessing query params
@@ -58,7 +60,12 @@ const SearchPageInternal = () => {
5860
const _maxMatchCount = parseInt(useNonEmptyQueryParam(SearchQueryParams.matches) ?? `${DEFAULT_MAX_MATCH_COUNT}`);
5961
const maxMatchCount = isNaN(_maxMatchCount) ? DEFAULT_MAX_MATCH_COUNT : _maxMatchCount;
6062

61-
const { data: searchResponse, isLoading: isSearchLoading, error } = useQuery({
63+
const {
64+
data: searchResponse,
65+
isPending: isSearchPending,
66+
isFetching: isFetching,
67+
error
68+
} = useQuery({
6269
queryKey: ["search", searchQuery, maxMatchCount],
6370
queryFn: () => measure(() => unwrapServiceError(search({
6471
query: searchQuery,
@@ -68,12 +75,12 @@ const SearchPageInternal = () => {
6875
}, domain)), "client.search"),
6976
select: ({ data, durationMs }) => ({
7077
...data,
71-
durationMs,
78+
totalClientSearchDurationMs: durationMs,
7279
}),
7380
enabled: searchQuery.length > 0,
7481
refetchOnWindowFocus: false,
7582
retry: false,
76-
staleTime: Infinity,
83+
staleTime: 0,
7784
});
7885

7986
useEffect(() => {
@@ -109,58 +116,31 @@ const SearchPageInternal = () => {
109116
const fileLanguages = searchResponse.files?.map(file => file.language) || [];
110117

111118
captureEvent("search_finished", {
112-
durationMs: searchResponse.durationMs,
113-
fileCount: searchResponse.zoektStats.fileCount,
114-
matchCount: searchResponse.zoektStats.matchCount,
115-
filesSkipped: searchResponse.zoektStats.filesSkipped,
116-
contentBytesLoaded: searchResponse.zoektStats.contentBytesLoaded,
117-
indexBytesLoaded: searchResponse.zoektStats.indexBytesLoaded,
118-
crashes: searchResponse.zoektStats.crashes,
119-
shardFilesConsidered: searchResponse.zoektStats.shardFilesConsidered,
120-
filesConsidered: searchResponse.zoektStats.filesConsidered,
121-
filesLoaded: searchResponse.zoektStats.filesLoaded,
122-
shardsScanned: searchResponse.zoektStats.shardsScanned,
123-
shardsSkipped: searchResponse.zoektStats.shardsSkipped,
124-
shardsSkippedFilter: searchResponse.zoektStats.shardsSkippedFilter,
125-
ngramMatches: searchResponse.zoektStats.ngramMatches,
126-
ngramLookups: searchResponse.zoektStats.ngramLookups,
127-
wait: searchResponse.zoektStats.wait,
128-
matchTreeConstruction: searchResponse.zoektStats.matchTreeConstruction,
129-
matchTreeSearch: searchResponse.zoektStats.matchTreeSearch,
130-
regexpsConsidered: searchResponse.zoektStats.regexpsConsidered,
131-
flushReason: searchResponse.zoektStats.flushReason,
119+
durationMs: searchResponse.totalClientSearchDurationMs,
120+
fileCount: searchResponse.stats.fileCount,
121+
matchCount: searchResponse.stats.totalMatchCount,
122+
actualMatchCount: searchResponse.stats.actualMatchCount,
123+
filesSkipped: searchResponse.stats.filesSkipped,
124+
contentBytesLoaded: searchResponse.stats.contentBytesLoaded,
125+
indexBytesLoaded: searchResponse.stats.indexBytesLoaded,
126+
crashes: searchResponse.stats.crashes,
127+
shardFilesConsidered: searchResponse.stats.shardFilesConsidered,
128+
filesConsidered: searchResponse.stats.filesConsidered,
129+
filesLoaded: searchResponse.stats.filesLoaded,
130+
shardsScanned: searchResponse.stats.shardsScanned,
131+
shardsSkipped: searchResponse.stats.shardsSkipped,
132+
shardsSkippedFilter: searchResponse.stats.shardsSkippedFilter,
133+
ngramMatches: searchResponse.stats.ngramMatches,
134+
ngramLookups: searchResponse.stats.ngramLookups,
135+
wait: searchResponse.stats.wait,
136+
matchTreeConstruction: searchResponse.stats.matchTreeConstruction,
137+
matchTreeSearch: searchResponse.stats.matchTreeSearch,
138+
regexpsConsidered: searchResponse.stats.regexpsConsidered,
139+
flushReason: searchResponse.stats.flushReason,
132140
fileLanguages,
133141
});
134142
}, [captureEvent, searchQuery, searchResponse]);
135143

136-
const { fileMatches, searchDurationMs, totalMatchCount, isBranchFilteringEnabled, repositoryInfo, matchCount } = useMemo(() => {
137-
if (!searchResponse) {
138-
return {
139-
fileMatches: [],
140-
searchDurationMs: 0,
141-
totalMatchCount: 0,
142-
isBranchFilteringEnabled: false,
143-
repositoryInfo: {},
144-
matchCount: 0,
145-
};
146-
}
147-
148-
return {
149-
fileMatches: searchResponse.files ?? [],
150-
searchDurationMs: Math.round(searchResponse.durationMs),
151-
totalMatchCount: searchResponse.zoektStats.matchCount,
152-
isBranchFilteringEnabled: searchResponse.isBranchFilteringEnabled,
153-
repositoryInfo: searchResponse.repositoryInfo.reduce((acc, repo) => {
154-
acc[repo.id] = repo;
155-
return acc;
156-
}, {} as Record<number, RepositoryInfo>),
157-
matchCount: searchResponse.stats.matchCount,
158-
}
159-
}, [searchResponse]);
160-
161-
const isMoreResultsButtonVisible = useMemo(() => {
162-
return totalMatchCount > maxMatchCount;
163-
}, [totalMatchCount, maxMatchCount]);
164144

165145
const onLoadMoreResults = useCallback(() => {
166146
const url = createPathWithQueryParams(`/${domain}/search`,
@@ -183,20 +163,27 @@ const SearchPageInternal = () => {
183163
/>
184164
</TopBar>
185165

186-
{(isSearchLoading) ? (
166+
{(isSearchPending || isFetching) ? (
187167
<div className="flex flex-col items-center justify-center h-full gap-2">
188168
<SymbolIcon className="h-6 w-6 animate-spin" />
189169
<p className="font-semibold text-center">Searching...</p>
190170
</div>
171+
) : error ? (
172+
<div className="flex flex-col items-center justify-center h-full gap-2">
173+
<AlertTriangleIcon className="h-6 w-6" />
174+
<p className="font-semibold text-center">Failed to search</p>
175+
<p className="text-sm text-center">{error.message}</p>
176+
</div>
191177
) : (
192178
<PanelGroup
193-
fileMatches={fileMatches}
194-
isMoreResultsButtonVisible={isMoreResultsButtonVisible}
179+
fileMatches={searchResponse.files}
180+
isMoreResultsButtonVisible={searchResponse.isSearchExhaustive === false}
195181
onLoadMoreResults={onLoadMoreResults}
196-
isBranchFilteringEnabled={isBranchFilteringEnabled}
197-
repoInfo={repositoryInfo}
198-
searchDurationMs={searchDurationMs}
199-
numMatches={matchCount}
182+
isBranchFilteringEnabled={searchResponse.isBranchFilteringEnabled}
183+
repoInfo={searchResponse.repositoryInfo}
184+
searchDurationMs={searchResponse.totalClientSearchDurationMs}
185+
numMatches={searchResponse.stats.actualMatchCount}
186+
searchStats={searchResponse.stats}
200187
/>
201188
)}
202189
</div>
@@ -208,19 +195,21 @@ interface PanelGroupProps {
208195
isMoreResultsButtonVisible?: boolean;
209196
onLoadMoreResults: () => void;
210197
isBranchFilteringEnabled: boolean;
211-
repoInfo: Record<number, RepositoryInfo>;
198+
repoInfo: RepositoryInfo[];
212199
searchDurationMs: number;
213200
numMatches: number;
201+
searchStats?: SearchStats;
214202
}
215203

216204
const PanelGroup = ({
217205
fileMatches,
218206
isMoreResultsButtonVisible,
219207
onLoadMoreResults,
220208
isBranchFilteringEnabled,
221-
repoInfo,
222-
searchDurationMs,
209+
repoInfo: _repoInfo,
210+
searchDurationMs: _searchDurationMs,
223211
numMatches,
212+
searchStats,
224213
}: PanelGroupProps) => {
225214
const [previewedFile, setPreviewedFile] = useState<SearchResultFile | undefined>(undefined);
226215
const filteredFileMatches = useFilteredMatches(fileMatches);
@@ -241,6 +230,17 @@ const PanelGroup = ({
241230
description: "Toggle filter panel",
242231
});
243232

233+
const searchDurationMs = useMemo(() => {
234+
return Math.round(_searchDurationMs);
235+
}, [_searchDurationMs]);
236+
237+
const repoInfo = useMemo(() => {
238+
return _repoInfo.reduce((acc, repo) => {
239+
acc[repo.id] = repo;
240+
return acc;
241+
}, {} as Record<number, RepositoryInfo>);
242+
}, [_repoInfo]);
243+
244244
return (
245245
<ResizablePanelGroup
246246
direction="horizontal"
@@ -297,7 +297,27 @@ const PanelGroup = ({
297297
order={2}
298298
>
299299
<div className="py-1 px-2 flex flex-row items-center">
300-
<InfoCircledIcon className="w-4 h-4 mr-2" />
300+
<Tooltip>
301+
<TooltipTrigger asChild>
302+
<InfoCircledIcon className="w-4 h-4 mr-2" />
303+
</TooltipTrigger>
304+
<TooltipContent side="right" className="flex flex-col items-start gap-2 p-4">
305+
<div className="flex flex-row items-center w-full">
306+
<BugIcon className="w-4 h-4 mr-1.5" />
307+
<p className="text-md font-medium">Search stats for nerds</p>
308+
<CopyIconButton
309+
onCopy={() => {
310+
navigator.clipboard.writeText(JSON.stringify(searchStats, null, 2));
311+
return true;
312+
}}
313+
className="ml-auto"
314+
/>
315+
</div>
316+
<CodeSnippet renderNewlines>
317+
{JSON.stringify(searchStats, null, 2)}
318+
</CodeSnippet>
319+
</TooltipContent>
320+
</Tooltip>
301321
{
302322
fileMatches.length > 0 ? (
303323
<p className="text-sm font-medium">{`[${searchDurationMs} ms] Found ${numMatches} matches in ${fileMatches.length} ${fileMatches.length > 1 ? 'files' : 'file'}`}</p>

0 commit comments

Comments
 (0)