diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz index a796c255c1857..bd13bdcaeae38 100644 Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ diff --git a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte index 689415f8df84b..f07eb7a4395a4 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte @@ -72,12 +72,6 @@ } } - function handleScroll() { - if (isOpen) { - updateMenuPosition(); - } - } - async function handleSelect(value: string | undefined) { if (!value) return; @@ -259,7 +253,7 @@ } - + diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte index 7ade6bc61f333..e47a5a7dba9e6 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte @@ -2,6 +2,7 @@ import { getDeletionInfo } from '$lib/stores/chat.svelte'; import { copyToClipboard } from '$lib/utils/copy'; import { isIMEComposing } from '$lib/utils/is-ime-composing'; + import type { ApiChatCompletionToolCall } from '$lib/types/api'; import ChatMessageAssistant from './ChatMessageAssistant.svelte'; import ChatMessageUser from './ChatMessageUser.svelte'; @@ -54,6 +55,29 @@ return null; }); + let toolCallContent = $derived.by((): ApiChatCompletionToolCall[] | string | null => { + if (message.role === 'assistant') { + const trimmedToolCalls = message.toolCalls?.trim(); + + if (!trimmedToolCalls) { + return null; + } + + try { + const parsed = JSON.parse(trimmedToolCalls); + + if (Array.isArray(parsed)) { + return parsed as ApiChatCompletionToolCall[]; + } + } catch { + // Harmony-only path: fall back to the raw string so issues surface visibly. + } + + return trimmedToolCalls; + } + return null; + }); + function handleCancelEdit() { isEditing = false; editedContent = message.content; @@ -171,5 +195,6 @@ {showDeleteDialog} {siblingInfo} {thinkingContent} + {toolCallContent} /> {/if} diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte index d8f5630fd14f7..b9682d5c715c0 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte @@ -11,7 +11,8 @@ Gauge, Clock, WholeWord, - ChartNoAxesColumn + ChartNoAxesColumn, + Wrench } from '@lucide/svelte'; import { Button } from '$lib/components/ui/button'; import { Checkbox } from '$lib/components/ui/checkbox'; @@ -21,6 +22,7 @@ import { config } from '$lib/stores/settings.svelte'; import { modelName as serverModelName } from '$lib/stores/server.svelte'; import { copyToClipboard } from '$lib/utils/copy'; + import type { ApiChatCompletionToolCall } from '$lib/types/api'; interface Props { class?: string; @@ -51,6 +53,7 @@ siblingInfo?: ChatMessageSiblingInfo | null; textareaElement?: HTMLTextAreaElement; thinkingContent: string | null; + toolCallContent: ApiChatCompletionToolCall[] | string | null; } let { @@ -76,9 +79,17 @@ shouldBranchAfterEdit = false, siblingInfo = null, textareaElement = $bindable(), - thinkingContent + thinkingContent, + toolCallContent = null }: Props = $props(); + const parsedToolCalls = $derived(() => + Array.isArray(toolCallContent) ? (toolCallContent as ApiChatCompletionToolCall[]) : null + ); + const fallbackToolCallContent = $derived(() => + typeof toolCallContent === 'string' ? toolCallContent : null + ); + const processingState = useProcessingState(); let currentConfig = $derived(config()); let serverModel = $derived(serverModelName()); @@ -97,6 +108,58 @@ void copyToClipboard(model ?? ''); } + + function formatToolCallBadge(toolCall: ApiChatCompletionToolCall, index: number) { + const callNumber = index + 1; + const functionName = toolCall.function?.name?.trim(); + const label = functionName || `Call #${callNumber}`; + + const payload: Record = {}; + + const id = toolCall.id?.trim(); + if (id) { + payload.id = id; + } + + const type = toolCall.type?.trim(); + if (type) { + payload.type = type; + } + + if (toolCall.function) { + const fnPayload: Record = {}; + + const name = toolCall.function.name?.trim(); + if (name) { + fnPayload.name = name; + } + + const rawArguments = toolCall.function.arguments?.trim(); + if (rawArguments) { + try { + fnPayload.arguments = JSON.parse(rawArguments); + } catch { + fnPayload.arguments = rawArguments; + } + } + + if (Object.keys(fnPayload).length > 0) { + payload.function = fnPayload; + } + } + + const formattedPayload = JSON.stringify(payload, null, 2); + + return { + label, + tooltip: formattedPayload, + copyValue: formattedPayload + }; + } + + function handleCopyToolCall(payload: string) { + void copyToClipboard(payload, 'Tool call copied to clipboard'); + }
{/if} + {#if config().showToolCalls} + {@const toolCalls = parsedToolCalls()} + {@const fallbackToolCalls = fallbackToolCallContent()} + {#if (toolCalls && toolCalls.length > 0) || fallbackToolCalls} + + + + + Tool calls: + + + {#if toolCalls && toolCalls.length > 0} + {#each toolCalls as toolCall, index (toolCall.id ?? `${index}`)} + {@const badge = formatToolCallBadge(toolCall, index)} + + {/each} + {:else if fallbackToolCalls} + + {/if} + + {/if} + {/if} + {#if currentConfig.showMessageStats && message.timings && message.timings.predicted_n && message.timings.predicted_ms} {@const tokensPerSecond = (message.timings.predicted_n / message.timings.predicted_ms) * 1000} @@ -287,4 +393,17 @@ white-space: pre-wrap; word-break: break-word; } + + .tool-call-badge { + max-width: 12rem; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + } + + .tool-call-badge--fallback { + max-width: 20rem; + white-space: normal; + word-break: break-word; + } diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte index e4672b787ee89..d2a0a739c54d1 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte @@ -226,6 +226,11 @@ label: 'Enable model selector', type: 'checkbox' }, + { + key: 'showToolCalls', + label: 'Show tool call labels', + type: 'checkbox' + }, { key: 'disableReasoningFormat', label: 'Show raw LLM output', diff --git a/tools/server/webui/src/lib/constants/settings-config.ts b/tools/server/webui/src/lib/constants/settings-config.ts index c25f380846cf4..7547832d95ae1 100644 --- a/tools/server/webui/src/lib/constants/settings-config.ts +++ b/tools/server/webui/src/lib/constants/settings-config.ts @@ -6,6 +6,7 @@ export const SETTING_CONFIG_DEFAULT: Record = theme: 'system', showTokensPerSecond: false, showThoughtInProgress: false, + showToolCalls: false, disableReasoningFormat: false, keepStatsVisible: false, showMessageStats: true, @@ -80,6 +81,8 @@ export const SETTING_CONFIG_INFO: Record = { custom: 'Custom JSON parameters to send to the API. Must be valid JSON format.', showTokensPerSecond: 'Display generation speed in tokens per second during streaming.', showThoughtInProgress: 'Expand thought process by default when generating messages.', + showToolCalls: + 'Display tool call labels and payloads from Harmony-compatible delta.tool_calls data below assistant messages.', disableReasoningFormat: 'Show raw LLM output without backend parsing and frontend Markdown rendering to inspect streaming across different models.', keepStatsVisible: 'Keep processing statistics visible after generation finishes.', diff --git a/tools/server/webui/src/lib/services/chat.ts b/tools/server/webui/src/lib/services/chat.ts index 414e060764d7e..1908d83909eab 100644 --- a/tools/server/webui/src/lib/services/chat.ts +++ b/tools/server/webui/src/lib/services/chat.ts @@ -1,6 +1,25 @@ import { config } from '$lib/stores/settings.svelte'; import { selectedModelName } from '$lib/stores/models.svelte'; import { slotsService } from './slots'; +import type { + ApiChatCompletionRequest, + ApiChatCompletionResponse, + ApiChatCompletionStreamChunk, + ApiChatCompletionToolCall, + ApiChatCompletionToolCallDelta, + ApiChatMessageData +} from '$lib/types/api'; +import type { + DatabaseMessage, + DatabaseMessageExtra, + DatabaseMessageExtraAudioFile, + DatabaseMessageExtraImageFile, + DatabaseMessageExtraLegacyContext, + DatabaseMessageExtraPdfFile, + DatabaseMessageExtraTextFile +} from '$lib/types/database'; +import type { ChatMessagePromptProgress, ChatMessageTimings } from '$lib/types/chat'; +import type { SettingsChatServiceOptions } from '$lib/types/settings'; /** * ChatService - Low-level API communication layer for llama.cpp server interactions * @@ -53,6 +72,7 @@ export class ChatService { onComplete, onError, onReasoningChunk, + onToolCallChunk, onModel, onFirstValidChunk, // Generation parameters @@ -201,6 +221,7 @@ export class ChatService { onComplete, onError, onReasoningChunk, + onToolCallChunk, onModel, onFirstValidChunk, conversationId, @@ -208,7 +229,13 @@ export class ChatService { ); return; } else { - return this.handleNonStreamResponse(response, onComplete, onError, onModel); + return this.handleNonStreamResponse( + response, + onComplete, + onError, + onToolCallChunk, + onModel + ); } } catch (error) { if (error instanceof Error && error.name === 'AbortError') { @@ -264,10 +291,12 @@ export class ChatService { onComplete?: ( response: string, reasoningContent?: string, - timings?: ChatMessageTimings + timings?: ChatMessageTimings, + toolCalls?: string ) => void, onError?: (error: Error) => void, onReasoningChunk?: (chunk: string) => void, + onToolCallChunk?: (chunk: string) => void, onModel?: (model: string) => void, onFirstValidChunk?: () => void, conversationId?: string, @@ -282,11 +311,53 @@ export class ChatService { const decoder = new TextDecoder(); let aggregatedContent = ''; let fullReasoningContent = ''; + let aggregatedToolCalls: ApiChatCompletionToolCall[] = []; let hasReceivedData = false; let lastTimings: ChatMessageTimings | undefined; let streamFinished = false; let modelEmitted = false; let firstValidChunkEmitted = false; + let toolCallIndexOffset = 0; + let hasOpenToolCallBatch = false; + + const finalizeOpenToolCallBatch = () => { + if (!hasOpenToolCallBatch) { + return; + } + + toolCallIndexOffset = aggregatedToolCalls.length; + hasOpenToolCallBatch = false; + }; + + const processToolCallDelta = (toolCalls?: ApiChatCompletionToolCallDelta[]) => { + if (!toolCalls || toolCalls.length === 0) { + return; + } + + aggregatedToolCalls = this.mergeToolCallDeltas( + aggregatedToolCalls, + toolCalls, + toolCallIndexOffset + ); + + if (aggregatedToolCalls.length === 0) { + return; + } + + hasOpenToolCallBatch = true; + + const serializedToolCalls = JSON.stringify(aggregatedToolCalls); + + if (!serializedToolCalls) { + return; + } + + hasReceivedData = true; + + if (!abortSignal?.aborted) { + onToolCallChunk?.(serializedToolCalls); + } + }; try { let chunk = ''; @@ -325,6 +396,7 @@ export class ChatService { const content = parsed.choices[0]?.delta?.content; const reasoningContent = parsed.choices[0]?.delta?.reasoning_content; + const toolCalls = parsed.choices[0]?.delta?.tool_calls; const timings = parsed.timings; const promptProgress = parsed.prompt_progress; @@ -342,6 +414,7 @@ export class ChatService { } if (content) { + finalizeOpenToolCallBatch(); hasReceivedData = true; aggregatedContent += content; if (!abortSignal?.aborted) { @@ -350,12 +423,15 @@ export class ChatService { } if (reasoningContent) { + finalizeOpenToolCallBatch(); hasReceivedData = true; fullReasoningContent += reasoningContent; if (!abortSignal?.aborted) { onReasoningChunk?.(reasoningContent); } } + + processToolCallDelta(toolCalls); } catch (e) { console.error('Error parsing JSON chunk:', e); } @@ -368,12 +444,26 @@ export class ChatService { if (abortSignal?.aborted) return; if (streamFinished) { - if (!hasReceivedData && aggregatedContent.length === 0) { + finalizeOpenToolCallBatch(); + + if ( + !hasReceivedData && + aggregatedContent.length === 0 && + aggregatedToolCalls.length === 0 + ) { const noResponseError = new Error('No response received from server. Please try again.'); throw noResponseError; } - onComplete?.(aggregatedContent, fullReasoningContent || undefined, lastTimings); + const finalToolCalls = + aggregatedToolCalls.length > 0 ? JSON.stringify(aggregatedToolCalls) : undefined; + + onComplete?.( + aggregatedContent, + fullReasoningContent || undefined, + lastTimings, + finalToolCalls + ); } } catch (error) { const err = error instanceof Error ? error : new Error('Stream error'); @@ -386,6 +476,54 @@ export class ChatService { } } + private mergeToolCallDeltas( + existing: ApiChatCompletionToolCall[], + deltas: ApiChatCompletionToolCallDelta[], + indexOffset = 0 + ): ApiChatCompletionToolCall[] { + const result = existing.map((call) => ({ + ...call, + function: call.function ? { ...call.function } : undefined + })); + + for (const delta of deltas) { + const index = + typeof delta.index === 'number' && delta.index >= 0 + ? delta.index + indexOffset + : result.length; + + while (result.length <= index) { + result.push({ function: undefined }); + } + + const target = result[index]!; + + if (delta.id) { + target.id = delta.id; + } + + if (delta.type) { + target.type = delta.type; + } + + if (delta.function) { + const fn = target.function ? { ...target.function } : {}; + + if (delta.function.name) { + fn.name = delta.function.name; + } + + if (delta.function.arguments) { + fn.arguments = (fn.arguments ?? '') + delta.function.arguments; + } + + target.function = fn; + } + } + + return result; + } + /** * Handles non-streaming response from the chat completion API. * Parses the JSON response and extracts the generated content. @@ -401,9 +539,11 @@ export class ChatService { onComplete?: ( response: string, reasoningContent?: string, - timings?: ChatMessageTimings + timings?: ChatMessageTimings, + toolCalls?: string ) => void, onError?: (error: Error) => void, + onToolCallChunk?: (chunk: string) => void, onModel?: (model: string) => void ): Promise { try { @@ -423,17 +563,31 @@ export class ChatService { const content = data.choices[0]?.message?.content || ''; const reasoningContent = data.choices[0]?.message?.reasoning_content; + const toolCalls = data.choices[0]?.message?.tool_calls; if (reasoningContent) { console.log('Full reasoning content:', reasoningContent); } - if (!content.trim()) { + let serializedToolCalls: string | undefined; + + if (toolCalls && toolCalls.length > 0) { + const mergedToolCalls = this.mergeToolCallDeltas([], toolCalls); + + if (mergedToolCalls.length > 0) { + serializedToolCalls = JSON.stringify(mergedToolCalls); + if (serializedToolCalls) { + onToolCallChunk?.(serializedToolCalls); + } + } + } + + if (!content.trim() && !serializedToolCalls) { const noResponseError = new Error('No response received from server. Please try again.'); throw noResponseError; } - onComplete?.(content, reasoningContent); + onComplete?.(content, reasoningContent, undefined, serializedToolCalls); return content; } catch (error) { diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts index 3f97a89183d82..5b5a9d74a5bc6 100644 --- a/tools/server/webui/src/lib/stores/chat.svelte.ts +++ b/tools/server/webui/src/lib/stores/chat.svelte.ts @@ -205,6 +205,7 @@ class ChatStore { type, timestamp: Date.now(), thinking: '', + toolCalls: '', children: [], extra: extras }, @@ -360,6 +361,7 @@ class ChatStore { ): Promise { let streamedContent = ''; let streamedReasoningContent = ''; + let streamedToolCallContent = ''; let resolvedModel: string | null = null; let modelPersisted = false; @@ -468,6 +470,20 @@ class ChatStore { this.updateMessageAtIndex(messageIndex, { thinking: streamedReasoningContent }); }, + onToolCallChunk: (toolCallChunk: string) => { + const chunk = toolCallChunk.trim(); + + if (!chunk) { + return; + } + + streamedToolCallContent = chunk; + + const messageIndex = this.findMessageIndex(assistantMessage.id); + + this.updateMessageAtIndex(messageIndex, { toolCalls: streamedToolCallContent }); + }, + onModel: (modelName: string) => { recordModel(modelName); }, @@ -475,18 +491,21 @@ class ChatStore { onComplete: async ( finalContent?: string, reasoningContent?: string, - timings?: ChatMessageTimings + timings?: ChatMessageTimings, + toolCallContent?: string ) => { slotsService.stopStreaming(); const updateData: { content: string; thinking: string; + toolCalls: string; timings?: ChatMessageTimings; model?: string; } = { content: finalContent || streamedContent, thinking: reasoningContent || streamedReasoningContent, + toolCalls: toolCallContent || streamedToolCallContent, timings: timings }; @@ -499,7 +518,11 @@ class ChatStore { const messageIndex = this.findMessageIndex(assistantMessage.id); - const localUpdateData: { timings?: ChatMessageTimings; model?: string } = { + const localUpdateData: { + timings?: ChatMessageTimings; + model?: string; + toolCalls?: string; + } = { timings: timings }; @@ -507,6 +530,10 @@ class ChatStore { localUpdateData.model = updateData.model; } + if (updateData.toolCalls !== undefined) { + localUpdateData.toolCalls = updateData.toolCalls; + } + this.updateMessageAtIndex(messageIndex, localUpdateData); await DatabaseStore.updateCurrentNode(assistantMessage.convId, assistantMessage.id); @@ -620,6 +647,7 @@ class ChatStore { content: '', timestamp: Date.now(), thinking: '', + toolCalls: '', children: [], model: null }, @@ -1443,6 +1471,7 @@ class ChatStore { role: messageToEdit.role, content: newContent, thinking: messageToEdit.thinking || '', + toolCalls: messageToEdit.toolCalls || '', children: [], model: messageToEdit.model // Preserve original model info when branching }, @@ -1518,6 +1547,7 @@ class ChatStore { role: messageToEdit.role, content: newContent, thinking: messageToEdit.thinking || '', + toolCalls: messageToEdit.toolCalls || '', children: [], extra: messageToEdit.extra ? JSON.parse(JSON.stringify(messageToEdit.extra)) : undefined, model: messageToEdit.model // Preserve original model info when branching @@ -1589,6 +1619,7 @@ class ChatStore { role: 'assistant', content: '', thinking: '', + toolCalls: '', children: [], model: null }, @@ -1647,6 +1678,7 @@ class ChatStore { role: 'assistant', content: '', thinking: '', + toolCalls: '', children: [], model: null }, diff --git a/tools/server/webui/src/lib/stores/database.ts b/tools/server/webui/src/lib/stores/database.ts index 6394c5b7eda74..82edcc3227c27 100644 --- a/tools/server/webui/src/lib/stores/database.ts +++ b/tools/server/webui/src/lib/stores/database.ts @@ -114,6 +114,7 @@ export class DatabaseStore { ...message, id: uuid(), parent: parentId, + toolCalls: message.toolCalls ?? '', children: [] }; @@ -154,6 +155,7 @@ export class DatabaseStore { content: '', parent: null, thinking: '', + toolCalls: '', children: [] }; diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts index 6ebc43db0e3ef..1a8bc64989957 100644 --- a/tools/server/webui/src/lib/types/api.d.ts +++ b/tools/server/webui/src/lib/types/api.d.ts @@ -183,6 +183,23 @@ export interface ApiChatCompletionRequest { samplers?: string[]; // Custom parameters (JSON string) custom?: Record; + timings_per_token?: boolean; +} + +export interface ApiChatCompletionToolCallFunctionDelta { + name?: string; + arguments?: string; +} + +export interface ApiChatCompletionToolCallDelta { + index?: number; + id?: string; + type?: string; + function?: ApiChatCompletionToolCallFunctionDelta; +} + +export interface ApiChatCompletionToolCall extends ApiChatCompletionToolCallDelta { + function?: ApiChatCompletionToolCallFunctionDelta & { arguments?: string }; } export interface ApiChatCompletionStreamChunk { @@ -195,6 +212,7 @@ export interface ApiChatCompletionStreamChunk { content?: string; reasoning_content?: string; model?: string; + tool_calls?: ApiChatCompletionToolCallDelta[]; }; }>; timings?: { @@ -216,6 +234,7 @@ export interface ApiChatCompletionResponse { content: string; reasoning_content?: string; model?: string; + tool_calls?: ApiChatCompletionToolCallDelta[]; }; }>; } diff --git a/tools/server/webui/src/lib/types/database.d.ts b/tools/server/webui/src/lib/types/database.d.ts index b5318b73f4108..16debc6d67d58 100644 --- a/tools/server/webui/src/lib/types/database.d.ts +++ b/tools/server/webui/src/lib/types/database.d.ts @@ -60,6 +60,7 @@ export interface DatabaseMessage { content: string; parent: string; thinking: string; + toolCalls?: string; children: string[]; extra?: DatabaseMessageExtra[]; timings?: ChatMessageTimings; diff --git a/tools/server/webui/src/lib/types/settings.d.ts b/tools/server/webui/src/lib/types/settings.d.ts index 946ef015e92e1..b85b0597d0068 100644 --- a/tools/server/webui/src/lib/types/settings.d.ts +++ b/tools/server/webui/src/lib/types/settings.d.ts @@ -38,12 +38,19 @@ export interface SettingsChatServiceOptions { samplers?: string | string[]; // Custom parameters custom?: string; + timings_per_token?: boolean; // Callbacks onChunk?: (chunk: string) => void; onReasoningChunk?: (chunk: string) => void; + onToolCallChunk?: (chunk: string) => void; onModel?: (model: string) => void; onFirstValidChunk?: () => void; - onComplete?: (response: string, reasoningContent?: string, timings?: ChatMessageTimings) => void; + onComplete?: ( + response: string, + reasoningContent?: string, + timings?: ChatMessageTimings, + toolCalls?: string + ) => void; onError?: (error: Error) => void; }