+ const fnPayload: Record<string, unknown> = {};
+
+ const name = toolCall.function.name?.trim();
+ if (name) {
+ fnPayload.name = name;
+ }
+
+ const rawArguments = toolCall.function.arguments?.trim();
+ if (rawArguments) {
+ try {
+ fnPayload.arguments = JSON.parse(rawArguments);
+ } catch {
+ fnPayload.arguments = rawArguments;
+ }
+ }
+
+ if (Object.keys(fnPayload).length > 0) {
+ payload.function = fnPayload;
+ }
+ }
+
+ const formattedPayload = JSON.stringify(payload, null, 2);
+
+ return {
+ label,
+ tooltip: formattedPayload,
+ copyValue: formattedPayload
+ };
+ }
+
+ function handleCopyToolCall(payload: string) {
+ void copyToClipboard(payload, 'Tool call copied to clipboard');
+ }
{/if}
+ {#if config().showToolCalls}
+ {@const toolCalls = parsedToolCalls()}
+ {@const fallbackToolCalls = fallbackToolCallContent()}
+ {#if (toolCalls && toolCalls.length > 0) || fallbackToolCalls}
+
+
+
+
+ Tool calls:
+
+
+ {#if toolCalls && toolCalls.length > 0}
+ {#each toolCalls as toolCall, index (toolCall.id ?? `${index}`)}
+ {@const badge = formatToolCallBadge(toolCall, index)}
+
+ {/each}
+ {:else if fallbackToolCalls}
+
+ {/if}
+
+ {/if}
+ {/if}
+
{#if currentConfig.showMessageStats && message.timings && message.timings.predicted_n && message.timings.predicted_ms}
{@const tokensPerSecond = (message.timings.predicted_n / message.timings.predicted_ms) * 1000}
@@ -287,4 +393,17 @@
white-space: pre-wrap;
word-break: break-word;
}
+
+ .tool-call-badge {
+ max-width: 12rem;
+ white-space: nowrap;
+ overflow: hidden;
+ text-overflow: ellipsis;
+ }
+
+ .tool-call-badge--fallback {
+ max-width: 20rem;
+ white-space: normal;
+ word-break: break-word;
+ }
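
The badge formatter above is tolerant of partially streamed arguments: it tries `JSON.parse` and keeps the raw string on failure. A minimal standalone sketch of that parse-or-fallback behavior (`ToolCall` and `buildBadgePayload` are illustrative names, not part of this patch):

```ts
// Mirrors ApiChatCompletionToolCall; names here are illustrative only.
interface ToolCall {
	id?: string;
	type?: string;
	function?: { name?: string; arguments?: string };
}

function buildBadgePayload(toolCall: ToolCall): Record<string, unknown> {
	const fnPayload: Record<string, unknown> = {};

	const name = toolCall.function?.name?.trim();
	if (name) fnPayload.name = name;

	const rawArguments = toolCall.function?.arguments?.trim();
	if (rawArguments) {
		try {
			// Arguments arrive as a JSON string; parse when possible...
			fnPayload.arguments = JSON.parse(rawArguments);
		} catch {
			// ...and fall back to the raw string for partial or malformed JSON.
			fnPayload.arguments = rawArguments;
		}
	}

	return fnPayload;
}

console.log(buildBadgePayload({ function: { name: 'get_weather', arguments: '{"city":"Berlin"}' } }));
// -> { name: 'get_weather', arguments: { city: 'Berlin' } }
```
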
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte
index e4672b787ee89..d2a0a739c54d1 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte
@@ -226,6 +226,11 @@
label: 'Enable model selector',
type: 'checkbox'
},
+ {
+ key: 'showToolCalls',
+ label: 'Show tool call labels',
+ type: 'checkbox'
+ },
{
key: 'disableReasoningFormat',
label: 'Show raw LLM output',
diff --git a/tools/server/webui/src/lib/constants/settings-config.ts b/tools/server/webui/src/lib/constants/settings-config.ts
index c25f380846cf4..7547832d95ae1 100644
--- a/tools/server/webui/src/lib/constants/settings-config.ts
+++ b/tools/server/webui/src/lib/constants/settings-config.ts
@@ -6,6 +6,7 @@ export const SETTING_CONFIG_DEFAULT: Record<string, string | number | boolean> =
theme: 'system',
showTokensPerSecond: false,
showThoughtInProgress: false,
+ showToolCalls: false,
disableReasoningFormat: false,
keepStatsVisible: false,
showMessageStats: true,
@@ -80,6 +81,8 @@ export const SETTING_CONFIG_INFO: Record<string, string> = {
custom: 'Custom JSON parameters to send to the API. Must be valid JSON format.',
showTokensPerSecond: 'Display generation speed in tokens per second during streaming.',
showThoughtInProgress: 'Expand thought process by default when generating messages.',
+ showToolCalls:
+ 'Display tool call labels and payloads from Harmony-compatible delta.tool_calls data below assistant messages.',
disableReasoningFormat:
'Show raw LLM output without backend parsing and frontend Markdown rendering to inspect streaming across different models.',
keepStatsVisible: 'Keep processing statistics visible after generation finishes.',
diff --git a/tools/server/webui/src/lib/services/chat.ts b/tools/server/webui/src/lib/services/chat.ts
index 414e060764d7e..1908d83909eab 100644
--- a/tools/server/webui/src/lib/services/chat.ts
+++ b/tools/server/webui/src/lib/services/chat.ts
@@ -1,6 +1,25 @@
import { config } from '$lib/stores/settings.svelte';
import { selectedModelName } from '$lib/stores/models.svelte';
import { slotsService } from './slots';
+import type {
+ ApiChatCompletionRequest,
+ ApiChatCompletionResponse,
+ ApiChatCompletionStreamChunk,
+ ApiChatCompletionToolCall,
+ ApiChatCompletionToolCallDelta,
+ ApiChatMessageData
+} from '$lib/types/api';
+import type {
+ DatabaseMessage,
+ DatabaseMessageExtra,
+ DatabaseMessageExtraAudioFile,
+ DatabaseMessageExtraImageFile,
+ DatabaseMessageExtraLegacyContext,
+ DatabaseMessageExtraPdfFile,
+ DatabaseMessageExtraTextFile
+} from '$lib/types/database';
+import type { ChatMessagePromptProgress, ChatMessageTimings } from '$lib/types/chat';
+import type { SettingsChatServiceOptions } from '$lib/types/settings';
/**
* ChatService - Low-level API communication layer for llama.cpp server interactions
*
@@ -53,6 +72,7 @@ export class ChatService {
onComplete,
onError,
onReasoningChunk,
+ onToolCallChunk,
onModel,
onFirstValidChunk,
// Generation parameters
@@ -201,6 +221,7 @@ export class ChatService {
onComplete,
onError,
onReasoningChunk,
+ onToolCallChunk,
onModel,
onFirstValidChunk,
conversationId,
@@ -208,7 +229,13 @@ export class ChatService {
);
return;
} else {
- return this.handleNonStreamResponse(response, onComplete, onError, onModel);
+ return this.handleNonStreamResponse(
+ response,
+ onComplete,
+ onError,
+ onToolCallChunk,
+ onModel
+ );
}
} catch (error) {
if (error instanceof Error && error.name === 'AbortError') {
@@ -264,10 +291,12 @@ export class ChatService {
onComplete?: (
response: string,
reasoningContent?: string,
- timings?: ChatMessageTimings
+ timings?: ChatMessageTimings,
+ toolCalls?: string
) => void,
onError?: (error: Error) => void,
onReasoningChunk?: (chunk: string) => void,
+ onToolCallChunk?: (chunk: string) => void,
onModel?: (model: string) => void,
onFirstValidChunk?: () => void,
conversationId?: string,
@@ -282,11 +311,53 @@ export class ChatService {
const decoder = new TextDecoder();
let aggregatedContent = '';
let fullReasoningContent = '';
+ let aggregatedToolCalls: ApiChatCompletionToolCall[] = [];
let hasReceivedData = false;
let lastTimings: ChatMessageTimings | undefined;
let streamFinished = false;
let modelEmitted = false;
let firstValidChunkEmitted = false;
+ let toolCallIndexOffset = 0;
+ let hasOpenToolCallBatch = false;
+
+ const finalizeOpenToolCallBatch = () => {
+ if (!hasOpenToolCallBatch) {
+ return;
+ }
+
+ toolCallIndexOffset = aggregatedToolCalls.length;
+ hasOpenToolCallBatch = false;
+ };
+
+ const processToolCallDelta = (toolCalls?: ApiChatCompletionToolCallDelta[]) => {
+ if (!toolCalls || toolCalls.length === 0) {
+ return;
+ }
+
+ aggregatedToolCalls = this.mergeToolCallDeltas(
+ aggregatedToolCalls,
+ toolCalls,
+ toolCallIndexOffset
+ );
+
+ if (aggregatedToolCalls.length === 0) {
+ return;
+ }
+
+ hasOpenToolCallBatch = true;
+
+ const serializedToolCalls = JSON.stringify(aggregatedToolCalls);
+
+ if (!serializedToolCalls) {
+ return;
+ }
+
+ hasReceivedData = true;
+
+ if (!abortSignal?.aborted) {
+ onToolCallChunk?.(serializedToolCalls);
+ }
+ };
try {
let chunk = '';
@@ -325,6 +396,7 @@ export class ChatService {
const content = parsed.choices[0]?.delta?.content;
const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
+ const toolCalls = parsed.choices[0]?.delta?.tool_calls;
const timings = parsed.timings;
const promptProgress = parsed.prompt_progress;
@@ -342,6 +414,7 @@ export class ChatService {
}
if (content) {
+ finalizeOpenToolCallBatch();
hasReceivedData = true;
aggregatedContent += content;
if (!abortSignal?.aborted) {
@@ -350,12 +423,15 @@ export class ChatService {
}
if (reasoningContent) {
+ finalizeOpenToolCallBatch();
hasReceivedData = true;
fullReasoningContent += reasoningContent;
if (!abortSignal?.aborted) {
onReasoningChunk?.(reasoningContent);
}
}
+
+ processToolCallDelta(toolCalls);
} catch (e) {
console.error('Error parsing JSON chunk:', e);
}
@@ -368,12 +444,26 @@ export class ChatService {
if (abortSignal?.aborted) return;
if (streamFinished) {
- if (!hasReceivedData && aggregatedContent.length === 0) {
+ finalizeOpenToolCallBatch();
+
+ if (
+ !hasReceivedData &&
+ aggregatedContent.length === 0 &&
+ aggregatedToolCalls.length === 0
+ ) {
const noResponseError = new Error('No response received from server. Please try again.');
throw noResponseError;
}
- onComplete?.(aggregatedContent, fullReasoningContent || undefined, lastTimings);
+ const finalToolCalls =
+ aggregatedToolCalls.length > 0 ? JSON.stringify(aggregatedToolCalls) : undefined;
+
+ onComplete?.(
+ aggregatedContent,
+ fullReasoningContent || undefined,
+ lastTimings,
+ finalToolCalls
+ );
}
} catch (error) {
const err = error instanceof Error ? error : new Error('Stream error');
@@ -386,6 +476,54 @@ export class ChatService {
}
}
+ private mergeToolCallDeltas(
+ existing: ApiChatCompletionToolCall[],
+ deltas: ApiChatCompletionToolCallDelta[],
+ indexOffset = 0
+ ): ApiChatCompletionToolCall[] {
+ const result = existing.map((call) => ({
+ ...call,
+ function: call.function ? { ...call.function } : undefined
+ }));
+
+ for (const delta of deltas) {
+ const index =
+ typeof delta.index === 'number' && delta.index >= 0
+ ? delta.index + indexOffset
+ : result.length;
+
+ while (result.length <= index) {
+ result.push({ function: undefined });
+ }
+
+ const target = result[index]!;
+
+ if (delta.id) {
+ target.id = delta.id;
+ }
+
+ if (delta.type) {
+ target.type = delta.type;
+ }
+
+ if (delta.function) {
+ const fn = target.function ? { ...target.function } : {};
+
+ if (delta.function.name) {
+ fn.name = delta.function.name;
+ }
+
+ if (delta.function.arguments) {
+ fn.arguments = (fn.arguments ?? '') + delta.function.arguments;
+ }
+
+ target.function = fn;
+ }
+ }
+
+ return result;
+ }
+
/**
* Handles non-streaming response from the chat completion API.
* Parses the JSON response and extracts the generated content.
@@ -401,9 +539,11 @@ export class ChatService {
onComplete?: (
response: string,
reasoningContent?: string,
- timings?: ChatMessageTimings
+ timings?: ChatMessageTimings,
+ toolCalls?: string
) => void,
onError?: (error: Error) => void,
+ onToolCallChunk?: (chunk: string) => void,
onModel?: (model: string) => void
): Promise<string> {
try {
@@ -423,17 +563,31 @@ export class ChatService {
const content = data.choices[0]?.message?.content || '';
const reasoningContent = data.choices[0]?.message?.reasoning_content;
+ const toolCalls = data.choices[0]?.message?.tool_calls;
if (reasoningContent) {
console.log('Full reasoning content:', reasoningContent);
}
- if (!content.trim()) {
+ let serializedToolCalls: string | undefined;
+
+ if (toolCalls && toolCalls.length > 0) {
+ const mergedToolCalls = this.mergeToolCallDeltas([], toolCalls);
+
+ if (mergedToolCalls.length > 0) {
+ serializedToolCalls = JSON.stringify(mergedToolCalls);
+ if (serializedToolCalls) {
+ onToolCallChunk?.(serializedToolCalls);
+ }
+ }
+ }
+
+ if (!content.trim() && !serializedToolCalls) {
const noResponseError = new Error('No response received from server. Please try again.');
throw noResponseError;
}
- onComplete?.(content, reasoningContent);
+ onComplete?.(content, reasoningContent, undefined, serializedToolCalls);
return content;
} catch (error) {
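
The index-offset bookkeeping above exists because servers restart `delta.index` at 0 for each round of tool calls. When content or reasoning arrives between rounds, `finalizeOpenToolCallBatch` advances the offset to the aggregate length, so a fresh `index: 0` lands on a new slot instead of overwriting the first call. A minimal sketch of just that arithmetic (variable names shadow the ones in the stream handler):

```ts
let aggregatedLength = 0; // stands in for aggregatedToolCalls.length
let toolCallIndexOffset = 0;
let hasOpenToolCallBatch = false;

// Maps a delta index onto the aggregated array, growing it as needed.
function slotFor(deltaIndex: number): number {
	hasOpenToolCallBatch = true;
	const slot = deltaIndex + toolCallIndexOffset;
	aggregatedLength = Math.max(aggregatedLength, slot + 1);
	return slot;
}

// Mirrors finalizeOpenToolCallBatch: close the batch once content resumes.
function finalizeBatch(): void {
	if (!hasOpenToolCallBatch) return;
	toolCallIndexOffset = aggregatedLength;
	hasOpenToolCallBatch = false;
}

console.log(slotFor(0)); // 0 — first round, first call
finalizeBatch();         // a content chunk arrived; close the batch
console.log(slotFor(0)); // 1 — second round reuses index 0 but gets a new slot
```
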
diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts
index 3f97a89183d82..5b5a9d74a5bc6 100644
--- a/tools/server/webui/src/lib/stores/chat.svelte.ts
+++ b/tools/server/webui/src/lib/stores/chat.svelte.ts
@@ -205,6 +205,7 @@ class ChatStore {
type,
timestamp: Date.now(),
thinking: '',
+ toolCalls: '',
children: [],
extra: extras
},
@@ -360,6 +361,7 @@ class ChatStore {
): Promise {
let streamedContent = '';
let streamedReasoningContent = '';
+ let streamedToolCallContent = '';
let resolvedModel: string | null = null;
let modelPersisted = false;
@@ -468,6 +470,20 @@ class ChatStore {
this.updateMessageAtIndex(messageIndex, { thinking: streamedReasoningContent });
},
+ onToolCallChunk: (toolCallChunk: string) => {
+ const chunk = toolCallChunk.trim();
+
+ if (!chunk) {
+ return;
+ }
+
+ streamedToolCallContent = chunk;
+
+ const messageIndex = this.findMessageIndex(assistantMessage.id);
+
+ this.updateMessageAtIndex(messageIndex, { toolCalls: streamedToolCallContent });
+ },
+
onModel: (modelName: string) => {
recordModel(modelName);
},
@@ -475,18 +491,21 @@ class ChatStore {
onComplete: async (
finalContent?: string,
reasoningContent?: string,
- timings?: ChatMessageTimings
+ timings?: ChatMessageTimings,
+ toolCallContent?: string
) => {
slotsService.stopStreaming();
const updateData: {
content: string;
thinking: string;
+ toolCalls: string;
timings?: ChatMessageTimings;
model?: string;
} = {
content: finalContent || streamedContent,
thinking: reasoningContent || streamedReasoningContent,
+ toolCalls: toolCallContent || streamedToolCallContent,
timings: timings
};
@@ -499,7 +518,11 @@ class ChatStore {
const messageIndex = this.findMessageIndex(assistantMessage.id);
- const localUpdateData: { timings?: ChatMessageTimings; model?: string } = {
+ const localUpdateData: {
+ timings?: ChatMessageTimings;
+ model?: string;
+ toolCalls?: string;
+ } = {
timings: timings
};
@@ -507,6 +530,10 @@ class ChatStore {
localUpdateData.model = updateData.model;
}
+ if (updateData.toolCalls !== undefined) {
+ localUpdateData.toolCalls = updateData.toolCalls;
+ }
+
this.updateMessageAtIndex(messageIndex, localUpdateData);
await DatabaseStore.updateCurrentNode(assistantMessage.convId, assistantMessage.id);
@@ -620,6 +647,7 @@ class ChatStore {
content: '',
timestamp: Date.now(),
thinking: '',
+ toolCalls: '',
children: [],
model: null
},
@@ -1443,6 +1471,7 @@ class ChatStore {
role: messageToEdit.role,
content: newContent,
thinking: messageToEdit.thinking || '',
+ toolCalls: messageToEdit.toolCalls || '',
children: [],
model: messageToEdit.model // Preserve original model info when branching
},
@@ -1518,6 +1547,7 @@ class ChatStore {
role: messageToEdit.role,
content: newContent,
thinking: messageToEdit.thinking || '',
+ toolCalls: messageToEdit.toolCalls || '',
children: [],
extra: messageToEdit.extra ? JSON.parse(JSON.stringify(messageToEdit.extra)) : undefined,
model: messageToEdit.model // Preserve original model info when branching
@@ -1589,6 +1619,7 @@ class ChatStore {
role: 'assistant',
content: '',
thinking: '',
+ toolCalls: '',
children: [],
model: null
},
@@ -1647,6 +1678,7 @@ class ChatStore {
role: 'assistant',
content: '',
thinking: '',
+ toolCalls: '',
children: [],
model: null
},
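
Note the asymmetry with the other callbacks: `onChunk` and `onReasoningChunk` deliver increments that the store concatenates, while each `onToolCallChunk` payload is the full re-serialized aggregate, so the store assigns rather than appends. A condensed sketch of that contract (standalone, outside the store class):

```ts
// Each onToolCallChunk delivers the complete serialized snapshot of all tool
// calls so far, not a fragment, so the latest value wins.
let streamedToolCallContent = '';

function onToolCallChunk(toolCallChunk: string): void {
	const chunk = toolCallChunk.trim();
	if (!chunk) return;
	streamedToolCallContent = chunk; // overwrite, never +=
}

onToolCallChunk(JSON.stringify([{ function: { name: 'get_weather', arguments: '{"ci' } }]));
onToolCallChunk(JSON.stringify([{ function: { name: 'get_weather', arguments: '{"city":"Berlin"}' } }]));
// streamedToolCallContent now holds only the latest, most complete snapshot.
```
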
diff --git a/tools/server/webui/src/lib/stores/database.ts b/tools/server/webui/src/lib/stores/database.ts
index 6394c5b7eda74..82edcc3227c27 100644
--- a/tools/server/webui/src/lib/stores/database.ts
+++ b/tools/server/webui/src/lib/stores/database.ts
@@ -114,6 +114,7 @@ export class DatabaseStore {
...message,
id: uuid(),
parent: parentId,
+ toolCalls: message.toolCalls ?? '',
children: []
};
@@ -154,6 +155,7 @@ export class DatabaseStore {
content: '',
parent: null,
thinking: '',
+ toolCalls: '',
children: []
};
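
Because `toolCalls` is new, rows written before this change lack the field; the `?? ''` backfill on insert lets readers treat it as always present. A hedged sketch of the normalization (the helper name is hypothetical):

```ts
import type { DatabaseMessage } from '$lib/types/database';

// Backfill toolCalls for legacy rows so downstream code can assume a string.
function withToolCallsDefault(message: DatabaseMessage): DatabaseMessage {
	return { ...message, toolCalls: message.toolCalls ?? '' };
}
```
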
diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts
index 6ebc43db0e3ef..1a8bc64989957 100644
--- a/tools/server/webui/src/lib/types/api.d.ts
+++ b/tools/server/webui/src/lib/types/api.d.ts
@@ -183,6 +183,23 @@ export interface ApiChatCompletionRequest {
samplers?: string[];
// Custom parameters (JSON string)
custom?: Record<string, unknown>;
+ timings_per_token?: boolean;
+}
+
+export interface ApiChatCompletionToolCallFunctionDelta {
+ name?: string;
+ arguments?: string;
+}
+
+export interface ApiChatCompletionToolCallDelta {
+ index?: number;
+ id?: string;
+ type?: string;
+ function?: ApiChatCompletionToolCallFunctionDelta;
+}
+
+export interface ApiChatCompletionToolCall extends ApiChatCompletionToolCallDelta {
+ function?: ApiChatCompletionToolCallFunctionDelta & { arguments?: string };
}
export interface ApiChatCompletionStreamChunk {
@@ -195,6 +212,7 @@ export interface ApiChatCompletionStreamChunk {
content?: string;
reasoning_content?: string;
model?: string;
+ tool_calls?: ApiChatCompletionToolCallDelta[];
};
}>;
timings?: {
@@ -216,6 +234,7 @@ export interface ApiChatCompletionResponse {
content: string;
reasoning_content?: string;
model?: string;
+ tool_calls?: ApiChatCompletionToolCallDelta[];
};
}>;
}
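
The split between the two tool-call types mirrors the wire format: `ApiChatCompletionToolCallDelta` models the fragments that stream in, and `ApiChatCompletionToolCall` the accumulated value the UI renders. A hypothetical three-chunk stream for one call, typed against these interfaces:

```ts
import type { ApiChatCompletionToolCall, ApiChatCompletionToolCallDelta } from '$lib/types/api';

// Fragments as they might arrive across SSE chunks (sample data).
const fragments: ApiChatCompletionToolCallDelta[] = [
	{ index: 0, id: 'call_1', type: 'function', function: { name: 'get_weather' } },
	{ index: 0, function: { arguments: '{"city":' } },
	{ index: 0, function: { arguments: '"Berlin"}' } }
];

// What mergeToolCallDeltas accumulates them into: arguments concatenated,
// id/type taken from the first fragment that carried them.
const accumulated: ApiChatCompletionToolCall = {
	index: 0,
	id: 'call_1',
	type: 'function',
	function: { name: 'get_weather', arguments: '{"city":"Berlin"}' }
};
```
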
diff --git a/tools/server/webui/src/lib/types/database.d.ts b/tools/server/webui/src/lib/types/database.d.ts
index b5318b73f4108..16debc6d67d58 100644
--- a/tools/server/webui/src/lib/types/database.d.ts
+++ b/tools/server/webui/src/lib/types/database.d.ts
@@ -60,6 +60,7 @@ export interface DatabaseMessage {
content: string;
parent: string;
thinking: string;
+ toolCalls?: string;
children: string[];
extra?: DatabaseMessageExtra[];
timings?: ChatMessageTimings;
diff --git a/tools/server/webui/src/lib/types/settings.d.ts b/tools/server/webui/src/lib/types/settings.d.ts
index 946ef015e92e1..b85b0597d0068 100644
--- a/tools/server/webui/src/lib/types/settings.d.ts
+++ b/tools/server/webui/src/lib/types/settings.d.ts
@@ -38,12 +38,19 @@ export interface SettingsChatServiceOptions {
samplers?: string | string[];
// Custom parameters
custom?: string;
+ timings_per_token?: boolean;
// Callbacks
onChunk?: (chunk: string) => void;
onReasoningChunk?: (chunk: string) => void;
+ onToolCallChunk?: (chunk: string) => void;
onModel?: (model: string) => void;
onFirstValidChunk?: () => void;
- onComplete?: (response: string, reasoningContent?: string, timings?: ChatMessageTimings) => void;
+ onComplete?: (
+ response: string,
+ reasoningContent?: string,
+ timings?: ChatMessageTimings,
+ toolCalls?: string
+ ) => void;
onError?: (error: Error) => void;
}
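
Putting the widened `SettingsChatServiceOptions` contract together, a caller can observe tool calls both mid-stream and at completion. A hedged usage sketch (only the relevant callbacks are shown; the remaining options are elided):

```ts
import type { SettingsChatServiceOptions } from '$lib/types/settings';

const options: Partial<SettingsChatServiceOptions> = {
	// Mid-stream: chunk is the full serialized ApiChatCompletionToolCall[] snapshot.
	onToolCallChunk: (chunk) => {
		console.log('tool calls so far:', JSON.parse(chunk));
	},
	// Completion: toolCalls carries the final serialized aggregate, if any.
	onComplete: (response, reasoningContent, timings, toolCalls) => {
		if (toolCalls) {
			console.log('final tool calls:', JSON.parse(toolCalls));
		}
	}
};
```
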