Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions client/inprocess_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ func TestInProcessMCPClient(t *testing.T) {
Type: "text",
Text: "Input parameter: " + request.Params.Arguments["parameter-1"].(string),
},
mcp.AudioContent{
Type: "audio",
Data: "base64-encoded-audio-data",
MIMEType: "audio/wav",
},
},
}, nil
})
Expand Down Expand Up @@ -77,6 +82,14 @@ func TestInProcessMCPClient(t *testing.T) {
Text: "Test prompt with arg1: " + request.Params.Arguments["arg1"],
},
},
{
Role: mcp.RoleUser,
Content: mcp.AudioContent{
Type: "audio",
Data: "base64-encoded-audio-data",
MIMEType: "audio/wav",
},
},
},
}, nil
},
Expand Down Expand Up @@ -192,8 +205,8 @@ func TestInProcessMCPClient(t *testing.T) {
t.Fatalf("CallTool failed: %v", err)
}

if len(result.Content) != 1 {
t.Errorf("Expected 1 content item, got %d", len(result.Content))
if len(result.Content) != 2 {
t.Errorf("Expected 2 content item, got %d", len(result.Content))
}
})

Expand Down Expand Up @@ -359,14 +372,17 @@ func TestInProcessMCPClient(t *testing.T) {

request := mcp.GetPromptRequest{}
request.Params.Name = "test-prompt"
request.Params.Arguments = map[string]string{
"arg1": "arg1 value",
}

result, err := client.GetPrompt(context.Background(), request)
if err != nil {
t.Errorf("GetPrompt failed: %v", err)
}

if len(result.Messages) != 1 {
t.Errorf("Expected 1 message, got %d", len(result.Messages))
if len(result.Messages) != 2 {
t.Errorf("Expected 2 message, got %d", len(result.Messages))
}
})

Expand Down
2 changes: 1 addition & 1 deletion mcp/prompts.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ const (
// resources from the MCP server.
type PromptMessage struct {
	// Role is the role attached to the message; it is serialized under the
	// JSON key "role".
	Role Role `json:"role"`
	// Content is the message payload, serialized under the JSON key
	// "content". It can be TextContent, ImageContent, AudioContent, or
	// EmbeddedResource.
	Content Content `json:"content"`
}

// PromptListChangedNotification is an optional notification from the server
Expand Down
2 changes: 1 addition & 1 deletion mcp/tools.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ type ListToolsResult struct {
// should be reported as an MCP error response.
type CallToolResult struct {
Result
Content []Content `json:"content"` // Can be TextContent, ImageContent, or EmbeddedResource
Content []Content `json:"content"` // Can be TextContent, ImageContent, AudioContent, or EmbeddedResource
// Whether the tool call ended in an error.
//
// If not set, this is assumed to be false (the call was successful).
Expand Down
15 changes: 14 additions & 1 deletion mcp/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -656,7 +656,7 @@ type CreateMessageResult struct {
// SamplingMessage describes a message issued to or received from an LLM API.
type SamplingMessage struct {
	// Role is the role attached to the message; it is serialized under the
	// JSON key "role".
	Role Role `json:"role"`
	// Content is the message payload, serialized under the JSON key
	// "content". It can be TextContent, ImageContent, or AudioContent.
	Content interface{} `json:"content"`
}

type Annotations struct {
Expand Down Expand Up @@ -709,6 +709,19 @@ type ImageContent struct {

// isContent is an empty, unexported marker method on ImageContent.
// NOTE(review): presumably this is what makes ImageContent satisfy the
// package's Content interface, whose definition is not visible here — confirm.
func (ImageContent) isContent() {}

// AudioContent represents the contents of audio, embedded into a prompt or tool call result.
// It must have Type set to "audio".
//
// It embeds Annotated and serializes its own fields under the JSON keys
// "type", "data", and "mimeType".
type AudioContent struct {
	Annotated
	// Type is the content-type discriminator for this value.
	Type string `json:"type"` // Must be "audio"
	// The base64-encoded audio data.
	Data string `json:"data"`
	// The MIME type of the audio. Different providers may support different audio types.
	MIMEType string `json:"mimeType"`
}

// isContent is an empty, unexported marker method on AudioContent.
// NOTE(review): presumably this is what makes AudioContent satisfy the
// package's Content interface, whose definition is not visible here — confirm.
func (AudioContent) isContent() {}

// EmbeddedResource represents the contents of a resource, embedded into a prompt or tool call result.
//
// It is up to the client how best to render embedded resources for the
Expand Down
40 changes: 39 additions & 1 deletion mcp/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ func AsImageContent(content interface{}) (*ImageContent, bool) {
return asType[ImageContent](content)
}

// AsAudioContent attempts to view content as an AudioContent.
//
// It delegates to the generic asType helper instantiated with AudioContent
// and returns that helper's pointer and boolean results unchanged.
func AsAudioContent(content interface{}) (*AudioContent, bool) {
	audio, ok := asType[AudioContent](content)
	return audio, ok
}

// AsEmbeddedResource attempts to cast the given interface to EmbeddedResource
func AsEmbeddedResource(content interface{}) (*EmbeddedResource, bool) {
return asType[EmbeddedResource](content)
Expand Down Expand Up @@ -208,7 +213,15 @@ func NewImageContent(data, mimeType string) ImageContent {
}
}

// NewAudioContent is a helper that builds an AudioContent value.
//
// The result has Type set to "audio", Data set to data, and MIMEType set to
// mimeType; every other field is left at its zero value.
func NewAudioContent(data, mimeType string) AudioContent {
	var audio AudioContent
	audio.Type = "audio"
	audio.Data = data
	audio.MIMEType = mimeType
	return audio
}

// Helper function to create a new EmbeddedResource
func NewEmbeddedResource(resource ResourceContents) EmbeddedResource {
return EmbeddedResource{
Expand Down Expand Up @@ -246,6 +259,23 @@ func NewToolResultImage(text, imageData, mimeType string) *CallToolResult {
}
}

// NewToolResultAudio creates a new CallToolResult with both text and audio
// content.
//
// The result's Content holds two items, in this order: a TextContent carrying
// text, then an AudioContent whose Data is audioData and whose MIMEType is
// mimeType. No other field of the CallToolResult is set.
func NewToolResultAudio(text, audioData, mimeType string) *CallToolResult {
	return &CallToolResult{
		Content: []Content{
			TextContent{
				Type: "text",
				Text: text,
			},
			AudioContent{
				Type:     "audio",
				Data:     audioData,
				MIMEType: mimeType,
			},
		},
	}
}

// NewToolResultResource creates a new CallToolResult with an embedded resource
func NewToolResultResource(
text string,
Expand Down Expand Up @@ -423,6 +453,14 @@ func ParseContent(contentMap map[string]any) (Content, error) {
}
return NewImageContent(data, mimeType), nil

case "audio":
data := ExtractString(contentMap, "data")
mimeType := ExtractString(contentMap, "mimeType")
if data == "" || mimeType == "" {
return nil, fmt.Errorf("audio data or mimeType is missing")
}
return NewAudioContent(data, mimeType), nil

case "resource":
resourceMap := ExtractMap(contentMap, "resource")
if resourceMap == nil {
Expand Down