
Commit 27317bf

Merge pull request modelcontextprotocol#31 from modelcontextprotocol/justin/model-reqs-during-sampling
Optional model preferences during sampling
2 parents: 89be0b7 + c86fff4

File tree: 3 files changed, +159 -15 lines

docs/spec/sampling.md (42 additions, 5 deletions)

````diff
@@ -35,13 +35,17 @@ A Sampling Request in the Model Context Protocol (MCP) represents a request from
 
 Message content can be either text or images, allowing for multimodal interactions where supported by the model. Text content is provided directly as strings, while image content must be base64 encoded with an appropriate MIME type.
 
+### Model Preferences
+
+Servers can express preferences for model selection using the `ModelPreferences` object. This allows servers to indicate priorities for factors like cost, speed, and intelligence, as well as provide hints for specific models.
+
 ## Use Cases
 
 Common use cases for sampling include generating responses in chat interfaces, code completion, and content generation. Here are some example sampling scenarios:
 
 ### Chat Response
 
-A server requesting a chat response:
+A server requesting a chat response with model preferences:
 
 ```json
 {
@@ -55,7 +59,16 @@ A server requesting a chat response:
     }
   ],
   "maxTokens": 100,
-  "temperature": 0.7
+  "temperature": 0.7,
+  "modelPreferences": {
+    "hints": [
+      {
+        "name": "claude-3-sonnet"
+      }
+    ],
+    "intelligencePriority": 0.8,
+    "speedPriority": 0.5
+  }
 }
 ```
 
@@ -82,7 +95,18 @@ A server requesting analysis of an image:
      }
    }
  ],
-  "maxTokens": 200
+  "maxTokens": 200,
+  "modelPreferences": {
+    "hints": [
+      {
+        "name": "claude-3-opus"
+      },
+      {
+        "name": "claude-3-sonnet"
+      }
+    ],
+    "intelligencePriority": 1.0
+  }
 }
 ```
 
@@ -126,6 +150,7 @@ To request sampling from an LLM via the client, the server MUST send a `sampling
 Method: `sampling/createMessage`
 Params:
 - `messages`: Array of `SamplingMessage` objects representing the conversation history
+- `modelPreferences`: Optional `ModelPreferences` object to guide model selection
 - `systemPrompt`: Optional system prompt to use
 - `includeContext`: Optional request to include context from MCP servers
 - `temperature`: Optional sampling temperature
@@ -152,7 +177,19 @@ Example:
    "systemPrompt": "You are a helpful assistant.",
    "maxTokens": 100,
    "temperature": 0.7,
-    "includeContext": "none"
+    "includeContext": "none",
+    "modelPreferences": {
+      "hints": [
+        {
+          "name": "claude-3-sonnet"
+        },
+        {
+          "name": "claude-3-opus"
+        }
+      ],
+      "intelligencePriority": 0.9,
+      "speedPriority": 0.6
+    }
  }
 }
 ```
@@ -177,7 +214,7 @@ Example:
      "type": "text",
      "text": "The capital of France is Paris."
    },
-    "model": "gpt-4",
+    "model": "claude-3-sonnet-20240307",
    "stopReason": "endTurn"
  }
 }
````
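Taken together, the request shape this diff introduces can be exercised as follows. This is a sketch only: the interfaces are abridged from the `schema.ts` changes in this commit, and `buildChatRequest` is a hypothetical helper, not part of the spec.

```typescript
// Abridged shapes from schema.ts: a sampling request with optional
// model preferences. `includeContext` is typed loosely here since this
// diff does not enumerate its values.
interface ModelHint {
  name?: string;
  [key: string]: string | undefined;
}

interface ModelPreferences {
  hints?: ModelHint[];
  costPriority?: number;
  speedPriority?: number;
  intelligencePriority?: number;
}

interface SamplingMessage {
  role: "user" | "assistant";
  content:
    | { type: "text"; text: string }
    | { type: "image"; data: string; mimeType: string };
}

interface CreateMessageParams {
  messages: SamplingMessage[];
  modelPreferences?: ModelPreferences; // advisory; the client MAY ignore it
  systemPrompt?: string;
  includeContext?: string;
  temperature?: number;
  maxTokens: number;
}

// Hypothetical helper: build the params for a `sampling/createMessage`
// request carrying the server's model preferences.
function buildChatRequest(
  text: string,
  prefs?: ModelPreferences,
): CreateMessageParams {
  return {
    messages: [{ role: "user", content: { type: "text", text } }],
    maxTokens: 100,
    temperature: 0.7,
    modelPreferences: prefs,
  };
}

const params = buildChatRequest("What is the capital of France?", {
  hints: [{ name: "claude-3-sonnet" }],
  intelligencePriority: 0.8,
  speedPriority: 0.5,
});
```

Because `modelPreferences` is optional, existing servers that omit it continue to produce valid requests unchanged.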

schema/schema.json (40 additions, 8 deletions)

````diff
@@ -290,6 +290,10 @@
      "properties": {},
      "type": "object"
    },
+    "modelPreferences": {
+      "$ref": "#/definitions/ModelPreferences",
+      "description": "The server's preferences for which model to select. The client MAY ignore these preferences."
+    },
    "stopSequences": {
      "items": {
        "type": "string"
@@ -347,20 +351,14 @@
      "type": "string"
    },
    "stopReason": {
-      "description": "The reason why sampling stopped.",
-      "enum": [
-        "endTurn",
-        "maxTokens",
-        "stopSequence"
-      ],
+      "description": "The reason why sampling stopped, if known.",
      "type": "string"
    }
  },
  "required": [
    "content",
    "model",
-    "role",
-    "stopReason"
+    "role"
  ],
  "type": "object"
 },
@@ -971,6 +969,37 @@
    ],
    "type": "object"
  },
+  "ModelPreferences": {
+    "description": "The server's preferences for model selection, requested of the client during sampling.\n\nBecause LLMs can vary along multiple dimensions, choosing the \"best\" model is\nrarely straightforward. Different models excel in different areas—some are\nfaster but less capable, others are more capable but more expensive, and so\non. This interface allows servers to express their priorities across multiple\ndimensions to help clients make an appropriate selection for their use case.\n\nThese preferences are always advisory. The client MAY ignore them. It is also\nup to the client to decide how to interpret these preferences and how to\nbalance them against other considerations.",
+    "properties": {
+      "costPriority": {
+        "description": "How much to prioritize cost when selecting a model. A value of 0 means cost\nis not important, while a value of 1 means cost is the most important\nfactor.",
+        "maximum": 1,
+        "minimum": 0,
+        "type": "number"
+      },
+      "hints": {
+        "description": "Optional string hints to use for model selection. How these hints are\ninterpreted depends on the key(s) in each record:\n\n- If the record contains a `name` key:\n  - The client SHOULD treat this as a substring of a model name; for example:\n    - `claude-3-5-sonnet` should match `claude-3-5-sonnet-20241022`\n    - `sonnet` should match `claude-3-5-sonnet-20241022`, `claude-3-sonnet-20240229`, etc.\n    - `claude` should match any Claude model\n  - The client MAY also map the string to a different provider's model name or a different model family, as long as it fills a similar niche; for example:\n    - `gemini-1.5-flash` could match `claude-3-haiku-20240307`\n\nAll other keys are currently left unspecified by the spec and are up to the\nclient to interpret.\n\nIf multiple hints are specified, the client MUST evaluate them in order\n(such that the first match is taken).\n\nThe client SHOULD prioritize these hints over the numeric priorities, but\nMAY still use the priorities to select from ambiguous matches.",
+        "items": {
+          "$ref": "#/definitions/Record<string,string>"
+        },
+        "type": "array"
+      },
+      "intelligencePriority": {
+        "description": "How much to prioritize intelligence and capabilities when selecting a\nmodel. A value of 0 means intelligence is not important, while a value of 1\nmeans intelligence is the most important factor.",
+        "maximum": 1,
+        "minimum": 0,
+        "type": "number"
+      },
+      "speedPriority": {
+        "description": "How much to prioritize sampling speed (latency) when selecting a model. A\nvalue of 0 means speed is not important, while a value of 1 means speed is\nthe most important factor.",
+        "maximum": 1,
+        "minimum": 0,
+        "type": "number"
+      }
+    },
+    "type": "object"
+  },
  "Notification": {
    "properties": {
      "method": {
@@ -1238,6 +1267,9 @@
    ],
    "type": "object"
  },
+  "Record<string,string>": {
+    "type": "object"
+  },
  "Request": {
    "properties": {
      "method": {
````

schema/schema.ts (77 additions, 2 deletions)

````diff
@@ -682,6 +682,10 @@ export interface CreateMessageRequest extends Request {
  method: "sampling/createMessage";
  params: {
    messages: SamplingMessage[];
+    /**
+     * The server's preferences for which model to select. The client MAY ignore these preferences.
+     */
+    modelPreferences?: ModelPreferences;
    /**
     * An optional system prompt the server wants to use for sampling. The client MAY modify or omit this prompt.
     */
@@ -715,9 +719,9 @@ export interface CreateMessageResult extends Result, SamplingMessage {
   */
  model: string;
  /**
-   * The reason why sampling stopped.
+   * The reason why sampling stopped, if known.
   */
-  stopReason: "endTurn" | "stopSequence" | "maxTokens";
+  stopReason?: "endTurn" | "stopSequence" | "maxTokens" | string;
 }
 
 /**
@@ -756,6 +760,77 @@ export interface ImageContent {
  mimeType: string;
 }
 
+/**
+ * The server's preferences for model selection, requested of the client during sampling.
+ *
+ * Because LLMs can vary along multiple dimensions, choosing the "best" model is
+ * rarely straightforward. Different models excel in different areas—some are
+ * faster but less capable, others are more capable but more expensive, and so
+ * on. This interface allows servers to express their priorities across multiple
+ * dimensions to help clients make an appropriate selection for their use case.
+ *
+ * These preferences are always advisory. The client MAY ignore them. It is also
+ * up to the client to decide how to interpret these preferences and how to
+ * balance them against other considerations.
+ */
+export interface ModelPreferences {
+  /**
+   * Optional string hints to use for model selection. How these hints are
+   * interpreted depends on the key(s) in each record:
+   *
+   * - If the record contains a `name` key:
+   *   - The client SHOULD treat this as a substring of a model name; for example:
+   *     - `claude-3-5-sonnet` should match `claude-3-5-sonnet-20241022`
+   *     - `sonnet` should match `claude-3-5-sonnet-20241022`, `claude-3-sonnet-20240229`, etc.
+   *     - `claude` should match any Claude model
+   *   - The client MAY also map the string to a different provider's model name or a different model family, as long as it fills a similar niche; for example:
+   *     - `gemini-1.5-flash` could match `claude-3-haiku-20240307`
+   *
+   * All other keys are currently left unspecified by the spec and are up to the
+   * client to interpret.
+   *
+   * If multiple hints are specified, the client MUST evaluate them in order
+   * (such that the first match is taken).
+   *
+   * The client SHOULD prioritize these hints over the numeric priorities, but
+   * MAY still use the priorities to select from ambiguous matches.
+   */
+  hints?: Record<"name" | string, string>[];
+
+  /**
+   * How much to prioritize cost when selecting a model. A value of 0 means cost
+   * is not important, while a value of 1 means cost is the most important
+   * factor.
+   *
+   * @TJS-type number
+   * @minimum 0
+   * @maximum 1
+   */
+  costPriority?: number;
+
+  /**
+   * How much to prioritize sampling speed (latency) when selecting a model. A
+   * value of 0 means speed is not important, while a value of 1 means speed is
+   * the most important factor.
+   *
+   * @TJS-type number
+   * @minimum 0
+   * @maximum 1
+   */
+  speedPriority?: number;
+
+  /**
+   * How much to prioritize intelligence and capabilities when selecting a
+   * model. A value of 0 means intelligence is not important, while a value of 1
+   * means intelligence is the most important factor.
+   *
+   * @TJS-type number
+   * @minimum 0
+   * @maximum 1
+   */
+  intelligencePriority?: number;
+}
+
 /* Autocomplete */
 /**
  * A request from the client to the server, to ask for completion options.
````
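The hint-matching rules documented on `hints` (evaluate in order, treat `name` as a substring of a model name, take the first match, fall back to the numeric priorities) can be sketched from the client's side as follows. The model IDs and the `selectModel` helper are illustrative, not part of the spec.

```typescript
// Sketch of client-side hint matching: hints are evaluated in order,
// a `name` value is treated as a substring of a model name, and the
// first matching hint wins.
interface ModelHint {
  name?: string;
}

function selectModel(
  hints: ModelHint[],
  available: string[],
): string | undefined {
  for (const hint of hints) {
    const needle = hint.name;
    if (needle === undefined) continue; // keys other than `name` are client-defined
    const match = available.find((model) => model.includes(needle));
    if (match !== undefined) return match; // first match is taken (MUST evaluate in order)
  }
  // No hint matched: the client would fall back to the numeric
  // cost/speed/intelligence priorities (not shown here).
  return undefined;
}

const available = ["claude-3-haiku-20240307", "claude-3-sonnet-20240229"];
const chosen = selectModel([{ name: "opus" }, { name: "sonnet" }], available);
// `opus` matches neither model, so the second hint selects the sonnet model
```

Because the spec also allows mapping a hint to a different provider's model of a similar niche, a real client might layer an equivalence table on top of this substring check.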

0 commit comments