From 3892adb1376e4374464e9e27749a15e44eecd49a Mon Sep 17 00:00:00 2001
From: jmansdorfer
Date: Tue, 21 Oct 2025 10:22:51 -0400
Subject: [PATCH] Add reasoning_effort parameter

---
 .gitignore                  |  2 ++
 predictionguard/src/chat.py | 58 +++++++++++++++++++++++-----------------------------------
 predictionguard/version.py  |  2 +-
 3 files changed, 26 insertions(+), 36 deletions(-)

diff --git a/.gitignore b/.gitignore
index 170005e..ce73674 100644
--- a/.gitignore
+++ b/.gitignore
@@ -108,3 +108,5 @@ venv.bak/
 
 # JetBrains Folder
 .idea
+# uv package management
+uv.lock

diff --git a/predictionguard/src/chat.py b/predictionguard/src/chat.py
index 3f4c3ce..cd46a43 100644
--- a/predictionguard/src/chat.py
+++ b/predictionguard/src/chat.py
@@ -98,6 +98,7 @@ def create(
         max_tokens: Optional[int] = None,
         parallel_tool_calls: Optional[bool] = None,
         presence_penalty: Optional[float] = None,
+        reasoning_effort: Optional[str] = None,
         stop: Optional[
             Union[
                 str, List[str]
@@ -124,11 +125,13 @@ def create(
         :param frequency_penalty: The frequency penalty to use.
         :param logit_bias: The logit bias to use.
         :param max_completion_tokens: The maximum amount of tokens the model should return.
+        :param max_tokens: Deprecated, please use max_completion_tokens instead.
         :param parallel_tool_calls: The parallel tool calls to use.
         :param presence_penalty: The presence penalty to use.
+        :param reasoning_effort: How much effort the model should use for reasoning. Only supported by reasoning models.
         :param stop: The completion stopping criteria.
         :param stream: Option to stream the API response
-        :param temperature: The consistency of the model responses to the same prompt. The higher the more consistent.
+        :param temperature: The consistency of the model responses to the same prompt. The lower it is set, the more consistent.
         :param tool_choice: The tool choice to use.
         :param tools: Options to pass to the tool choice.
         :param top_p: The sampling for the model to use.
@@ -157,6 +160,7 @@ def create(
             max_completion_tokens,
             parallel_tool_calls,
             presence_penalty,
+            reasoning_effort,
             stop,
             stream,
             temperature,
@@ -182,6 +186,7 @@ def _generate_chat(
         max_completion_tokens,
         parallel_tool_calls,
         presence_penalty,
+        reasoning_effort,
         stop,
         stream,
         temperature,
@@ -311,40 +316,23 @@ def stream_generator(url, headers, payload, stream, timeout):
                         elif entry["type"] == "text":
                             continue
 
-        # TODO: Remove `tool_choice` check when null value available in API
-        if tool_choice is None:
-            payload_dict = {
-                "model": model,
-                "messages": messages,
-                "frequency_penalty": frequency_penalty,
-                "logit_bias": logit_bias,
-                "max_completion_tokens": max_completion_tokens,
-                "parallel_tool_calls": parallel_tool_calls,
-                "presence_penalty": presence_penalty,
-                "stop": stop,
-                "stream": stream,
-                "temperature": temperature,
-                "tools": tools,
-                "top_p": top_p,
-                "top_k": top_k,
-            }
-        else:
-            payload_dict = {
-                "model": model,
-                "messages": messages,
-                "frequency_penalty": frequency_penalty,
-                "logit_bias": logit_bias,
-                "max_completion_tokens": max_completion_tokens,
-                "parallel_tool_calls": parallel_tool_calls,
-                "presence_penalty": presence_penalty,
-                "stop": stop,
-                "stream": stream,
-                "temperature": temperature,
-                "tool_choice": tool_choice,
-                "tools": tools,
-                "top_p": top_p,
-                "top_k": top_k,
-            }
+        payload_dict = {
+            "model": model,
+            "messages": messages,
+            "frequency_penalty": frequency_penalty,
+            "logit_bias": logit_bias,
+            "max_completion_tokens": max_completion_tokens,
+            "parallel_tool_calls": parallel_tool_calls,
+            "presence_penalty": presence_penalty,
+            "reasoning_effort": reasoning_effort,
+            "stop": stop,
+            "stream": stream,
+            "temperature": temperature,
+            "tool_choice": tool_choice,
+            "tools": tools,
+            "top_p": top_p,
+            "top_k": top_k,
+        }
 
         if input:
             payload_dict["input"] = input

diff --git a/predictionguard/version.py b/predictionguard/version.py
index 48da1d9..7f0ba96 100644
--- a/predictionguard/version.py
+++ b/predictionguard/version.py
@@ -1,2 +1,2 @@
 # Setting the package version
-__version__ = "2.9.1"
+__version__ = "2.9.2"
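
Usage note: the new parameter threads straight through to the request payload, so callers pass it alongside the existing chat options. Below is a minimal sketch, assuming the client is constructed as in the SDK's README (API key read from the PREDICTIONGUARD_API_KEY environment variable); the model name is a placeholder, and the "low"/"medium"/"high" effort values are an assumption, since the patch does not enumerate the accepted values.

    from predictionguard import PredictionGuard

    # Assumes PREDICTIONGUARD_API_KEY is set in the environment.
    client = PredictionGuard()

    response = client.chat.completions.create(
        model="a-reasoning-model",  # placeholder; use a reasoning-capable model
        messages=[{"role": "user", "content": "What is 17 * 24?"}],
        max_completion_tokens=500,
        reasoning_effort="low",  # assumed values: "low" | "medium" | "high"
    )

    print(response["choices"][0]["message"]["content"])

Because reasoning_effort defaults to None and is only included in the payload when set, existing callers are unaffected; models that do not support reasoning simply never see the field from older client code.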