2 changes: 2 additions & 0 deletions .gitignore
@@ -108,3 +108,5 @@ venv.bak/
# JetBrains Folder
.idea

# uv package management
uv.lock
58 changes: 23 additions & 35 deletions predictionguard/src/chat.py
@@ -98,6 +98,7 @@ def create(
max_tokens: Optional[int] = None,
parallel_tool_calls: Optional[bool] = None,
presence_penalty: Optional[float] = None,
reasoning_effort: Optional[str] = None,
stop: Optional[
Union[
str, List[str]
@@ -124,11 +125,13 @@ def create(
:param frequency_penalty: The frequency penalty to use.
:param logit_bias: The logit bias to use.
:param max_completion_tokens: The maximum number of tokens the model should return.
:param max_tokens: Deprecated; use max_completion_tokens instead.
:param parallel_tool_calls: Whether to allow the model to call tools in parallel.
:param presence_penalty: The presence penalty to use.
:param reasoning_effort: How much effort the model should spend on reasoning. Only supported by reasoning models.
:param stop: The completion stopping criteria.
:param stream: Whether to stream the API response.
:param temperature: The consistency of the model responses to the same prompt. The higher the more consistent.
:param temperature: The sampling temperature; higher values make responses more varied, lower values more consistent.
:param tool_choice: Controls which (if any) tool the model calls.
:param tools: The list of tools the model may call.
:param top_p: The nucleus sampling probability mass to use.
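
For context on how the new argument is used, a minimal sketch of a call that exercises it. The PredictionGuard client entry point and dict-style response are assumed from earlier releases; the model name and the "low"/"medium"/"high" value set are illustrative assumptions, not confirmed by this diff:

from predictionguard import PredictionGuard

# Assumes the API key is supplied via the environment
# (PREDICTIONGUARD_API_KEY in recent releases).
client = PredictionGuard()

response = client.chat.completions.create(
    model="DeepSeek-R1-Distill-Qwen-32B",  # illustrative reasoning model
    messages=[{"role": "user", "content": "How many primes are below 100?"}],
    reasoning_effort="high",  # new in this PR; per the docstring, only
                              # supported by reasoning models
    max_completion_tokens=1024,
)
print(response["choices"][0]["message"]["content"])
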
@@ -157,6 +160,7 @@ def create(
max_completion_tokens,
parallel_tool_calls,
presence_penalty,
reasoning_effort,
stop,
stream,
temperature,
@@ -182,6 +186,7 @@ def _generate_chat(
max_completion_tokens,
parallel_tool_calls,
presence_penalty,
reasoning_effort,
stop,
stream,
temperature,
@@ -311,40 +316,23 @@ def stream_generator(url, headers, payload, stream, timeout):
elif entry["type"] == "text":
continue

# TODO: Remove `tool_choice` check when null value available in API
if tool_choice is None:
payload_dict = {
"model": model,
"messages": messages,
"frequency_penalty": frequency_penalty,
"logit_bias": logit_bias,
"max_completion_tokens": max_completion_tokens,
"parallel_tool_calls": parallel_tool_calls,
"presence_penalty": presence_penalty,
"stop": stop,
"stream": stream,
"temperature": temperature,
"tools": tools,
"top_p": top_p,
"top_k": top_k,
}
else:
payload_dict = {
"model": model,
"messages": messages,
"frequency_penalty": frequency_penalty,
"logit_bias": logit_bias,
"max_completion_tokens": max_completion_tokens,
"parallel_tool_calls": parallel_tool_calls,
"presence_penalty": presence_penalty,
"stop": stop,
"stream": stream,
"temperature": temperature,
"tool_choice": tool_choice,
"tools": tools,
"top_p": top_p,
"top_k": top_k,
}
payload_dict = {
"model": model,
"messages": messages,
"frequency_penalty": frequency_penalty,
"logit_bias": logit_bias,
"max_completion_tokens": max_completion_tokens,
"parallel_tool_calls": parallel_tool_calls,
"presence_penalty": presence_penalty,
"reasoning_effort": reasoning_effort,
"stop": stop,
"stream": stream,
"temperature": temperature,
"tool_choice": tool_choice,
"tools": tools,
"top_p": top_p,
"top_k": top_k,
}

if input:
payload_dict["input"] = input
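
Note: the pair of payload blocks above is the before/after of this change. The duplicated tool_choice branches (first block) collapse into one unconditional dict (second block) now that the API accepts a null tool_choice, per the removed TODO, with reasoning_effort threaded through alongside the other options. A sketch of the same pattern in isolation; build_payload and prune_nones are hypothetical names, not SDK helpers:

from typing import Any, Dict, List, Optional

def build_payload(
    model: str,
    messages: List[Dict[str, Any]],
    reasoning_effort: Optional[str] = None,
    tool_choice: Optional[str] = None,
    **rest: Any,
) -> Dict[str, Any]:
    # Every option goes into one dict; fields left as None serialize to JSON
    # null, which the API now tolerates (hence the removed tool_choice branch).
    return {
        "model": model,
        "messages": messages,
        "reasoning_effort": reasoning_effort,
        "tool_choice": tool_choice,
        **rest,
    }

def prune_nones(payload: Dict[str, Any]) -> Dict[str, Any]:
    # If an endpoint ever rejects nulls again, dropping unset keys client-side
    # restores the old behavior without duplicating the whole dict literal.
    return {k: v for k, v in payload.items() if v is not None}
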
2 changes: 1 addition & 1 deletion predictionguard/version.py
@@ -1,2 +1,2 @@
# Setting the package version
__version__ = "2.9.1"
__version__ = "2.9.2"