From 744f0c15be49e5c5d15f437cfc6edd202da358a8 Mon Sep 17 00:00:00 2001
From: jmansdorfer <jmansdorfer19@gmail.com>
Date: Mon, 2 Jun 2025 13:05:03 -0400
Subject: [PATCH 1/3] adding enableOCR param to doc service

---
 predictionguard/src/documents.py | 53 +++++++++++++++++++++++++++-----
 1 file changed, 45 insertions(+), 8 deletions(-)

diff --git a/predictionguard/src/documents.py b/predictionguard/src/documents.py
index 69b3ae9..60cb536 100644
--- a/predictionguard/src/documents.py
+++ b/predictionguard/src/documents.py
@@ -1,6 +1,3 @@
-import json
-from pyexpat import model
-
 import requests
 from typing import Any, Dict, Optional
 
@@ -10,7 +7,7 @@
 class Documents:
     """Documents allows you to extract text from various document file types.
 
-    Usage::
+    Usage:
 
         from predictionguard import PredictionGuard
 
@@ -39,20 +36,47 @@ def __init__(self, api_key, url):
 
     def create(
         self,
-        file: str
+        file: str,
+        embed_images: Optional[bool] = False,
+        output_format: Optional[str] = None,
+        chunk_document: Optional[bool] = False,
+        chunk_size: Optional[int] = None,
+        enable_ocr: Optional[bool] = True,
+        toxicity: Optional[bool] = False,
+        pii: Optional[str] = "",
+        replace_method: Optional[str] = "",
+        injection: Optional[bool] = False,
     ) -> Dict[str, Any]:
         """
         Creates a documents request to the Prediction Guard /documents/extract API
 
         :param file: Document to be parsed
+        :param embed_images: Whether to embed images into documents
+        :param output_format: Output format to request for the extracted document
+        :param chunk_document: Whether to split the document into chunks
+        :param chunk_size: Chunk size to use when chunking the document
+        :param enable_ocr: Whether to enable OCR (enabled by default)
+        :param toxicity: Whether to check for output toxicity
+        :param pii: How PII should be handled (check for or replace), given as a string
+        :param replace_method: Replace method for any PII that is present
+        :param injection: Whether to check for prompt injection
         :result: A dictionary containing the title, content, and length of the document.
         """
 
         # Run _extract_documents
-        choices = self._extract_documents(file)
+        choices = self._extract_documents(
+            file, embed_images, output_format,
+            chunk_document, chunk_size, enable_ocr,
+            toxicity, pii, replace_method, injection
+        )
         return choices
 
-    def _extract_documents(self, file):
+    def _extract_documents(
+            self, file, embed_images,
+            output_format, chunk_document,
+            chunk_size, enable_ocr, toxicity,
+            pii, replace_method, injection
+    ):
         """
         Function to extract a document.
         """
@@ -60,13 +84,26 @@ def _extract_documents(self, file):
         headers = {
             "Authorization": "Bearer " + self.api_key,
             "User-Agent": "Prediction Guard Python Client: " + __version__,
+            "Toxicity": str(toxicity),
+            "Pii": pii,
+            "Replace-Method": replace_method,
+            "Injection": str(injection)
+        }
+
+        data = {
+            "embedImages": embed_images,
+            "outputFormat": output_format,
+            "chunkDocument": chunk_document,
+            "chunkSize": chunk_size,
+            "enableOCR": enable_ocr,
         }
 
         with open(file, "rb") as doc_file:
             files = {"file": (file, doc_file)}
 
             response = requests.request(
-                "POST", self.url + "/documents/extract", headers=headers, files=files
+                "POST", self.url + "/documents/extract",
+                headers=headers, files=files, data=data
             )
 
         # If the request was successful, print the proxies.

From 184629f473f325a75d4885eb9292a3bb9f12208e Mon Sep 17 00:00:00 2001
From: jmansdorfer <jmansdorfer19@gmail.com>
Date: Thu, 26 Jun 2025 16:14:17 -0400
Subject: [PATCH 2/3] removing translate and updating env vars

---
 .github/workflows/main.yml       |  12 ++--
 .github/workflows/pr.yml         |  12 ++--
 predictionguard/src/translate.py | 108 +++----------------------------
 tests/test_chat.py               |  10 +--
 tests/test_completions.py        |   8 +--
 tests/test_translate.py          |  12 ----
 6 files changed, 29 insertions(+), 133 deletions(-)
 delete mode 100644 tests/test_translate.py

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 8cf0b68..ff5fd52 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -20,12 +20,12 @@ jobs:
         run: pytest tests
         env:
           PREDICTIONGUARD_API_KEY: ${{ secrets.PREDICTIONGUARD_API_KEY }}
-          PREDICTIONGUARD_URL: ${{ secrets.PREDICTIONGUARD_URL }}
-          TEST_MODEL_NAME: ${{ secrets.TEST_MODEL_NAME }}
-          TEST_TEXT_EMBEDDINGS_MODEL: ${{ secrets.TEST_TEXT_EMBEDDINGS_MODEL }}
-          TEST_MULTIMODAL_EMBEDDINGS_MODEL: ${{ secrets.TEST_MULTIMODAL_EMBEDDINGS_MODEL }}
-          TEST_VISION_MODEL: ${{ secrets.TEST_VISION_MODEL }}
-          TEST_RERANK_MODEL: ${{ secrets.TEST_RERANK_MODEL }}
+          PREDICTIONGUARD_URL: ${{ vars.PREDICTIONGUARD_URL }}
+          TEST_CHAT_MODEL: ${{ vars.TEST_CHAT_MODEL }}
+          TEST_TEXT_EMBEDDINGS_MODEL: ${{ vars.TEST_TEXT_EMBEDDINGS_MODEL }}
+          TEST_MULTIMODAL_EMBEDDINGS_MODEL: ${{ vars.TEST_MULTIMODAL_EMBEDDINGS_MODEL }}
+          TEST_VISION_MODEL: ${{ vars.TEST_VISION_MODEL }}
+          TEST_RERANK_MODEL: ${{ vars.TEST_RERANK_MODEL }}
 
       - name: To PyPI using Flit
         uses: AsifArmanRahman/to-pypi-using-flit@v1
diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
index 25bf448..d76b6e4 100644
--- a/.github/workflows/pr.yml
+++ b/.github/workflows/pr.yml
@@ -24,9 +24,9 @@ jobs:
       run: pytest tests
       env:
         PREDICTIONGUARD_API_KEY: ${{ secrets.PREDICTIONGUARD_API_KEY}}
-        PREDICTIONGUARD_URL: ${{ secrets.PREDICTIONGUARD_URL}}
-        TEST_MODEL_NAME: ${{ secrets.TEST_MODEL_NAME }}
-        TEST_TEXT_EMBEDDINGS_MODEL: ${{ secrets.TEST_TEXT_EMBEDDINGS_MODEL }}
-        TEST_MULTIMODAL_EMBEDDINGS_MODEL: ${{ secrets.TEST_MULTIMODAL_EMBEDDINGS_MODEL }}
-        TEST_VISION_MODEL: ${{ secrets.TEST_VISION_MODEL }}
-        TEST_RERANK_MODEL: ${{ secrets.TEST_RERANK_MODEL }}
\ No newline at end of file
+        PREDICTIONGUARD_URL: ${{ vars.PREDICTIONGUARD_URL}}
+        TEST_CHAT_MODEL: ${{ vars.TEST_CHAT_MODEL }}
+        TEST_TEXT_EMBEDDINGS_MODEL: ${{ vars.TEST_TEXT_EMBEDDINGS_MODEL }}
+        TEST_MULTIMODAL_EMBEDDINGS_MODEL: ${{ vars.TEST_MULTIMODAL_EMBEDDINGS_MODEL }}
+        TEST_VISION_MODEL: ${{ vars.TEST_VISION_MODEL }}
+        TEST_RERANK_MODEL: ${{ vars.TEST_RERANK_MODEL }}
\ No newline at end of file
diff --git a/predictionguard/src/translate.py b/predictionguard/src/translate.py
index 336a12f..3259bdd 100644
--- a/predictionguard/src/translate.py
+++ b/predictionguard/src/translate.py
@@ -7,116 +7,24 @@
 
 
 class Translate:
-    # UNCOMMENT WHEN DEPRECATED
-    # """No longer supported.
-    # """
-    #
-    # def __init__(self, api_key, url):
-    #     self.api_key = api_key
-    #     self.url = url
-    #
-    # def create(
-    #         self,
-    #         text: Optional[str],
-    #         source_lang: Optional[str],
-    #         target_lang: Optional[str],
-    #         use_third_party_engine: Optional[bool] = False
-    #     ) -> Dict[str, Any]:
-    #     """
-    #     No longer supported
-    #     """
-    #
-    #     raise ValueError(
-    #         "The translate functionality is no longer supported."
-    #     )
-    """Translate converts text from one language to another.
-
-    Usage::
-
-        from predictionguard import PredictionGuard
-
-        # Set your Prediction Guard token as an environmental variable.
-        os.environ["PREDICTIONGUARD_API_KEY"] = "<api key>"
-
-        client = PredictionGuard()
-
-        response = client.translate.create(
-            text="The sky is blue.",
-            source_lang="eng",
-            target_lang="fra",
-            use_third_party_engine=True
-        )
-
-        print(json.dumps(response, sort_keys=True, indent=4, separators=(",", ": ")))
+    """No longer supported.
     """
 
-    # REMOVE BELOW HERE FOR DEPRECATION
     def __init__(self, api_key, url):
         self.api_key = api_key
         self.url = url
 
     def create(
             self,
-            text: str,
-            source_lang: str,
-            target_lang: str,
+            text: Optional[str],
+            source_lang: Optional[str],
+            target_lang: Optional[str],
             use_third_party_engine: Optional[bool] = False
         ) -> Dict[str, Any]:
         """
-        Creates a translate request to the Prediction Guard /translate API.
-
-        :param text: The text to be translated.
-        :param source_lang: The language the text is currently in.
-        :param target_lang: The language the text will be translated to.
-        :param use_third_party_engine: A boolean for enabling translations with third party APIs.
-        :result: A dictionary containing the translate response.
-        """
-
-        # Create a list of tuples, each containing all the parameters for
-        # a call to _generate_translation
-        args = (text, source_lang, target_lang, use_third_party_engine)
-
-        # Run _generate_translation
-        choices = self._generate_translation(*args)
-        return choices
-
-    def _generate_translation(self, text, source_lang, target_lang, use_third_party_engine):
+        No longer supported; always raises ValueError.
         """
-        Function to generate a translation response.
-        """
-
-        headers = {
-            "Content-Type": "application/json",
-            "Authorization": "Bearer " + self.api_key,
-            "User-Agent": "Prediction Guard Python Client: " + __version__,
-        }
-
-        payload_dict = {
-            "text": text,
-            "source_lang": source_lang,
-            "target_lang": target_lang,
-            "use_third_party_engine": use_third_party_engine
-        }
-        payload = json.dumps(payload_dict)
-        response = requests.request(
-            "POST", self.url + "/translate", headers=headers, data=payload
-        )
 
-        # If the request was successful, print the proxies.
-        if response.status_code == 200:
-            ret = response.json()
-            return ret
-        elif response.status_code == 429:
-            raise ValueError(
-                "Could not connect to Prediction Guard API. "
-                "Too many requests, rate limit or quota exceeded."
-            )
-        else:
-            # Check if there is a json body in the response. Read that in,
-            # print out the error field in the json body, and raise an exception.
-            err = ""
-            try:
-                err = response.json()["error"]
-            except Exception:
-                pass
-            raise ValueError("Could not make translation. " + err)
\ No newline at end of file
+        raise ValueError(
+            "The translate functionality is no longer supported."
+        )
\ No newline at end of file
diff --git a/tests/test_chat.py b/tests/test_chat.py
index c4c0a13..6c6ffb2 100644
--- a/tests/test_chat.py
+++ b/tests/test_chat.py
@@ -10,7 +10,7 @@ def test_chat_completions_create():
     test_client = PredictionGuard()
 
     response = test_client.chat.completions.create(
-        model=os.environ["TEST_MODEL_NAME"],
+        model=os.environ["TEST_CHAT_MODEL"],
         messages=[
             {"role": "system", "content": "You are a helpful chatbot."},
             {"role": "user", "content": "Tell me a joke."},
@@ -24,7 +24,7 @@ def test_chat_completions_create_string():
     test_client = PredictionGuard()
 
     response = test_client.chat.completions.create(
-        model=os.environ["TEST_MODEL_NAME"],
+        model=os.environ["TEST_CHAT_MODEL"],
         messages="Tell me a joke"
     )
 
@@ -36,7 +36,7 @@ def test_chat_completions_create_stream():
 
     response_list = []
     for res in test_client.chat.completions.create(
-        model=os.environ["TEST_MODEL_NAME"],
+        model=os.environ["TEST_CHAT_MODEL"],
         messages=[
             {"role": "system", "content": "You are a helpful chatbot."},
             {"role": "user", "content": "Tell me a joke."},
@@ -58,7 +58,7 @@ def test_chat_completions_create_stream_output_fail():
     response_list = []
     with pytest.raises(ValueError, match=streaming_error):
         for res in test_client.chat.completions.create(
-            model=os.environ["TEST_MODEL_NAME"],
+            model=os.environ["TEST_CHAT_MODEL"],
             messages=[
                 {"role": "system", "content": "You are a helpful chatbot."},
                 {"role": "user", "content": "Tell me a joke."},
@@ -191,7 +191,7 @@ def test_chat_completions_create_tool_call():
     test_client = PredictionGuard()
 
     response = test_client.chat.completions.create(
-        model=os.environ["TEST_MODEL_NAME"],
+        model=os.environ["TEST_CHAT_MODEL"],
         messages=[
             {"role": "system", "content": "You are a helpful chatbot."},
             {"role": "user", "content": "Tell me a joke."},
diff --git a/tests/test_completions.py b/tests/test_completions.py
index f3adcb7..ad72b57 100644
--- a/tests/test_completions.py
+++ b/tests/test_completions.py
@@ -9,7 +9,7 @@ def test_completions_create():
     test_client = PredictionGuard()
 
     response = test_client.completions.create(
-        model=os.environ["TEST_MODEL_NAME"], prompt="Tell me a joke"
+        model=os.environ["TEST_CHAT_MODEL"], prompt="Tell me a joke"
     )
 
     assert len(response["choices"][0]["text"]) > 0
@@ -19,7 +19,7 @@ def test_completions_create_batch():
     test_client = PredictionGuard()
 
     response = test_client.completions.create(
-        model=os.environ["TEST_MODEL_NAME"],
+        model=os.environ["TEST_CHAT_MODEL"],
         prompt=["Tell me a joke.", "Tell me a cool fact."],
     )
 
@@ -42,7 +42,7 @@ def test_completions_create_stream():
 
     response_list = []
     for res in test_client.completions.create(
-        model=os.environ["TEST_MODEL_NAME"],
+        model=os.environ["TEST_CHAT_MODEL"],
         prompt="Tell me a joke.",
         stream=True,
     ):
@@ -61,7 +61,7 @@ def test_completions_create_stream_output_fail():
     response_list = []
     with pytest.raises(ValueError, match=streaming_error):
         for res in test_client.completions.create(
-            model=os.environ["TEST_MODEL_NAME"],
+            model=os.environ["TEST_CHAT_MODEL"],
             prompt="Tell me a joke.",
             stream=True,
             output={"toxicity": True},
diff --git a/tests/test_translate.py b/tests/test_translate.py
deleted file mode 100644
index ee16e46..0000000
--- a/tests/test_translate.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from predictionguard import PredictionGuard
-
-
-def test_translate_create():
-    test_client = PredictionGuard()
-
-    response = test_client.translate.create(
-        text="The sky is blue", source_lang="eng", target_lang="fra"
-    )
-
-    assert type(response["best_score"]) is float
-    assert len(response["best_translation"])
\ No newline at end of file

From e2a32f1d77ba3fa04df4294a4d7f7a5870421b5a Mon Sep 17 00:00:00 2001
From: jmansdorfer <jmansdorfer19@gmail.com>
Date: Thu, 26 Jun 2025 16:38:50 -0400
Subject: [PATCH 3/3] fixing tokenize test model name

---
 tests/test_tokenize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py
index 39c04be..2920cde 100644
--- a/tests/test_tokenize.py
+++ b/tests/test_tokenize.py
@@ -7,7 +7,7 @@ def test_tokenize_create():
     test_client = PredictionGuard()
 
     response = test_client.tokenize.create(
-        model=os.environ["TEST_MODEL_NAME"],
+        model=os.environ["TEST_CHAT_MODEL"],
         input="Tokenize this please."
     )