From 744f0c15be49e5c5d15f437cfc6edd202da358a8 Mon Sep 17 00:00:00 2001 From: jmansdorfer Date: Mon, 2 Jun 2025 13:05:03 -0400 Subject: [PATCH 1/3] adding enableOCR param to doc service --- predictionguard/src/documents.py | 53 +++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/predictionguard/src/documents.py b/predictionguard/src/documents.py index 69b3ae9..60cb536 100644 --- a/predictionguard/src/documents.py +++ b/predictionguard/src/documents.py @@ -1,6 +1,3 @@ -import json -from pyexpat import model - import requests from typing import Any, Dict, Optional @@ -10,7 +7,7 @@ class Documents: """Documents allows you to extract text from various document file types. - Usage:: + Usage: from predictionguard import PredictionGuard @@ -39,20 +36,47 @@ def __init__(self, api_key, url): def create( self, - file: str + file: str, + embed_images: Optional[bool] = False, + output_format: Optional[str] = None, + chunk_document: Optional[bool] = False, + chunk_size: Optional[int] = None, + enable_ocr: Optional[bool] = True, + toxicity: Optional[bool] = False, + pii: Optional[str] = "", + replace_method: Optional[str] = "", + injection: Optional[bool] = False, ) -> Dict[str, Any]: """ Creates a documents request to the Prediction Guard /documents/extract API :param file: Document to be parsed + :param embed_images: Whether to embed images into documents + :param output_format: Output format + :param chunk_document: Whether to chunk documents into chunks + :param chunk_size: Chunk size + :param enable_ocr: Whether to enable OCR + :param toxicity: Whether to check for output toxicity + :param pii: Whether to check for or replace pii + :param replace_method: Replace method for any PII that is present. + :param injection: Whether to check for prompt injection :result: A dictionary containing the title, content, and length of the document. """ # Run _extract_documents - choices = self._extract_documents(file) + choices = self._extract_documents( + file, embed_images, output_format, + chunk_document, chunk_size, enable_ocr, + toxicity, pii, replace_method, injection + ) return choices - def _extract_documents(self, file): + def _extract_documents( + self, file, embed_images, + output_format, chunk_document, + chunk_size, enable_ocr, toxicity, + pii, replace_method, injection + ): """ Function to extract a document. """ @@ -60,13 +84,26 @@ def _extract_documents(self, file): headers = { "Authorization": "Bearer " + self.api_key, "User-Agent": "Prediction Guard Python Client: " + __version__, + "Toxicity": str(toxicity), + "Pii": pii, + "Replace-Method": replace_method, + "Injection": str(injection) + } + + data = { + "embedImages": embed_images, + "outputFormat": output_format, + "chunkDocument": chunk_document, + "chunkSize": chunk_size, + "enableOCR": enable_ocr, } with open(file, "rb") as doc_file: files = {"file": (file, doc_file)} response = requests.request( - "POST", self.url + "/documents/extract", headers=headers, files=files + "POST", self.url + "/documents/extract", + headers=headers, files=files, data=data ) # If the request was successful, print the proxies. From 184629f473f325a75d4885eb9292a3bb9f12208e Mon Sep 17 00:00:00 2001 From: jmansdorfer Date: Thu, 26 Jun 2025 16:14:17 -0400 Subject: [PATCH 2/3] removing translate and updating env vars --- .github/workflows/main.yml | 12 ++-- .github/workflows/pr.yml | 12 ++-- predictionguard/src/translate.py | 108 +++---------------------------- tests/test_chat.py | 10 +-- tests/test_completions.py | 8 +-- tests/test_translate.py | 12 ---- 6 files changed, 29 insertions(+), 133 deletions(-) delete mode 100644 tests/test_translate.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8cf0b68..ff5fd52 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -20,12 +20,12 @@ jobs: run: pytest tests env: PREDICTIONGUARD_API_KEY: ${{ secrets.PREDICTIONGUARD_API_KEY }} - PREDICTIONGUARD_URL: ${{ secrets.PREDICTIONGUARD_URL }} - TEST_MODEL_NAME: ${{ secrets.TEST_MODEL_NAME }} - TEST_TEXT_EMBEDDINGS_MODEL: ${{ secrets.TEST_TEXT_EMBEDDINGS_MODEL }} - TEST_MULTIMODAL_EMBEDDINGS_MODEL: ${{ secrets.TEST_MULTIMODAL_EMBEDDINGS_MODEL }} - TEST_VISION_MODEL: ${{ secrets.TEST_VISION_MODEL }} - TEST_RERANK_MODEL: ${{ secrets.TEST_RERANK_MODEL }} + PREDICTIONGUARD_URL: ${{ vars.PREDICTIONGUARD_URL }} + TEST_CHAT_MODEL: ${{ vars.TEST_CHAT_MODEL }} + TEST_TEXT_EMBEDDINGS_MODEL: ${{ vars.TEST_TEXT_EMBEDDINGS_MODEL }} + TEST_MULTIMODAL_EMBEDDINGS_MODEL: ${{ vars.TEST_MULTIMODAL_EMBEDDINGS_MODEL }} + TEST_VISION_MODEL: ${{ vars.TEST_VISION_MODEL }} + TEST_RERANK_MODEL: ${{ vars.TEST_RERANK_MODEL }} - name: To PyPI using Flit uses: AsifArmanRahman/to-pypi-using-flit@v1 diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 25bf448..d76b6e4 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -24,9 +24,9 @@ jobs: run: pytest tests env: PREDICTIONGUARD_API_KEY: ${{ secrets.PREDICTIONGUARD_API_KEY}} - PREDICTIONGUARD_URL: ${{ secrets.PREDICTIONGUARD_URL}} - TEST_MODEL_NAME: ${{ secrets.TEST_MODEL_NAME }} - TEST_TEXT_EMBEDDINGS_MODEL: ${{ secrets.TEST_TEXT_EMBEDDINGS_MODEL }} - TEST_MULTIMODAL_EMBEDDINGS_MODEL: ${{ secrets.TEST_MULTIMODAL_EMBEDDINGS_MODEL }} - TEST_VISION_MODEL: ${{ secrets.TEST_VISION_MODEL }} - TEST_RERANK_MODEL: ${{ secrets.TEST_RERANK_MODEL }} \ No newline at end of file + PREDICTIONGUARD_URL: ${{ vars.PREDICTIONGUARD_URL}} + TEST_CHAT_MODEL: ${{ vars.TEST_CHAT_MODEL }} + TEST_TEXT_EMBEDDINGS_MODEL: ${{ vars.TEST_TEXT_EMBEDDINGS_MODEL }} + TEST_MULTIMODAL_EMBEDDINGS_MODEL: ${{ vars.TEST_MULTIMODAL_EMBEDDINGS_MODEL }} + TEST_VISION_MODEL: ${{ vars.TEST_VISION_MODEL }} + TEST_RERANK_MODEL: ${{ vars.TEST_RERANK_MODEL }} \ No newline at end of file diff --git a/predictionguard/src/translate.py b/predictionguard/src/translate.py index 336a12f..3259bdd 100644 --- a/predictionguard/src/translate.py +++ b/predictionguard/src/translate.py @@ -7,116 +7,24 @@ class Translate: - # UNCOMMENT WHEN DEPRECATED - # """No longer supported. - # """ - # - # def __init__(self, api_key, url): - # self.api_key = api_key - # self.url = url - # - # def create( - # self, - # text: Optional[str], - # source_lang: Optional[str], - # target_lang: Optional[str], - # use_third_party_engine: Optional[bool] = False - # ) -> Dict[str, Any]: - # """ - # No longer supported - # """ - # - # raise ValueError( - # "The translate functionality is no longer supported." - # ) - """Translate converts text from one language to another. - - Usage:: - - from predictionguard import PredictionGuard - - # Set your Prediction Guard token as an environmental variable. - os.environ["PREDICTIONGUARD_API_KEY"] = "" - - client = PredictionGuard() - - response = client.translate.create( - text="The sky is blue.", - source_lang="eng", - target_lang="fra", - use_third_party_engine=True - ) - - print(json.dumps(response, sort_keys=True, indent=4, separators=(",", ": "))) + """No longer supported. """ - # REMOVE BELOW HERE FOR DEPRECATION def __init__(self, api_key, url): self.api_key = api_key self.url = url def create( self, - text: str, - source_lang: str, - target_lang: str, + text: Optional[str], + source_lang: Optional[str], + target_lang: Optional[str], use_third_party_engine: Optional[bool] = False ) -> Dict[str, Any]: """ - Creates a translate request to the Prediction Guard /translate API. - - :param text: The text to be translated. - :param source_lang: The language the text is currently in. - :param target_lang: The language the text will be translated to. - :param use_third_party_engine: A boolean for enabling translations with third party APIs. - :result: A dictionary containing the translate response. - """ - - # Create a list of tuples, each containing all the parameters for - # a call to _generate_translation - args = (text, source_lang, target_lang, use_third_party_engine) - - # Run _generate_translation - choices = self._generate_translation(*args) - return choices - - def _generate_translation(self, text, source_lang, target_lang, use_third_party_engine): + No longer supported """ - Function to generate a translation response. - """ - - headers = { - "Content-Type": "application/json", - "Authorization": "Bearer " + self.api_key, - "User-Agent": "Prediction Guard Python Client: " + __version__, - } - - payload_dict = { - "text": text, - "source_lang": source_lang, - "target_lang": target_lang, - "use_third_party_engine": use_third_party_engine - } - payload = json.dumps(payload_dict) - response = requests.request( - "POST", self.url + "/translate", headers=headers, data=payload - ) - # If the request was successful, print the proxies. - if response.status_code == 200: - ret = response.json() - return ret - elif response.status_code == 429: - raise ValueError( - "Could not connect to Prediction Guard API. " - "Too many requests, rate limit or quota exceeded." - ) - else: - # Check if there is a json body in the response. Read that in, - # print out the error field in the json body, and raise an exception. - err = "" - try: - err = response.json()["error"] - except Exception: - pass - raise ValueError("Could not make translation. " + err) \ No newline at end of file + raise ValueError( + "The translate functionality is no longer supported." + ) \ No newline at end of file diff --git a/tests/test_chat.py b/tests/test_chat.py index c4c0a13..6c6ffb2 100644 --- a/tests/test_chat.py +++ b/tests/test_chat.py @@ -10,7 +10,7 @@ def test_chat_completions_create(): test_client = PredictionGuard() response = test_client.chat.completions.create( - model=os.environ["TEST_MODEL_NAME"], + model=os.environ["TEST_CHAT_MODEL"], messages=[ {"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Tell me a joke."}, @@ -24,7 +24,7 @@ def test_chat_completions_create_string(): test_client = PredictionGuard() response = test_client.chat.completions.create( - model=os.environ["TEST_MODEL_NAME"], + model=os.environ["TEST_CHAT_MODEL"], messages="Tell me a joke" ) @@ -36,7 +36,7 @@ def test_chat_completions_create_stream(): response_list = [] for res in test_client.chat.completions.create( - model=os.environ["TEST_MODEL_NAME"], + model=os.environ["TEST_CHAT_MODEL"], messages=[ {"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Tell me a joke."}, @@ -58,7 +58,7 @@ def test_chat_completions_create_stream_output_fail(): response_list = [] with pytest.raises(ValueError, match=streaming_error): for res in test_client.chat.completions.create( - model=os.environ["TEST_MODEL_NAME"], + model=os.environ["TEST_CHAT_MODEL"], messages=[ {"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Tell me a joke."}, @@ -191,7 +191,7 @@ def test_chat_completions_create_tool_call(): test_client = PredictionGuard() response = test_client.chat.completions.create( - model=os.environ["TEST_MODEL_NAME"], + model=os.environ["TEST_CHAT_MODEL"], messages=[ {"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Tell me a joke."}, diff --git a/tests/test_completions.py b/tests/test_completions.py index f3adcb7..ad72b57 100644 --- a/tests/test_completions.py +++ b/tests/test_completions.py @@ -9,7 +9,7 @@ def test_completions_create(): test_client = PredictionGuard() response = test_client.completions.create( - model=os.environ["TEST_MODEL_NAME"], prompt="Tell me a joke" + model=os.environ["TEST_CHAT_MODEL"], prompt="Tell me a joke" ) assert len(response["choices"][0]["text"]) > 0 @@ -19,7 +19,7 @@ def test_completions_create_batch(): test_client = PredictionGuard() response = test_client.completions.create( - model=os.environ["TEST_MODEL_NAME"], + model=os.environ["TEST_CHAT_MODEL"], prompt=["Tell me a joke.", "Tell me a cool fact."], ) @@ -42,7 +42,7 @@ def test_completions_create_stream(): response_list = [] for res in test_client.completions.create( - model=os.environ["TEST_MODEL_NAME"], + model=os.environ["TEST_CHAT_MODEL"], prompt="Tell me a joke.", stream=True, ): @@ -61,7 +61,7 @@ def test_completions_create_stream_output_fail(): response_list = [] with pytest.raises(ValueError, match=streaming_error): for res in test_client.completions.create( - model=os.environ["TEST_MODEL_NAME"], + model=os.environ["TEST_CHAT_MODEL"], prompt="Tell me a joke.", stream=True, output={"toxicity": True}, diff --git a/tests/test_translate.py b/tests/test_translate.py deleted file mode 100644 index ee16e46..0000000 --- a/tests/test_translate.py +++ /dev/null @@ -1,12 +0,0 @@ -from predictionguard import PredictionGuard - - -def test_translate_create(): - test_client = PredictionGuard() - - response = test_client.translate.create( - text="The sky is blue", source_lang="eng", target_lang="fra" - ) - - assert type(response["best_score"]) is float - assert len(response["best_translation"]) \ No newline at end of file From e2a32f1d77ba3fa04df4294a4d7f7a5870421b5a Mon Sep 17 00:00:00 2001 From: jmansdorfer Date: Thu, 26 Jun 2025 16:38:50 -0400 Subject: [PATCH 3/3] fixing tokenize test model name --- tests/test_tokenize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py index 39c04be..2920cde 100644 --- a/tests/test_tokenize.py +++ b/tests/test_tokenize.py @@ -7,7 +7,7 @@ def test_tokenize_create(): test_client = PredictionGuard() response = test_client.tokenize.create( - model=os.environ["TEST_MODEL_NAME"], + model=os.environ["TEST_CHAT_MODEL"], input="Tokenize this please." )