From 7de6367e5f504e3dfa47695d3a7e45a42485fb55 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Tue, 1 Oct 2024 14:51:41 -0700 Subject: [PATCH 01/38] Update task_query_response.prompty remove required keys --- .../simulator/_prompty/task_query_response.prompty | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty index 881d00493ff8..42a5d3fe4e37 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty @@ -3,11 +3,6 @@ name: TaskSimulatorQueryResponse description: Gets queries and responses from a blob of text model: api: chat - configuration: - type: azure_openai - azure_deployment: ${env:AZURE_DEPLOYMENT} - api_key: ${env:AZURE_OPENAI_API_KEY} - azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT} parameters: temperature: 0.0 top_p: 1.0 From f288b341820d9f54f7830dae8f841035b4f30df6 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Tue, 1 Oct 2024 14:51:54 -0700 Subject: [PATCH 02/38] Update task_simulate.prompty --- .../ai/evaluation/simulator/_prompty/task_simulate.prompty | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty index 7dce5e28a6d1..1d8e360b56b9 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty @@ -3,10 +3,6 @@ name: TaskSimulatorWithPersona description: Simulates a user to complete a conversation model: api: chat - configuration: - type: azure_openai - azure_deployment: ${env:AZURE_DEPLOYMENT} - azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT} parameters: temperature: 0.0 top_p: 1.0 From 2a4b6f744a9a6c8faee8c742f0ad55d5cf82b922 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 2 Oct 2024 07:21:58 -0700 Subject: [PATCH 03/38] Update task_query_response.prompty --- .../evaluation/simulator/_prompty/task_query_response.prompty | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty index 42a5d3fe4e37..b8c04fb19ef1 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty @@ -3,6 +3,10 @@ name: TaskSimulatorQueryResponse description: Gets queries and responses from a blob of text model: api: chat + configuration: + type: azure_openai + azure_deployment: ${env:AZURE_DEPLOYMENT} + azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT} parameters: temperature: 0.0 top_p: 1.0 From c8ce251bc34b2c3913f1d7e793ed65292e6a2e24 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 2 Oct 2024 07:22:17 -0700 Subject: [PATCH 04/38] Update task_simulate.prompty --- .../ai/evaluation/simulator/_prompty/task_simulate.prompty | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty index 1d8e360b56b9..7dce5e28a6d1 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty @@ -3,6 +3,10 @@ name: TaskSimulatorWithPersona description: Simulates a user to complete a conversation model: api: chat + configuration: + type: azure_openai + azure_deployment: ${env:AZURE_DEPLOYMENT} + azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT} parameters: temperature: 0.0 top_p: 1.0 From e4cdd30f1189977531d90f89dff8248e41537f23 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 9 Oct 2024 14:24:35 -0700 Subject: [PATCH 05/38] Fix the api_key needed --- .../azure/ai/evaluation/_model_configurations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_model_configurations.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_model_configurations.py index 43114d3605c3..f9b8d64c9d5d 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_model_configurations.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_model_configurations.py @@ -16,7 +16,7 @@ class AzureOpenAIModelConfiguration(TypedDict): """Name of Azure OpenAI deployment to make request to""" azure_endpoint: str """Endpoint of Azure OpenAI resource to make request to""" - api_key: str + api_key: NotRequired[str] """API key of Azure OpenAI resource""" api_version: NotRequired[str] """(Optional) API version to use in request to Azure OpenAI deployment""" From b478651c1c77e137f535e92997770c4873edc917 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 16 Oct 2024 09:45:04 -0700 Subject: [PATCH 06/38] Update for release --- sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index f7da251f03bd..0e92ee34a330 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -1,8 +1,6 @@ # Release History -## 1.0.0b4 (Unreleased) - -### Features Added +## 1.0.0b4 (2024-10-16) ### Breaking Changes From 8e5a264b835c184295c396e6816b747d64f158a0 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 16 Oct 2024 10:49:11 -0700 Subject: [PATCH 07/38] Black fix for file --- .../azure/ai/evaluation/simulator/_helpers/_experimental.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_experimental.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_experimental.py index 6728a61649c6..ca676c9bcdc9 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_experimental.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_experimental.py @@ -27,13 +27,11 @@ @overload -def experimental(wrapped: Type[T]) -> Type[T]: - ... +def experimental(wrapped: Type[T]) -> Type[T]: ... @overload -def experimental(wrapped: Callable[P, T]) -> Callable[P, T]: - ... +def experimental(wrapped: Callable[P, T]) -> Callable[P, T]: ... def experimental(wrapped: Union[Type[T], Callable[P, T]]) -> Union[Type[T], Callable[P, T]]: From 3a80606d08c319a9c6879e772d84aced41c2fd19 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Thu, 17 Oct 2024 14:12:06 -0700 Subject: [PATCH 08/38] Add original text in global context --- .../azure/ai/evaluation/simulator/_simulator.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py index 06a62a97781a..1a4b52fa7a5f 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py @@ -172,6 +172,7 @@ async def __call__( user_simulator_prompty_kwargs=user_simulator_prompty_kwargs, target=target, api_call_delay_sec=api_call_delay_sec, + text=text ) async def _simulate_with_predefined_turns( @@ -497,6 +498,7 @@ async def _create_conversations_from_query_responses( user_simulator_prompty_kwargs: Dict[str, Any], target: Callable, api_call_delay_sec: float, + text: str, ) -> List[JsonLineChatProtocol]: """ Creates full conversations from query-response pairs. @@ -515,6 +517,8 @@ async def _create_conversations_from_query_responses( :paramtype target: Callable :keyword api_call_delay_sec: Delay in seconds between API calls. :paramtype api_call_delay_sec: float + :keyword text: The initial input text for generating query responses. + :paramtype text: str :return: A list of simulated conversations represented as JsonLineChatProtocol objects. :rtype: List[JsonLineChatProtocol] """ @@ -552,6 +556,7 @@ async def _create_conversations_from_query_responses( "task": task, "expected_response": response, "query": query, + "original_text": text, }, "$schema": "http://azureml/sdk-2-0/ChatConversation.json", } From 6768f9a5f0a8449f1e172f3eaf68a1bd5afbc3b7 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Thu, 17 Oct 2024 14:13:47 -0700 Subject: [PATCH 09/38] Update test --- .../tests/unittests/test_non_adv_simulator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py index b98d5940bba6..592abfa0dde3 100644 --- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py @@ -353,6 +353,7 @@ async def test_create_conversations_from_query_responses( api_call_delay_sec=1, user_simulator_prompty=None, user_simulator_prompty_kwargs={}, + text="some text", ) assert len(result) == 1 From f7cc4bb1b3f7f8de6c73f41eeec20ed6702ea772 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Fri, 18 Oct 2024 16:38:43 -0700 Subject: [PATCH 10/38] Update the indirect attack simulator --- .../simulator/_indirect_attack_simulator.py | 107 ++++++++++++------ 1 file changed, 74 insertions(+), 33 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py index 83f17254be3c..ce4178274fb1 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py @@ -3,15 +3,18 @@ # --------------------------------------------------------- # pylint: disable=C0301,C0114,R0913,R0903 # noqa: E501 +import asyncio import logging from typing import Callable, cast +from tqdm import tqdm + from azure.ai.evaluation._common.utils import validate_azure_ai_project from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException -from azure.ai.evaluation.simulator import AdversarialScenario +from azure.ai.evaluation.simulator import AdversarialScenario, SupportedLanguages from azure.core.credentials import TokenCredential -from ._adversarial_simulator import AdversarialSimulator +from ._adversarial_simulator import AdversarialSimulator, JsonLineList from ._helpers import experimental from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient, TokenScope @@ -19,7 +22,7 @@ @experimental -class IndirectAttackSimulator: +class IndirectAttackSimulator(AdversarialSimulator): """ Initializes the XPIA (cross domain prompt injected attack) jailbreak adversarial simulator with a project scope. @@ -69,29 +72,22 @@ def _ensure_service_dependencies(self): async def __call__( self, *, - scenario: AdversarialScenario, target: Callable, - max_conversation_turns: int = 1, max_simulation_results: int = 3, api_call_retry_limit: int = 3, api_call_retry_sleep_sec: int = 1, api_call_delay_sec: int = 0, concurrent_async_task: int = 3, + **kwargs, ): """ Initializes the XPIA (cross domain prompt injected attack) jailbreak adversarial simulator with a project scope. This simulator converses with your AI system using prompts injected into the context to interrupt normal expected functionality by eliciting manipulated content, intrusion and attempting to gather information outside the scope of your AI system. - - :keyword scenario: Enum value specifying the adversarial scenario used for generating inputs. - :paramtype scenario: azure.ai.evaluation.simulator.AdversarialScenario :keyword target: The target function to simulate adversarial inputs against. This function should be asynchronous and accept a dictionary representing the adversarial input. :paramtype target: Callable - :keyword max_conversation_turns: The maximum number of conversation turns to simulate. - Defaults to 1. - :paramtype max_conversation_turns: int :keyword max_simulation_results: The maximum number of simulation results to return. Defaults to 3. :paramtype max_simulation_results: int @@ -128,11 +124,11 @@ async def __call__( 'template_parameters': {}, 'messages': [ { - 'content': ' ', + 'content': '', 'role': 'user' }, { - 'content': "", + 'content': "", 'role': 'assistant', 'context': None } @@ -141,25 +137,70 @@ async def __call__( }] } """ - if scenario not in AdversarialScenario.__members__.values(): - msg = f"Invalid scenario: {scenario}. Supported scenarios: {AdversarialScenario.__members__.values()}" - raise EvaluationException( - message=msg, - internal_message=msg, - target=ErrorTarget.DIRECT_ATTACK_SIMULATOR, - category=ErrorCategory.INVALID_VALUE, - blame=ErrorBlame.USER_ERROR, + # values that cannot be changed: + scenario = AdversarialScenario.ADVERSARIAL_INDIRECT_JAILBREAK + max_conversation_turns = 2 + language = SupportedLanguages.English + self._ensure_service_dependencies() + templates = await self.adversarial_template_handler._get_content_harm_template_collections(scenario.value) + concurrent_async_task = min(concurrent_async_task, 1000) + semaphore = asyncio.Semaphore(concurrent_async_task) + sim_results = [] + tasks = [] + total_tasks = sum(len(t.template_parameters) for t in templates) + if max_simulation_results > total_tasks: + logger.warning( + "Cannot provide %s results due to maximum number of adversarial simulations that can be generated: %s." + "\n %s simulations will be generated.", + max_simulation_results, + total_tasks, + total_tasks, ) - jb_sim = AdversarialSimulator(azure_ai_project=cast(dict, self.azure_ai_project), credential=self.credential) - jb_sim_results = await jb_sim( - scenario=scenario, - target=target, - max_conversation_turns=max_conversation_turns, - max_simulation_results=max_simulation_results, - api_call_retry_limit=api_call_retry_limit, - api_call_retry_sleep_sec=api_call_retry_sleep_sec, - api_call_delay_sec=api_call_delay_sec, - concurrent_async_task=concurrent_async_task, - _jailbreak_type="xpia", + total_tasks = min(total_tasks, max_simulation_results) + progress_bar = tqdm( + total=total_tasks, + desc="generating jailbreak simulations", + ncols=100, + unit="simulations", ) - return jb_sim_results + for template in templates: + for parameter in template.template_parameters: + tasks.append( + asyncio.create_task( + self._simulate_async( + target=target, + template=template, + parameters=parameter, + max_conversation_turns=max_conversation_turns, + api_call_retry_limit=api_call_retry_limit, + api_call_retry_sleep_sec=api_call_retry_sleep_sec, + api_call_delay_sec=api_call_delay_sec, + language=language, + semaphore=semaphore, + ) + ) + ) + if len(tasks) >= max_simulation_results: + break + if len(tasks) >= max_simulation_results: + break + for task in asyncio.as_completed(tasks): + completed_task = await task + template_parameters = completed_task.get("template_parameters", {}) + xpia_attack_type = template_parameters.get("xpia_attack_type", "") + action = template_parameters.get("action", "") + document_type = template_parameters.get("document_type", "") + sim_results.append({ + "messages": completed_task["messages"], + "$schema": "http://azureml/sdk-2-0/ChatConversation.json", + "template_parameters": { + "metadata": { + "xpia_attack_type": xpia_attack_type, + "action": action, + "document_type": document_type, + }, + }, + }) + progress_bar.update(1) + progress_bar.close() + return JsonLineList(sim_results) From 07eb46678ecf0723ecbb6ea0265fdade0abb7185 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Fri, 18 Oct 2024 16:39:43 -0700 Subject: [PATCH 11/38] Black suggested fixes --- .../simulator/_indirect_attack_simulator.py | 22 ++++++++++--------- .../ai/evaluation/simulator/_simulator.py | 4 ++-- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py index ce4178274fb1..79b987f2a595 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py @@ -190,17 +190,19 @@ async def __call__( xpia_attack_type = template_parameters.get("xpia_attack_type", "") action = template_parameters.get("action", "") document_type = template_parameters.get("document_type", "") - sim_results.append({ - "messages": completed_task["messages"], - "$schema": "http://azureml/sdk-2-0/ChatConversation.json", - "template_parameters": { - "metadata": { - "xpia_attack_type": xpia_attack_type, - "action": action, - "document_type": document_type, + sim_results.append( + { + "messages": completed_task["messages"], + "$schema": "http://azureml/sdk-2-0/ChatConversation.json", + "template_parameters": { + "metadata": { + "xpia_attack_type": xpia_attack_type, + "action": action, + "document_type": document_type, + }, }, - }, - }) + } + ) progress_bar.update(1) progress_bar.close() return JsonLineList(sim_results) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py index 1a4b52fa7a5f..f2c529cd011c 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py @@ -172,7 +172,7 @@ async def __call__( user_simulator_prompty_kwargs=user_simulator_prompty_kwargs, target=target, api_call_delay_sec=api_call_delay_sec, - text=text + text=text, ) async def _simulate_with_predefined_turns( @@ -517,7 +517,7 @@ async def _create_conversations_from_query_responses( :paramtype target: Callable :keyword api_call_delay_sec: Delay in seconds between API calls. :paramtype api_call_delay_sec: float - :keyword text: The initial input text for generating query responses. + :keyword text: The initial input text for generating query responses. :paramtype text: str :return: A list of simulated conversations represented as JsonLineChatProtocol objects. :rtype: List[JsonLineChatProtocol] From 942bfd59e68ffaae698369ccfd0bde89bad30a50 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Fri, 18 Oct 2024 16:41:06 -0700 Subject: [PATCH 12/38] Update simulator prompty --- .../ai/evaluation/simulator/_prompty/task_simulate.prompty | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty index 1d8e360b56b9..4aa4af9d6a3e 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty @@ -25,7 +25,7 @@ Output must be in JSON format Here's a sample output: { "content": "Here is my follow-up question.", - "user": "user" + "role": "user" } Output with a json object that continues the conversation, given the conversation history: From 98cad972ce8d9d2012ffce1002f482f2be2212ad Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Fri, 18 Oct 2024 16:47:00 -0700 Subject: [PATCH 13/38] Update adversarial scenario enum to exclude XPIA --- .../azure/ai/evaluation/simulator/__init__.py | 3 ++- .../azure/ai/evaluation/simulator/_adversarial_scenario.py | 5 +++++ .../ai/evaluation/simulator/_indirect_attack_simulator.py | 4 ++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/__init__.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/__init__.py index 9011665f66b6..c05842651b2f 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/__init__.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/__init__.py @@ -1,4 +1,4 @@ -from ._adversarial_scenario import AdversarialScenario +from ._adversarial_scenario import AdversarialScenario, AdversarialScenarioJailbreak from ._adversarial_simulator import AdversarialSimulator from ._constants import SupportedLanguages from ._direct_attack_simulator import DirectAttackSimulator @@ -8,6 +8,7 @@ __all__ = [ "AdversarialSimulator", "AdversarialScenario", + "AdversarialScenarioJailbreak", "DirectAttackSimulator", "IndirectAttackSimulator", "SupportedLanguages", diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_scenario.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_scenario.py index 8588bf0d3947..a8b4489b130d 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_scenario.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_scenario.py @@ -16,6 +16,11 @@ class AdversarialScenario(Enum): ADVERSARIAL_CONTENT_GEN_UNGROUNDED = "adv_content_gen_ungrounded" ADVERSARIAL_CONTENT_GEN_GROUNDED = "adv_content_gen_grounded" ADVERSARIAL_CONTENT_PROTECTED_MATERIAL = "adv_content_protected_material" + + +class AdversarialScenarioJailbreak(Enum): + """Adversarial scenario types for XPIA Jailbreak""" + ADVERSARIAL_INDIRECT_JAILBREAK = "adv_xpia" diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py index 39ea74ece410..bcb4548d08bd 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py @@ -12,7 +12,7 @@ from azure.ai.evaluation._common.utils import validate_azure_ai_project from azure.ai.evaluation._common._experimental import experimental from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException -from azure.ai.evaluation.simulator import AdversarialScenario, SupportedLanguages +from azure.ai.evaluation.simulator import AdversarialScenarioJailbreak, SupportedLanguages from azure.core.credentials import TokenCredential from ._adversarial_simulator import AdversarialSimulator, JsonLineList @@ -140,7 +140,7 @@ async def __call__( } """ # values that cannot be changed: - scenario = AdversarialScenario.ADVERSARIAL_INDIRECT_JAILBREAK + scenario = AdversarialScenarioJailbreak.ADVERSARIAL_INDIRECT_JAILBREAK max_conversation_turns = 2 language = SupportedLanguages.English self._ensure_service_dependencies() From d5103169f8dbb807dcf3cf143f4d04796912efff Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Fri, 18 Oct 2024 16:49:12 -0700 Subject: [PATCH 14/38] Update changelog --- sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index e21b4b803103..152233879dac 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -7,6 +7,7 @@ ### Breaking Changes - Renamed environment variable `PF_EVALS_BATCH_USE_ASYNC` to `AI_EVALS_BATCH_USE_ASYNC`. +- AdversarialScenario enum does not include `ADVERSARIAL_INDIRECT_JAILBREAK`, invoking IndirectJailbreak or XPIA should be done with `IndirectAttackSimulator` ### Bugs Fixed - Non adversarial simulator works with `gpt-4o` models using the `json_schema` response format From 742943ef7ed2c26256570c8f55638ccee2a31ab5 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Fri, 18 Oct 2024 16:49:27 -0700 Subject: [PATCH 15/38] Black fixes --- .../azure/ai/evaluation/_common/_experimental.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/_experimental.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/_experimental.py index 6728a61649c6..ca676c9bcdc9 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/_experimental.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/_experimental.py @@ -27,13 +27,11 @@ @overload -def experimental(wrapped: Type[T]) -> Type[T]: - ... +def experimental(wrapped: Type[T]) -> Type[T]: ... @overload -def experimental(wrapped: Callable[P, T]) -> Callable[P, T]: - ... +def experimental(wrapped: Callable[P, T]) -> Callable[P, T]: ... def experimental(wrapped: Union[Type[T], Callable[P, T]]) -> Union[Type[T], Callable[P, T]]: From 12e06155f2b8068886b56ac5ad7c9c16787ddf87 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Fri, 18 Oct 2024 16:52:03 -0700 Subject: [PATCH 16/38] Remove duplicate import --- .../azure/ai/evaluation/simulator/_indirect_attack_simulator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py index bcb4548d08bd..dc3c92789330 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py @@ -16,7 +16,6 @@ from azure.core.credentials import TokenCredential from ._adversarial_simulator import AdversarialSimulator, JsonLineList -from ._helpers import experimental from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient, TokenScope From de32b50eb491ad46b5c35fe333eebad9c7e852be Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Fri, 18 Oct 2024 18:16:38 -0700 Subject: [PATCH 17/38] Fix the mypy error --- .../simulator/_indirect_attack_simulator.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py index dc3c92789330..e9426a309799 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py @@ -5,7 +5,7 @@ # noqa: E501 import asyncio import logging -from typing import Callable, cast +from typing import Any, Callable, Dict, cast from tqdm import tqdm @@ -58,6 +58,7 @@ def __init__(self, *, azure_ai_project: dict, credential): self.adversarial_template_handler = AdversarialTemplateHandler( azure_ai_project=self.azure_ai_project, rai_client=self.rai_client ) + super().__init__(azure_ai_project=azure_ai_project, credential=credential) def _ensure_service_dependencies(self): if self.rai_client is None: @@ -186,11 +187,11 @@ async def __call__( if len(tasks) >= max_simulation_results: break for task in asyncio.as_completed(tasks): - completed_task = await task - template_parameters = completed_task.get("template_parameters", {}) - xpia_attack_type = template_parameters.get("xpia_attack_type", "") - action = template_parameters.get("action", "") - document_type = template_parameters.get("document_type", "") + completed_task: Dict[str, Any] = await task + template_parameters: Dict[str, Any] = completed_task.get("template_parameters", {}) + xpia_attack_type: str = template_parameters.get("xpia_attack_type", "") + action: str = template_parameters.get("action", "") + document_type: str = template_parameters.get("document_type", "") sim_results.append( { "messages": completed_task["messages"], From 4b6413237d638bad6333e56127953a278096114e Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Mon, 21 Oct 2024 09:21:55 -0700 Subject: [PATCH 18/38] Mypy please be happy --- .../simulator/_indirect_attack_simulator.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py index e9426a309799..3ffc559d18a6 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py @@ -5,7 +5,7 @@ # noqa: E501 import asyncio import logging -from typing import Any, Callable, Dict, cast +from typing import Callable, cast from tqdm import tqdm @@ -187,14 +187,14 @@ async def __call__( if len(tasks) >= max_simulation_results: break for task in asyncio.as_completed(tasks): - completed_task: Dict[str, Any] = await task - template_parameters: Dict[str, Any] = completed_task.get("template_parameters", {}) - xpia_attack_type: str = template_parameters.get("xpia_attack_type", "") - action: str = template_parameters.get("action", "") - document_type: str = template_parameters.get("document_type", "") + completed_task = await task # type: ignore + template_parameters = completed_task.get("template_parameters", {}) # type: ignore + xpia_attack_type = template_parameters.get("xpia_attack_type", "") # type: ignore + action = template_parameters.get("action", "") # type: ignore + document_type = template_parameters.get("document_type", "") # type: ignore sim_results.append( { - "messages": completed_task["messages"], + "messages": completed_task["messages"], # type: ignore "$schema": "http://azureml/sdk-2-0/ChatConversation.json", "template_parameters": { "metadata": { From 1c0b4dd68c32d9c2363657616c6724eef0b2b238 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Tue, 22 Oct 2024 08:14:38 -0700 Subject: [PATCH 19/38] Updates to non adv simulator --- sdk/evaluation/azure-ai-evaluation/README.md | 44 +++++++++---------- .../_prompty/task_query_response.prompty | 8 ++-- .../simulator/_prompty/task_simulate.prompty | 5 +++ .../ai/evaluation/simulator/_simulator.py | 29 +++++++----- 4 files changed, 48 insertions(+), 38 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/README.md b/sdk/evaluation/azure-ai-evaluation/README.md index a657c4d55577..bfe1a07e31df 100644 --- a/sdk/evaluation/azure-ai-evaluation/README.md +++ b/sdk/evaluation/azure-ai-evaluation/README.md @@ -199,28 +199,28 @@ On January 24, 1984, former Apple CEO Steve Jobs introduced the first Macintosh. Some years later, research firms IDC and Gartner reported that Apple's market share in the U.S. had increased to about 6%. <|text_end|> Output with 5 QnAs: -[ - { - "q": "When did the former Apple CEO Steve Jobs introduced the first Macintosh?", - "r": "January 24, 1984" - }, - { - "q": "Who was the former Apple CEO that introduced the first Macintosh on January 24, 1984?", - "r": "Steve Jobs" - }, - { - "q": "What percent of the desktop share did Apple have in the United States in late 2003?", - "r": "2.06 percent" - }, - { - "q": "What were the research firms that reported on Apple's market share in the U.S.?", - "r": "IDC and Gartner" - }, - { - "q": "What was the percentage increase of Apple's market share in the U.S., as reported by research firms IDC and Gartner?", - "r": "6%" - } -] +{ + "qna": [{ + "q": "When did the former Apple CEO Steve Jobs introduced the first Macintosh?", + "r": "January 24, 1984" + }, + { + "q": "Who was the former Apple CEO that introduced the first Macintosh on January 24, 1984?", + "r": "Steve Jobs" + }, + { + "q": "What percent of the desktop share did Apple have in the United States in late 2003?", + "r": "2.06 percent" + }, + { + "q": "What were the research firms that reported on Apple's market share in the U.S.?", + "r": "IDC and Gartner" + }, + { + "q": "What was the percentage increase of Apple's market share in the U.S., as reported by research firms IDC and Gartner?", + "r": "6%" + }] +} Text: <|text_start|> {{ text }} diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty index 42a5d3fe4e37..08ed1fc8596b 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty @@ -36,8 +36,8 @@ On January 24, 1984, former Apple CEO Steve Jobs introduced the first Macintosh. Some years later, research firms IDC and Gartner reported that Apple's market share in the U.S. had increased to about 6%. <|text_end|> Output with 5 QnAs: -[ - { +{ + "qna":[{ "q": "When did the former Apple CEO Steve Jobs introduced the first Macintosh?", "r": "January 24, 1984" }, @@ -56,8 +56,8 @@ Output with 5 QnAs: { "q": "What was the percentage increase of Apple's market share in the U.S., as reported by research firms IDC and Gartner?", "r": "6%" - } -] + }] +} Text: <|text_start|> {{ text }} diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty index 4aa4af9d6a3e..225dc3904439 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty @@ -16,6 +16,9 @@ inputs: type: string conversation_history: type: dict + action: + type: string + default: "continue the converasation and make sure the task is completed by asking relevant questions" --- system: @@ -30,3 +33,5 @@ Here's a sample output: Output with a json object that continues the conversation, given the conversation history: {{ conversation_history }} + +{{ action }} diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py index 994b07228235..d46fe6c81340 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py @@ -611,8 +611,6 @@ async def _complete_conversation( :rtype: List[Dict[str, Optional[str]]] """ conversation_history = ConversationHistory() - # user_turn = Turn(role=ConversationRole.USER, content=conversation_starter) - # conversation_history.add_to_history(user_turn) while len(conversation_history) < max_conversation_turns: user_flow = self._load_user_simulation_flow( @@ -620,16 +618,23 @@ async def _complete_conversation( prompty_model_config=self.model_config, # type: ignore user_simulator_prompty_kwargs=user_simulator_prompty_kwargs, ) - conversation_starter_from_simulated_user = await user_flow( - task=task, - conversation_history=[ - { - "role": "assistant", - "content": conversation_starter, - "your_task": "Act as the user and translate the content into a user query.", - } - ], - ) + if len(conversation_history) == 0: + conversation_starter_from_simulated_user = await user_flow( + task=task, + conversation_history=[ + { + "role": "assistant", + "content": conversation_starter, + } + ], + action="rewrite the assitant's message as you have to accomplish the task by asking the right questions. Make sure the original question is not lost in your rewrite.", + ) + else: + conversation_starter_from_simulated_user = await user_flow( + task=task, + conversation_history=conversation_history.to_list(), + action="Your goal is to make sure the task is completed by asking the right questions. Do not ask the same questions again.", + ) if isinstance(conversation_starter_from_simulated_user, dict): conversation_starter_from_simulated_user = conversation_starter_from_simulated_user["content"] user_turn = Turn(role=ConversationRole.USER, content=conversation_starter_from_simulated_user) From 6de617cd4786fe52d4382695cc65430c5596d21a Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 23 Oct 2024 10:50:55 -0700 Subject: [PATCH 20/38] accept context from assistant messages, exclude them when using them for conversation --- .../_helpers/_simulator_data_classes.py | 23 +++++++++++++++++- .../ai/evaluation/simulator/_simulator.py | 24 +++++++++---------- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py index 109384bc2500..7f1b541a53e6 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py @@ -30,7 +30,19 @@ def to_dict(self) -> Dict[str, Optional[str]]: return { "role": self.role.value if isinstance(self.role, ConversationRole) else self.role, "content": self.content, - "context": self.context, + "context": str(self.context), + } + + def to_context_free_dict(self) -> Dict[str, Optional[str]]: + """ + Convert the conversation turn to a dictionary without context. + + :returns: A dictionary representation of the conversation turn without context. + :rtype: Dict[str, Optional[str]] + """ + return { + "role": self.role.value if isinstance(self.role, ConversationRole) else self.role, + "content": self.content, } def __repr__(self): @@ -65,6 +77,15 @@ def to_list(self) -> List[Dict[str, Optional[str]]]: :rtype: List[Dict[str, str]] """ return [turn.to_dict() for turn in self.history] + + def to_context_free_list(self) -> List[Dict[str, Optional[str]]]: + """ + Converts the conversation history to a list of dictionaries without context. + + :returns: A list of dictionaries representing the conversation turns without context. + :rtype: List[Dict[str, str]] + """ + return [turn.to_context_free_dict() for turn in self.history] def __len__(self) -> int: return len(self.history) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py index d46fe6c81340..61b1291e14bf 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py @@ -222,10 +222,10 @@ async def _simulate_with_predefined_turns( for simulated_turn in simulation: user_turn = Turn(role=ConversationRole.USER, content=simulated_turn) current_simulation.add_to_history(user_turn) - assistant_response = await self._get_target_response( + assistant_response, assistant_context = await self._get_target_response( target=target, api_call_delay_sec=api_call_delay_sec, conversation_history=current_simulation ) - assistant_turn = Turn(role=ConversationRole.ASSISTANT, content=assistant_response) + assistant_turn = Turn(role=ConversationRole.ASSISTANT, content=assistant_response, context=assistant_context) current_simulation.add_to_history(assistant_turn) progress_bar.update(1) # Update progress bar for both user and assistant turns @@ -295,17 +295,17 @@ async def _extend_conversation_with_simulator( while len(current_simulation) < max_conversation_turns: user_response_content = await user_flow( task="Continue the conversation", - conversation_history=current_simulation.to_list(), + conversation_history=current_simulation.to_context_free_list(), **user_simulator_prompty_kwargs, ) user_response = self._parse_prompty_response(response=user_response_content) user_turn = Turn(role=ConversationRole.USER, content=user_response["content"]) current_simulation.add_to_history(user_turn) await asyncio.sleep(api_call_delay_sec) - assistant_response = await self._get_target_response( + assistant_response, assistant_context = await self._get_target_response( target=target, api_call_delay_sec=api_call_delay_sec, conversation_history=current_simulation ) - assistant_turn = Turn(role=ConversationRole.ASSISTANT, content=assistant_response) + assistant_turn = Turn(role=ConversationRole.ASSISTANT, content=assistant_response, context=assistant_context) current_simulation.add_to_history(assistant_turn) progress_bar.update(1) @@ -632,17 +632,17 @@ async def _complete_conversation( else: conversation_starter_from_simulated_user = await user_flow( task=task, - conversation_history=conversation_history.to_list(), + conversation_history=conversation_history.to_context_free_list(), action="Your goal is to make sure the task is completed by asking the right questions. Do not ask the same questions again.", ) if isinstance(conversation_starter_from_simulated_user, dict): conversation_starter_from_simulated_user = conversation_starter_from_simulated_user["content"] user_turn = Turn(role=ConversationRole.USER, content=conversation_starter_from_simulated_user) conversation_history.add_to_history(user_turn) - assistant_response = await self._get_target_response( + assistant_response, assistant_context = await self._get_target_response( target=target, api_call_delay_sec=api_call_delay_sec, conversation_history=conversation_history ) - assistant_turn = Turn(role=ConversationRole.ASSISTANT, content=assistant_response) + assistant_turn = Turn(role=ConversationRole.ASSISTANT, content=assistant_response, context=assistant_context) conversation_history.add_to_history(assistant_turn) progress_bar.update(1) @@ -653,7 +653,7 @@ async def _complete_conversation( async def _get_target_response( self, *, target: Callable, api_call_delay_sec: float, conversation_history: ConversationHistory - ) -> str: + ) -> str, Optional[str]: """ Retrieves the response from the target callback based on the current conversation history. @@ -663,8 +663,8 @@ async def _get_target_response( :paramtype api_call_delay_sec: float :keyword conversation_history: The current conversation history. :paramtype conversation_history: ConversationHistory - :return: The content of the response from the target. - :rtype: str + :return: The content of the response from the target and an optional context. + :rtype: str, Optional[str] """ response = await target( messages={"messages": conversation_history.to_list()}, @@ -674,4 +674,4 @@ async def _get_target_response( ) await asyncio.sleep(api_call_delay_sec) latest_message = response["messages"][-1] - return latest_message["content"] + return latest_message["content"], latest_message.get("context", "") # type: ignore From 1e5d40c74c3f5ba3b56d185f8c652ecc32e59819 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 23 Oct 2024 10:53:23 -0700 Subject: [PATCH 21/38] update changelog --- sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index 4add9ed69184..1425828f73cc 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -12,6 +12,7 @@ ### Bugs Fixed - Non adversarial simulator works with `gpt-4o` models using the `json_schema` response format - Fix evaluate API failure when `trace.destination` is set to `none` +- Non adversarial simulator now accepts context from the callback ### Other Changes - Improved error messages for the `evaluate` API by enhancing the validation of input parameters. This update provides more detailed and actionable error descriptions. From 93b29c7d2a116e40f61a65668eb9053dec29ff82 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 23 Oct 2024 11:01:49 -0700 Subject: [PATCH 22/38] pylint fixes --- .../evaluation/simulator/_helpers/_simulator_data_classes.py | 4 ++-- .../azure/ai/evaluation/simulator/_simulator.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py index 7f1b541a53e6..6bd57db206bf 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py @@ -32,7 +32,7 @@ def to_dict(self) -> Dict[str, Optional[str]]: "content": self.content, "context": str(self.context), } - + def to_context_free_dict(self) -> Dict[str, Optional[str]]: """ Convert the conversation turn to a dictionary without context. @@ -77,7 +77,7 @@ def to_list(self) -> List[Dict[str, Optional[str]]]: :rtype: List[Dict[str, str]] """ return [turn.to_dict() for turn in self.history] - + def to_context_free_list(self) -> List[Dict[str, Optional[str]]]: """ Converts the conversation history to a list of dictionaries without context. diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py index 61b1291e14bf..94b708ca60de 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py @@ -9,7 +9,7 @@ import os import re import warnings -from typing import Any, Callable, Dict, List, Optional, Union +from typing import Any, Callable, Dict, List, Optional, Union, Tuple from promptflow.core import AsyncPrompty from tqdm import tqdm @@ -653,7 +653,7 @@ async def _complete_conversation( async def _get_target_response( self, *, target: Callable, api_call_delay_sec: float, conversation_history: ConversationHistory - ) -> str, Optional[str]: + ) -> Tuple[str, Optional[str]]: """ Retrieves the response from the target callback based on the current conversation history. From 8e3ddc316c8ecd0621db457cd44850508e1d015a Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 23 Oct 2024 11:04:00 -0700 Subject: [PATCH 23/38] pylint fixes --- .../evaluation/simulator/_helpers/_simulator_data_classes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py index 6bd57db206bf..a887e1d133b4 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py @@ -32,7 +32,7 @@ def to_dict(self) -> Dict[str, Optional[str]]: "content": self.content, "context": str(self.context), } - + def to_context_free_dict(self) -> Dict[str, Optional[str]]: """ Convert the conversation turn to a dictionary without context. @@ -77,7 +77,7 @@ def to_list(self) -> List[Dict[str, Optional[str]]]: :rtype: List[Dict[str, str]] """ return [turn.to_dict() for turn in self.history] - + def to_context_free_list(self) -> List[Dict[str, Optional[str]]]: """ Converts the conversation history to a list of dictionaries without context. From 4ccc7c8d449e6ff3374d8da205a8fafaf9047d5a Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 23 Oct 2024 11:24:45 -0700 Subject: [PATCH 24/38] remove redundant quotes --- .../ai/evaluation/simulator/_prompty/task_simulate.prompty | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty index 225dc3904439..00af8c580464 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty @@ -18,7 +18,7 @@ inputs: type: dict action: type: string - default: "continue the converasation and make sure the task is completed by asking relevant questions" + default: continue the converasation and make sure the task is completed by asking relevant questions --- system: From bed51962970e4949d739a9de72705761638d1ed0 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 23 Oct 2024 11:46:29 -0700 Subject: [PATCH 25/38] Fix typo --- .../azure/ai/evaluation/simulator/_simulator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py index 7bdbc8af24d1..814c3e4d369e 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py @@ -627,7 +627,7 @@ async def _complete_conversation( "content": conversation_starter, } ], - action="rewrite the assitant's message as you have to accomplish the task by asking the right questions. Make sure the original question is not lost in your rewrite.", + action="rewrite the assistant's message as you have to accomplish the task by asking the right questions. Make sure the original question is not lost in your rewrite.", ) else: conversation_starter_from_simulated_user = await user_flow( From 0fdd6441bd6fe9c866141974c3e6d7f6a461c69f Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 23 Oct 2024 12:10:58 -0700 Subject: [PATCH 26/38] pylint fix --- .../azure/ai/evaluation/simulator/_indirect_attack_simulator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py index 6575c0798a53..3ffc559d18a6 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py @@ -11,7 +11,6 @@ from azure.ai.evaluation._common.utils import validate_azure_ai_project from azure.ai.evaluation._common._experimental import experimental -from azure.ai.evaluation._common.utils import validate_azure_ai_project from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException from azure.ai.evaluation.simulator import AdversarialScenarioJailbreak, SupportedLanguages from azure.core.credentials import TokenCredential From 1f695ccab667d4c89d70e506ec3029d967bb30f6 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 23 Oct 2024 12:52:40 -0700 Subject: [PATCH 27/38] Update broken tests --- .../tests/unittests/test_non_adv_simulator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py index 592abfa0dde3..8be780461674 100644 --- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py @@ -161,7 +161,7 @@ async def test_complete_conversation( mock_user_flow = AsyncMock() mock_user_flow.return_value = {"content": "User response"} mock_load_user_simulation_flow.return_value = mock_user_flow - mock_get_target_response.return_value = "Assistant response" + mock_get_target_response.return_value = "Assistant response", "Assistant context" conversation = await simulator._complete_conversation( conversation_starter="Hello", @@ -185,7 +185,7 @@ async def test_get_target_response(self, valid_openai_model_config): mock_target = AsyncMock() mock_target.return_value = { "messages": [ - {"role": "assistant", "content": "Assistant response"}, + {"role": "assistant", "content": "Assistant response", "context": "assistant context"}, ] } response = await simulator._get_target_response( @@ -193,7 +193,7 @@ async def test_get_target_response(self, valid_openai_model_config): api_call_delay_sec=0, conversation_history=AsyncMock(), ) - assert response == "Assistant response" + assert response == ("Assistant response", "assistant context") @pytest.mark.asyncio async def test_call_with_both_conversation_turns_and_text_tasks(self, valid_openai_model_config): @@ -317,7 +317,7 @@ async def test_simulate_with_predefined_turns( self, mock_extend_conversation_with_simulator, mock_get_target_response, valid_openai_model_config ): simulator = Simulator(model_config=valid_openai_model_config) - mock_get_target_response.return_value = "assistant_response" + mock_get_target_response.return_value = "assistant_response", "assistant_context" mock_extend_conversation_with_simulator.return_value = None conversation_turns = [["user_turn"]] From 92c9a6d04bf01bebca18c9bb4fb0132a798da01b Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Fri, 25 Oct 2024 09:56:49 -0700 Subject: [PATCH 28/38] Include the grounding json in the manifest --- sdk/evaluation/azure-ai-evaluation/MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/evaluation/azure-ai-evaluation/MANIFEST.in b/sdk/evaluation/azure-ai-evaluation/MANIFEST.in index 1aeecacdfc11..7294aaa88864 100644 --- a/sdk/evaluation/azure-ai-evaluation/MANIFEST.in +++ b/sdk/evaluation/azure-ai-evaluation/MANIFEST.in @@ -4,3 +4,4 @@ include azure/__init__.py include azure/ai/__init__.py include azure/ai/evaluation/py.typed recursive-include azure/ai/evaluation *.prompty +include azure/ai/evaluation/simulator/_data_sources *.json From 0673cd5178450db9cf2f3a0b49df57702e98e347 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Fri, 25 Oct 2024 10:03:16 -0700 Subject: [PATCH 29/38] Fix typo --- sdk/evaluation/azure-ai-evaluation/MANIFEST.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/MANIFEST.in b/sdk/evaluation/azure-ai-evaluation/MANIFEST.in index 7294aaa88864..fa5dccd6c8f7 100644 --- a/sdk/evaluation/azure-ai-evaluation/MANIFEST.in +++ b/sdk/evaluation/azure-ai-evaluation/MANIFEST.in @@ -4,4 +4,4 @@ include azure/__init__.py include azure/ai/__init__.py include azure/ai/evaluation/py.typed recursive-include azure/ai/evaluation *.prompty -include azure/ai/evaluation/simulator/_data_sources *.json +include azure/ai/evaluation/simulator/_data_sources/grounding.json \ No newline at end of file From 7b360fce457b5bdb4c98e7fac27da75d448c9d54 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Fri, 25 Oct 2024 10:11:10 -0700 Subject: [PATCH 30/38] Come on package --- .../azure/ai/evaluation/simulator/_data_sources/__init__.py | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_data_sources/__init__.py diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_data_sources/__init__.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_data_sources/__init__.py new file mode 100644 index 000000000000..d540fd20468c --- /dev/null +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_data_sources/__init__.py @@ -0,0 +1,3 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- From c9f38c94a177b299d2d3ae15b5f58392b5534d58 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Mon, 28 Oct 2024 06:45:01 -0700 Subject: [PATCH 31/38] Release 1.0.0b5 --- sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index 33fbfa2096fc..eeece2d2ae9d 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -1,7 +1,6 @@ # Release History - -## 1.0.0b5 (Unreleased) +## 1.0.0b5 (2024-10-28) ### Features Added - Added `GroundednessProEvaluator`, which is a service-based evaluator for determining response groundedness. From ed7eed1129bc62ec4fc461c4d520b15329f7ebd7 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Mon, 28 Oct 2024 13:51:47 -0700 Subject: [PATCH 32/38] Notice from Chang --- sdk/evaluation/azure-ai-evaluation/NOTICE.txt | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/sdk/evaluation/azure-ai-evaluation/NOTICE.txt b/sdk/evaluation/azure-ai-evaluation/NOTICE.txt index 9dc8704c7f6e..ec5e545abaef 100644 --- a/sdk/evaluation/azure-ai-evaluation/NOTICE.txt +++ b/sdk/evaluation/azure-ai-evaluation/NOTICE.txt @@ -48,3 +48,23 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + + +License notice for [Is GPT-4 a reliable rater? Evaluating consistency in GPT-4's text ratings](https://www.frontiersin.org/journals/education/articles/10.3389/feduc.2023.1272229/full) +------------------------------------------------------------------------------------------------------------------ +Copyright © 2023 Hackl, Müller, Granitzer and Sailer. This work is openly licensed via [CC BY 4.0](http://creativecommons.org/licenses/by/4.0/). + + +License notice for [Is ChatGPT a Good NLG Evaluator? A Preliminary Study](https://aclanthology.org/2023.newsum-1.1) (Wang et al., NewSum 2023) +------------------------------------------------------------------------------------------------------------------ +Copyright © 2023. This work is openly licensed via [CC BY 4.0](http://creativecommons.org/licenses/by/4.0/). + + +License notice for [SummEval: Re-evaluating Summarization Evaluation.](https://doi.org/10.1162/tacl_a_00373) (Fabbri et al.) +------------------------------------------------------------------------------------------------------------------ +© 2021 Association for Computational Linguistics. This work is openly licensed via [CC BY 4.0](http://creativecommons.org/licenses/by/4.0/). + + +License notice for [Evaluation Metrics in the Era of GPT-4: Reliably Evaluating Large Language Models on Sequence to Sequence Tasks](https://aclanthology.org/2023.emnlp-main.543) (Sottana et al., EMNLP 2023) +------------------------------------------------------------------------------------------------------------------ +© 2023 Association for Computational Linguistics. This work is openly licensed via [CC BY 4.0](http://creativecommons.org/licenses/by/4.0/). \ No newline at end of file From 3de5b660335a7508e162d07b48bde0483e340a02 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Mon, 28 Oct 2024 16:25:46 -0700 Subject: [PATCH 33/38] Remove adv_conv template parameters from the outputs --- .../azure/ai/evaluation/simulator/_adversarial_simulator.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py index d96cb4df5cd3..a78de5a4778d 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py @@ -276,6 +276,9 @@ def _to_chat_protocol( "target_population", "topic", "ch_template_placeholder", + "chatbot_name", + "name", + "group", ): template_parameters.pop(key, None) if conversation_category: From f2e95d1313fdba0370ba6fcc0e21a226115d2e93 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Tue, 29 Oct 2024 06:52:58 -0700 Subject: [PATCH 34/38] Update chanagelog --- sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 10 ++++++++++ .../azure/ai/evaluation/_version.py | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index 2062a185f80f..d00c8a53f0a8 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -1,5 +1,15 @@ # Release History +## 1.0.0b6 (Unreleased) + +### Features Added + +### Breaking Changes + +### Bugs Fixed + +### Other Changes + ## 1.0.0b5 (2024-10-28) ### Features Added diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_version.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_version.py index eecd2a8e450f..ffa055f43119 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_version.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_version.py @@ -2,4 +2,4 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- -VERSION = "1.0.0b5" +VERSION = "1.0.0b6" From f9ac10cac827d0db714919f83e0acc14c9fed5ce Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Tue, 29 Oct 2024 12:16:45 -0700 Subject: [PATCH 35/38] Experimental tags on adv scenarios --- .../azure/ai/evaluation/simulator/_adversarial_scenario.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_scenario.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_scenario.py index a8b4489b130d..f75459dcf1c2 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_scenario.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_scenario.py @@ -3,8 +3,10 @@ # --------------------------------------------------------- from enum import Enum +from azure.ai.evaluation._common._experimental import experimental +@experimental class AdversarialScenario(Enum): """Adversarial scenario types""" @@ -18,12 +20,14 @@ class AdversarialScenario(Enum): ADVERSARIAL_CONTENT_PROTECTED_MATERIAL = "adv_content_protected_material" +@experimental class AdversarialScenarioJailbreak(Enum): """Adversarial scenario types for XPIA Jailbreak""" ADVERSARIAL_INDIRECT_JAILBREAK = "adv_xpia" +@experimental class _UnstableAdversarialScenario(Enum): """Adversarial scenario types that we haven't published, but still want available for internal use Values listed here are subject to potential change, and/or migration to the main enum over time. From 6c81cbbf2ca3af409d625d36915541fa3f545ef5 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 30 Oct 2024 08:08:29 -0700 Subject: [PATCH 36/38] Readme fix onbreaking change --- sdk/evaluation/azure-ai-evaluation/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/README.md b/sdk/evaluation/azure-ai-evaluation/README.md index ca507339c6b8..05898820b8f0 100644 --- a/sdk/evaluation/azure-ai-evaluation/README.md +++ b/sdk/evaluation/azure-ai-evaluation/README.md @@ -403,7 +403,7 @@ outputs = asyncio.run( ) ) -print(outputs.to_eval_qa_json_lines()) +print(outputs.to_eval_qr_json_lines()) ``` #### Direct Attack Simulator From b48f8ab2dbd0fc2fdcce81692721f87109e71169 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 30 Oct 2024 11:37:35 -0700 Subject: [PATCH 37/38] Add the category and both user and assistant context to the response of qr_json_lines --- .../azure/ai/evaluation/simulator/_utils.py | 32 +++++++++++++++---- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_utils.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_utils.py index 8407b264fa2d..3416cf93e93e 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_utils.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_utils.py @@ -44,23 +44,41 @@ def to_eval_qr_json_lines(self): for item in self: user_message = None assistant_message = None - context = None + user_context = None + assistant_context = None + template_parameters = item.get("template_parameters", {}) + category = template_parameters.get("category", None) for message in item["messages"]: if message["role"] == "user": user_message = message["content"] + user_context = message.get("context", "") elif message["role"] == "assistant": assistant_message = message["content"] - if "context" in message: - context = message.get("context", None) + assistant_context = message.get("context", "") if user_message and assistant_message: - if context: + if user_context or assistant_context: json_lines += ( - json.dumps({"query": user_message, "response": assistant_message, "context": context}) + json.dumps( + { + "query": user_message, + "response": assistant_message, + "context": str( + { + "user_context": user_context, + "assistant_context": assistant_context, + } + ), + "category": category, + } + ) + "\n" ) - user_message = assistant_message = context = None + user_message = assistant_message = None else: - json_lines += json.dumps({"query": user_message, "response": assistant_message}) + "\n" + json_lines += ( + json.dumps({"query": user_message, "response": assistant_message, "category": category}) + + "\n" + ) user_message = assistant_message = None return json_lines From d422e05d22911efaf29d165712e9f02da90b1376 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 30 Oct 2024 11:42:03 -0700 Subject: [PATCH 38/38] Update changelog --- sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index 8235e9440c85..3a500849c3cb 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -7,6 +7,7 @@ ### Breaking Changes ### Bugs Fixed +- Output of adversarial simulators are of type `JsonLineList` and the helper function `to_eval_qr_json_lines` now outputs context from both user and assistant turns along with `category` if it exists in the conversation ### Other Changes - Refined error messages for serviced-based evaluators and simulators.