From dd1cbbc175507abac577452907acc7be0ece6e47 Mon Sep 17 00:00:00 2001
From: Sydney Lister
Date: Thu, 31 Oct 2024 10:51:03 -0700
Subject: [PATCH 1/2] Default to non-randomized order of template parameters

---
 sdk/evaluation/azure-ai-evaluation/CHANGELOG.md       | 11 +++++++++++
 .../evaluation/simulator/_direct_attack_simulator.py  |  4 ++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
index e367e15a301d..08dcddb660f6 100644
--- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
+++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
@@ -14,6 +14,17 @@
 - Refined error messages for service-based evaluators and simulators.
 - Introduced environment variable `AI_EVALS_DISABLE_EXPERIMENTAL_WARNING` to disable the warning message for experimental features.
 - Changed the randomization pattern for `AdversarialSimulator` such that there is an almost equal number of Adversarial harm categories (e.g. Hate + Unfairness, Self-Harm, Violence, Sex) represented in the `AdversarialSimulator` outputs. Previously, for 200 `max_simulation_results` a user might see 140 results belonging to the 'Hate + Unfairness' category and 40 results belonging to the 'Self-Harm' category. Now, users will see 50 results for each of Hate + Unfairness, Self-Harm, Violence, and Sex.
+- For the `AdversarialSimulator`, the prompt templates used to generate simulated outputs for each Adversarial harm category will no longer be in a randomized order by default. To override this behavior, pass `randomize_order=True` when you call the `AdversarialSimulator`, for example:
+```python
+adversarial_simulator = DirectAttackSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
+outputs = asyncio.run(
+    adversarial_simulator(
+        scenario=scenario,
+        target=callback,
+        randomize_order=True
+    )
+)
+```
 
 ## 1.0.0b5 (2024-10-28)
 
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_direct_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_direct_attack_simulator.py
index 6f2369ed3539..faa9f989dfdf 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_direct_attack_simulator.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_direct_attack_simulator.py
@@ -190,7 +190,7 @@ async def __call__(
             api_call_retry_sleep_sec=api_call_retry_sleep_sec,
             api_call_delay_sec=api_call_delay_sec,
             concurrent_async_task=concurrent_async_task,
-            randomize_order=True,
+            randomize_order=False,
             randomization_seed=randomization_seed,
         )
         jb_sim = AdversarialSimulator(azure_ai_project=cast(dict, self.azure_ai_project), credential=self.credential)
@@ -204,7 +204,7 @@
             api_call_delay_sec=api_call_delay_sec,
             concurrent_async_task=concurrent_async_task,
             _jailbreak_type="upia",
-            randomize_order=True,
+            randomize_order=False,
             randomization_seed=randomization_seed,
         )
         return {"jailbreak": jb_sim_results, "regular": regular_sim_results}

From 6ba01772557f94218a503b48ecf28e42f1c05a3f Mon Sep 17 00:00:00 2001
From: Sydney Lister
Date: Thu, 31 Oct 2024 10:57:34 -0700
Subject: [PATCH 2/2] small changelog update

---
 sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
index 08dcddb660f6..cb62bb93b5b1 100644
--- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
+++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
@@ -14,7 +14,7 @@
 - Refined error messages for service-based evaluators and simulators.
 - Introduced environment variable `AI_EVALS_DISABLE_EXPERIMENTAL_WARNING` to disable the warning message for experimental features.
 - Changed the randomization pattern for `AdversarialSimulator` such that there is an almost equal number of Adversarial harm categories (e.g. Hate + Unfairness, Self-Harm, Violence, Sex) represented in the `AdversarialSimulator` outputs. Previously, for 200 `max_simulation_results` a user might see 140 results belonging to the 'Hate + Unfairness' category and 40 results belonging to the 'Self-Harm' category. Now, users will see 50 results for each of Hate + Unfairness, Self-Harm, Violence, and Sex.
-- For the `AdversarialSimulator`, the prompt templates used to generate simulated outputs for each Adversarial harm category will no longer be in a randomized order by default. To override this behavior, pass `randomize_order=True` when you call the `AdversarialSimulator`, for example:
+- For the `DirectAttackSimulator`, the prompt templates used to generate simulated outputs for each Adversarial harm category will no longer be in a randomized order by default. To override this behavior, pass `randomize_order=True` when you call the `DirectAttackSimulator`, for example:
 ```python
 adversarial_simulator = DirectAttackSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
 outputs = asyncio.run(
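
---

Note: the changelog snippet above omits its surrounding setup (imports, the `azure_ai_project` dict, the `callback` target, and the `scenario`). Below is a minimal, self-contained sketch of opting back into randomized template order after this change. The project values, the echoing `callback`, and the `ADVERSARIAL_QA` scenario choice are illustrative placeholders, not part of this patch.

```python
import asyncio

from azure.identity import DefaultAzureCredential
from azure.ai.evaluation.simulator import AdversarialScenario, DirectAttackSimulator

# Placeholder project details -- substitute your own Azure AI project.
azure_ai_project = {
    "subscription_id": "<subscription-id>",
    "resource_group_name": "<resource-group>",
    "project_name": "<project-name>",
}


async def callback(messages, stream=False, session_state=None, context=None):
    # Placeholder target: echoes the latest simulated prompt instead of
    # calling a real application endpoint.
    messages_list = messages["messages"]
    latest = messages_list[-1]["content"]
    messages_list.append({"role": "assistant", "content": f"Echo: {latest}"})
    return {
        "messages": messages_list,
        "stream": stream,
        "session_state": session_state,
        "context": context,
    }


simulator = DirectAttackSimulator(
    azure_ai_project=azure_ai_project, credential=DefaultAzureCredential()
)
outputs = asyncio.run(
    simulator(
        scenario=AdversarialScenario.ADVERSARIAL_QA,  # illustrative scenario
        target=callback,
        max_simulation_results=4,
        randomize_order=True,  # opt back into the pre-change shuffled order
    )
)
# DirectAttackSimulator returns paired runs: a jailbreak (UPIA) pass
# and a regular pass.
print(len(outputs["jailbreak"]), len(outputs["regular"]))
```

As the patched `__call__` shows, both inner simulations (jailbreak and regular) receive the same `randomize_order` and `randomization_seed`, which is what keeps the two result sets comparable template-for-template regardless of the ordering mode.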