From 16cce2086ac28c2c3b7003caacbfb40988af754d Mon Sep 17 00:00:00 2001 From: Sydney Lister Date: Wed, 30 Oct 2024 14:51:46 -0700 Subject: [PATCH 1/2] Update randomization pattern for Adversarial simulation --- .../azure-ai-evaluation/CHANGELOG.md | 1 + .../simulator/_adversarial_simulator.py | 24 ++++++++++--------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index ec35abaf57ee..f67106dd0141 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -11,6 +11,7 @@ ### Other Changes - Refined error messages for serviced-based evaluators and simulators. - Introduced environment variable `AI_EVALS_DISABLE_EXPERIMENTAL_WARNING` to disable the warning message for experimental features. +- Changed the randomization pattern for `AdversarialSimulator` such that there is an equal number of Adversarial harm categories (e.g. Hate + Unfairness, Self-Harm, Violence, Sex) represented in the `AdversarialSimulator` outputs. Previously, for 200 `max_simulation_results` a user might see 140 results belonging to the 'Hate + Unfairness' category and 40 results belonging to the 'Self-Harm' category. Now, user will see 50 results for each of Hate + Unfairness, Self-Harm, Violence, and Sex. 
## 1.0.0b5 (2024-10-28) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py index a78de5a4778d..af96e637b389 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py @@ -7,6 +7,7 @@ import logging import random from typing import Any, Callable, Dict, List, Literal, Optional, Union, cast +from itertools import zip_longest from tqdm import tqdm @@ -215,17 +216,18 @@ async def __call__( ncols=100, unit="simulations", ) - for template in templates: - parameter_order = list(range(len(template.template_parameters))) - if randomize_order: - # The template parameter lists are persistent across sim runs within a session, - # So randomize a the selection instead of the parameter list directly, - # or a potentially large deep copy. - if randomization_seed is not None: - random.seed(randomization_seed) - random.shuffle(parameter_order) - for index in parameter_order: - parameter = template.template_parameters[index].copy() + + if randomize_order: + # The template parameter lists are persistent across sim runs within a session, + # So randomize the selection instead of the parameter list directly, + # or a potentially large deep copy.
+ if randomization_seed is not None: + random.seed(randomization_seed) + random.shuffle(templates) + parameter_lists = [t.template_parameters for t in templates] + zipped_parameters = list(zip_longest(*parameter_lists)) + for param_group in zipped_parameters: + for template, parameter in zip(templates, param_group): if _jailbreak_type == "upia": parameter = self._join_conversation_starter(parameter, random.choice(jailbreak_dataset)) tasks.append( From e03b9a7287abcdf1f54071bd5861833aae676aa3 Mon Sep 17 00:00:00 2001 From: Sydney Lister Date: Wed, 30 Oct 2024 15:02:59 -0700 Subject: [PATCH 2/2] update changelog --- sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index f67106dd0141..eccfe1cbf5e4 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -11,7 +11,7 @@ ### Other Changes - Refined error messages for serviced-based evaluators and simulators. - Introduced environment variable `AI_EVALS_DISABLE_EXPERIMENTAL_WARNING` to disable the warning message for experimental features. -- Changed the randomization pattern for `AdversarialSimulator` such that there is an equal number of Adversarial harm categories (e.g. Hate + Unfairness, Self-Harm, Violence, Sex) represented in the `AdversarialSimulator` outputs. Previously, for 200 `max_simulation_results` a user might see 140 results belonging to the 'Hate + Unfairness' category and 40 results belonging to the 'Self-Harm' category. Now, user will see 50 results for each of Hate + Unfairness, Self-Harm, Violence, and Sex. +- Changed the randomization pattern for `AdversarialSimulator` such that there is an almost equal number of Adversarial harm categories (e.g. Hate + Unfairness, Self-Harm, Violence, Sex) represented in the `AdversarialSimulator` outputs. 
Previously, for 200 `max_simulation_results` a user might see 140 results belonging to the 'Hate + Unfairness' category and 40 results belonging to the 'Self-Harm' category. Now, the user will see approximately 50 results for each of Hate + Unfairness, Self-Harm, Violence, and Sex. ## 1.0.0b5 (2024-10-28)