2 changes: 2 additions & 0 deletions doc/api.rst
@@ -545,6 +545,8 @@ API Reference
ScorerEvaluator
ScorerIdentifier
ScorerMetrics
ScorerMetricsEntry
ScorerMetricsRegistry
ScorerPromptValidator
SelfAskCategoryScorer
SelfAskGeneralFloatScaleScorer
21 changes: 14 additions & 7 deletions doc/code/scenarios/1_composite_scenario.ipynb
@@ -156,7 +156,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "19e072daec284cd097fd22f28449e859",
"model_id": "25ba83e56f4144b78a1eab03e08fdb01",
"version_major": 2,
"version_minor": 0
},
@@ -192,18 +192,25 @@
"\n",
"\u001b[1m 🎯 Target Information\u001b[0m\n",
"\u001b[36m • Target Type: OpenAIChatTarget\u001b[0m\n",
"\u001b[36m • Target Model: gpt-40\u001b[0m\n",
"\u001b[36m • Target Endpoint: https://pyrit-github-pipeline.openai.azure.com/openai/v1\u001b[0m\n",
"\u001b[36m • Target Model: gpt-4o-japan-nilfilter\u001b[0m\n",
"\u001b[36m • Target Endpoint: https://pyrit-japan-test.openai.azure.com/openai/v1\u001b[0m\n",
"\n",
"\u001b[1m 📊 Scorer Information\u001b[0m\n",
"\u001b[36m • Scorer Type: TrueFalseCompositeScorer\u001b[0m\n",
"\u001b[36m └─ Composite of 2 scorer(s):\u001b[0m\n",
"\u001b[36m • Scorer Type: FloatScaleThresholdScorer\u001b[0m\n",
"\u001b[36m └─ Wraps:\u001b[0m\n",
"\u001b[36m • Scorer Type: AzureContentFilterScorer\u001b[0m\n",
"\u001b[36m └─ Composite of 1 scorer(s):\u001b[0m\n",
"\u001b[36m • Scorer Type: AzureContentFilterScorer\u001b[0m\n",
"\u001b[36m • Scorer Type: TrueFalseInverterScorer\u001b[0m\n",
"\u001b[36m └─ Wraps:\u001b[0m\n",
"\u001b[36m • Scorer Type: SelfAskRefusalScorer\u001b[0m\n",
"\u001b[36m └─ Composite of 1 scorer(s):\u001b[0m\n",
"\u001b[36m • Scorer Type: SelfAskRefusalScorer\u001b[0m\n",
"\n",
"\u001b[1m 📉 Scorer Performance Metrics\u001b[0m\n",
"\u001b[32m • Accuracy: 58.72%\u001b[0m\n",
"\u001b[36m • Accuracy Std Error: ±0.0285\u001b[0m\n",
"\u001b[36m • F1 Score: 0.1399\u001b[0m\n",
"\u001b[36m • Precision: 0.6667\u001b[0m\n",
"\u001b[36m • Recall: 0.0781\u001b[0m\n",
"\n",
"\u001b[1m\u001b[36m▼ Overall Statistics\u001b[0m\n",
"\u001b[36m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
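A quick arithmetic note on the scorer performance metrics shown above (not part of the diff): F1 is the harmonic mean of precision and recall, so a precision of 0.6667 paired with a recall of only 0.0781 still yields an F1 near 0.14. A minimal check using the rounded values from the summary:

```python
# Sanity check of the composite scorer's reported metrics, using the rounded values
# printed in the scenario summary above.
precision = 0.6667
recall = 0.0781

# F1 is the harmonic mean of precision and recall.
f1 = 2 * precision * recall / (precision + recall)
print(f"F1 = {f1:.4f}")  # ~0.1398, matching the reported 0.1399 up to input rounding
```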
8 changes: 2 additions & 6 deletions doc/code/scenarios/1_composite_scenario.py
@@ -5,15 +5,11 @@
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.17.3
# kernelspec:
# display_name: pyrit-dev
# language: python
# name: python3
# jupytext_version: 1.17.2
# ---

# %% [markdown]
# # Foundry Scenario Example
# # 1. Foundry Scenario Example
#
# This notebook demonstrates how to use a composite strategy, the `FoundryStrategy`, to test a target with multiple
# attack strategies.
146 changes: 145 additions & 1 deletion doc/code/scoring/scorer_evals.ipynb
@@ -371,6 +371,150 @@
" print(\"Evaluation for harm category:\", harm_category)\n",
" print(asdict(metrics))"
]
},
{
"cell_type": "markdown",
"id": "17",
"metadata": {},
"source": [
"## Scorer Metrics Registry\n",
"\n",
"### Adding to registry\n",
"The `ScorerMetricsRegistry` stores official scoring evaluation metrics in JSONL format (one file for harm scoring, another for objective scoring) in the public repository. These are results from running scorer evaluations on official consolidated datasets curated by the PyRIT team. Each entry in the registry contains all the attributes that identify a scorer (see the `ScorerIdentifier` class for more information on what goes into scoring identification) as well as the metrics resulting from the evaluation. To run an evaluation that you'd like to save to the registry, set the `add_to_registry` flag to True. (Note: This should primarily be done by the PyRIT team)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "18",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from pyrit.common.path import DATASETS_PATH, SCORER_EVALS_TRUE_FALSE_PATH\n",
"\n",
"chat_target = OpenAIChatTarget(\n",
" endpoint=os.environ.get(\"AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT\"),\n",
" api_key=os.environ.get(\"AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY\"),\n",
" model_name=os.environ.get(\"AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL\"),\n",
" temperature=0.9,\n",
" )\n",
"true_false_classifier = SelfAskTrueFalseScorer(\n",
" true_false_question_path=DATASETS_PATH/\"score\"/\"true_false_question\"/\"task_achieved.yaml\", chat_target=chat_target\n",
")\n",
"\n",
"evaluator: ObjectiveScorerEvaluator = ScorerEvaluator.from_scorer(scorer=true_false_classifier) # type: ignore\n",
"official_dataset_path = SCORER_EVALS_TRUE_FALSE_PATH / \"CONSOLIDATED_true_false_objective_dataset.csv\"\n",
"metrics = await evaluator.run_evaluation_from_csv_async( # type:ignore\n",
" csv_path=official_dataset_path,\n",
" assistant_response_col_name=\"assistant_response\",\n",
" human_label_col_names=[\"normalized_score\"],\n",
" objective_or_harm_col_name=\"objective\",\n",
" assistant_response_data_type_col_name=\"data_type\",\n",
" num_scorer_trials=1,\n",
" add_to_registry=True\n",
")"
]
},
{
"cell_type": "markdown",
"id": "19",
"metadata": {},
"source": [
"### Retrieving metrics from registry\n",
"`ScorerMetricsRegistry` has a method (`get_metrics_registry_entries`) to filter entries on a variety of attributes, including hash, `Scorer` type, model information, as well as accuracy/mean standard error thresholds. You can use `print_summary` to display a brief summary of the scoring configuration and metrics."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "20",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"============================================================\n",
"Scorer Configuration:\n",
" Type: TrueFalseCompositeScorer\n",
" Version: 1\n",
" Dataset Version: 1.0\n",
" Sub-scorers: FloatScaleThresholdScorer, TrueFalseInverterScorer\n",
"\n",
"Metrics:\n",
" accuracy: 0.5872\n",
" accuracy_standard_error: 0.0285\n",
" f1_score: 0.1399\n",
" precision: 0.6667\n",
" recall: 0.0781\n",
"============================================================\n",
"============================================================\n",
"Scorer Configuration:\n",
" Type: SelfAskTrueFalseScorer\n",
" Version: 1\n",
" Model: gpt-4o-unsafe (OpenAIChatTarget)\n",
" Dataset Version: 1.0\n",
" System Prompt: sha256:894c040cb71ebe86\n",
"\n",
"Metrics:\n",
" accuracy: 0.8658\n",
" accuracy_standard_error: 0.0197\n",
" f1_score: 0.8305\n",
" precision: 0.9074\n",
" recall: 0.7656\n",
"============================================================\n"
]
}
],
"source": [
"from pyrit.score.scorer_evaluation.scorer_metrics_registry import ScorerMetricsRegistry\n",
"\n",
"registry = ScorerMetricsRegistry()\n",
"# get first 5 entries\n",
"entries = registry.get_metrics_registry_entries()[:5]\n",
"\n",
"for entry in entries:\n",
" entry.print_summary()"
]
},
{
"cell_type": "markdown",
"id": "21",
"metadata": {},
"source": [
"You can view identifier information and retrieve metrics (if they exist) for a specific `Scorer` configuration by calling methods on the `Scorer` object itself. "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "22",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'__type__': 'SelfAskTrueFalseScorer', 'version': 1, 'system_prompt_template': 'sha256:894c040cb71ebe86', 'user_prompt_template': None, 'sub_identifier': None, 'model_info': {'__type__': 'OpenAIChatTarget', 'model_name': 'gpt-4o-unsafe', 'temperature': 0.9}, 'score_aggregator': 'OR_', 'scorer_specific_params': None, 'pyrit_version': '0.10.1.dev0', 'hash': 'fdab4c3571d3f46a40051d8b87e0b96e8b929918c7a084f555db70093416a993'}\n"
]
},
{
"data": {
"text/plain": [
"ObjectiveScorerMetrics(accuracy=0.8657718120805369, accuracy_standard_error=0.01974765141819736, f1_score=0.8305084745762712, precision=0.9074074074074074, recall=0.765625)"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(true_false_classifier.get_identifier())\n",
"true_false_classifier.get_scorer_metrics_from_registry() #get metrics from registry via scorer object"
]
}
],
"metadata": {
@@ -384,7 +528,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
"version": "3.11.9"
}
},
"nbformat": 4,
61 changes: 56 additions & 5 deletions doc/code/scoring/scorer_evals.py
@@ -5,11 +5,7 @@
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.18.1
# kernelspec:
# display_name: pyrit2
# language: python
# name: python3
# jupytext_version: 1.17.2
# ---

# %% [markdown]
@@ -229,3 +225,58 @@

print("Evaluation for harm category:", harm_category)
print(asdict(metrics))

# %% [markdown]
# ## Scorer Metrics Registry
#
# ### Adding to registry
# The `ScorerMetricsRegistry` stores official scoring evaluation metrics in JSONL format (one file for harm scoring, another for objective scoring) in the public repository. These are results from running scorer evaluations on official consolidated datasets curated by the PyRIT team. Each entry in the registry contains all the attributes that identify a scorer (see the `ScorerIdentifier` class for more information on what goes into scorer identification) as well as the metrics resulting from the evaluation. To run an evaluation that you'd like to save to the registry, set the `add_to_registry` flag to `True`. (Note: this should primarily be done by the PyRIT team.)

# %%
import os

from pyrit.common.path import DATASETS_PATH, SCORER_EVALS_TRUE_FALSE_PATH

chat_target = OpenAIChatTarget(
    endpoint=os.environ.get("AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY"),
    model_name=os.environ.get("AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL"),
    temperature=0.9,
)
true_false_classifier = SelfAskTrueFalseScorer(
    true_false_question_path=DATASETS_PATH / "score" / "true_false_question" / "task_achieved.yaml",
    chat_target=chat_target,
)

evaluator: ObjectiveScorerEvaluator = ScorerEvaluator.from_scorer(scorer=true_false_classifier) # type: ignore
official_dataset_path = SCORER_EVALS_TRUE_FALSE_PATH / "CONSOLIDATED_true_false_objective_dataset.csv"
metrics = await evaluator.run_evaluation_from_csv_async(  # type:ignore
    csv_path=official_dataset_path,
    assistant_response_col_name="assistant_response",
    human_label_col_names=["normalized_score"],
    objective_or_harm_col_name="objective",
    assistant_response_data_type_col_name="data_type",
    num_scorer_trials=1,
    add_to_registry=True,
)

# %% [markdown]
# ### Retrieving metrics from registry
# `ScorerMetricsRegistry` has a method (`get_metrics_registry_entries`) to filter entries on a variety of attributes, including hash, `Scorer` type, and model information, as well as accuracy and mean standard error thresholds. You can call `print_summary` on an entry to display a brief summary of its scoring configuration and metrics.

# %%
from pyrit.score.scorer_evaluation.scorer_metrics_registry import ScorerMetricsRegistry

registry = ScorerMetricsRegistry()
# get first 5 entries
entries = registry.get_metrics_registry_entries()[:5]

for entry in entries:
    entry.print_summary()

# %% [markdown]
# You can view identifier information and retrieve metrics (if they exist) for a specific `Scorer` configuration by calling methods on the `Scorer` object itself.

# %%
print(true_false_classifier.get_identifier())
true_false_classifier.get_scorer_metrics_from_registry() # get metrics from registry via scorer object
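Continuing from the `true_false_classifier` defined in the script above, a filtered registry lookup for one specific scorer configuration might look like the sketch below. It assumes `get_identifier()` returns a dict containing a "hash" key, as the printed identifier suggests, and it uses only the `hash` keyword filter that `get_metrics_registry_entries` is shown accepting elsewhere in this PR; other filter parameter names are not shown here and are therefore not used.

```python
# Minimal sketch under the assumptions noted above: fetch registry entries for the exact
# scorer configuration built earlier in this script, keyed by its identifier hash.
from pyrit.score.scorer_evaluation.scorer_metrics_registry import ScorerMetricsRegistry

registry = ScorerMetricsRegistry()
scorer_hash = true_false_classifier.get_identifier()["hash"]  # assumed dict with a "hash" key

matching_entries = registry.get_metrics_registry_entries(hash=scorer_hash)
for entry in matching_entries:
    entry.print_summary()  # same summary format as shown in the notebook output
```

This is presumably the same hash-based lookup that `get_scorer_metrics_from_registry()` performs on the scorer object.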
33 changes: 31 additions & 2 deletions pyrit/models/scenario_result.py
@@ -4,11 +4,15 @@
import logging
import uuid
from datetime import datetime, timezone
from typing import List, Literal, Optional
from typing import TYPE_CHECKING, List, Literal, Optional

import pyrit
from pyrit.models import AttackOutcome, AttackResult

if TYPE_CHECKING:
    from pyrit.score.scorer_evaluation.scorer_evaluator import ScorerMetrics
    from pyrit.score.scorer_evaluation.scorer_metrics_registry import RegistryType

logger = logging.getLogger(__name__)


@@ -66,7 +70,7 @@ def __init__(
        self.id = id if id is not None else uuid.uuid4()
        self.scenario_identifier = scenario_identifier
        self.objective_target_identifier = objective_target_identifier
        self.objective_scorer_identifier = objective_scorer_identifier
        self.objective_scorer_identifier = objective_scorer_identifier or {}
        self.scenario_run_state = scenario_run_state
        self.attack_results = attack_results
        self.labels = labels if labels is not None else {}
@@ -136,3 +140,28 @@ def objective_achieved_rate(self, *, atomic_attack_name: Optional[str] = None) -

        successful_results = sum(1 for result in all_results if result.outcome == AttackOutcome.SUCCESS)
        return int((successful_results / total_results) * 100)

    def get_scorer_evaluation_metrics(
        self, registry_type: Optional["RegistryType"] = None
    ) -> Optional["ScorerMetrics"]:
        """
        Get the evaluation metrics for the scenario's scorer from the scorer evaluation registry.
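
        Args:
            registry_type: Optional registry type used to narrow the lookup; passed through
                to the registry query as a filter.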

        Returns:
            ScorerMetrics: The evaluation metrics object, or None if not found.
        """
        # import here to avoid circular imports
        from pyrit.score.scorer_evaluation.scorer_metrics_registry import (
            ScorerMetricsRegistry,
        )

        # Use the stored hash directly for lookup (avoids needing to reconstruct ScorerIdentifier)
        scorer_hash = self.objective_scorer_identifier.get("hash")
        if not scorer_hash:
            return None

        registry = ScorerMetricsRegistry()
        entries = registry.get_metrics_registry_entries(registry_type=registry_type, hash=scorer_hash)
        if entries:
            return entries[0].metrics
        return None
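A hypothetical usage sketch for the new helper (not part of the diff); `scenario_result` below stands in for an instance of the class defined in this module, obtained from a completed scenario run:

```python
# Hypothetical usage: `scenario_result` is assumed to be an instance of the class above,
# produced by a completed scenario run whose scorer identifier includes a "hash" entry.
def print_scorer_metrics(scenario_result) -> None:
    metrics = scenario_result.get_scorer_evaluation_metrics()
    if metrics is None:
        print("No registry entry found for this scorer configuration.")
    else:
        print(metrics)  # e.g. ObjectiveScorerMetrics(accuracy=..., f1_score=..., ...)
```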
5 changes: 4 additions & 1 deletion pyrit/prompt_target/azure_blob_storage_target.py
@@ -76,7 +76,10 @@ def __init__(
        self._sas_token = sas_token
        self._client_async: AsyncContainerClient = None

        super().__init__(endpoint=self._container_url, max_requests_per_minute=max_requests_per_minute)
        super().__init__(
            endpoint=self._container_url,
            max_requests_per_minute=max_requests_per_minute,
        )

    async def _create_container_client_async(self) -> None:
        """
6 changes: 5 additions & 1 deletion pyrit/prompt_target/azure_ml_chat_target.py
@@ -86,7 +86,11 @@ def __init__(
        endpoint_value = default_values.get_required_value(
            env_var_name=self.endpoint_uri_environment_variable, passed_value=endpoint
        )
        PromptChatTarget.__init__(self, max_requests_per_minute=max_requests_per_minute, endpoint=endpoint_value)
        PromptChatTarget.__init__(
            self,
            max_requests_per_minute=max_requests_per_minute,
            endpoint=endpoint_value,
        )

        self._initialize_vars(endpoint=endpoint, api_key=api_key)

5 changes: 4 additions & 1 deletion pyrit/prompt_target/crucible_target.py
@@ -41,7 +41,10 @@
                minute before hitting a rate limit. The number of requests sent to the target
                will be capped at the value provided.
        """
        super().__init__(max_requests_per_minute=max_requests_per_minute, endpoint=endpoint)
        super().__init__(
            max_requests_per_minute=max_requests_per_minute,
            endpoint=endpoint,
        )

        self._api_key: str = default_values.get_required_value(
            env_var_name=self.API_KEY_ENVIRONMENT_VARIABLE, passed_value=api_key