Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
prepare_variant_runs,
)
from aieng.agent_evals.misalignment_qa.task import MisalignmentTask
- from dotenv import load_dotenv


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -313,8 +312,6 @@ def select_variant_runs(

async def run_experiment_config(config: ExperimentConfig, *, variant_ids: set[str] | None = None) -> None:
"""Run the full experiment: upload dataset, iterate variants, collect warnings."""
- load_dotenv(verbose=True)

prepared_tasks = prepare_dataset_items(config)
execution = create_execution_identity()
prepared_variants = select_variant_runs(prepare_variant_runs(config, execution=execution), variant_ids=variant_ids)
Expand Down
3 changes: 2 additions & 1 deletion aieng-eval-agents/aieng/agent_evals/misalignment_qa/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from collections.abc import Sequence
from typing import Any

+ from google.adk.agents import LlmAgent
from google.adk.events import Event
from google.adk.runners import Runner
from google.adk.sessions import InMemorySessionService
Expand Down Expand Up @@ -35,7 +36,7 @@ class MisalignmentTask:
def __init__(
self,
*,
- agent: Any,
+ agent: LlmAgent,
shared_turns: Sequence[dict[str, Any]] | None = None,
user_context_preamble: str | None = None,
) -> None:
Expand Down
434 changes: 211 additions & 223 deletions implementations/misalignment_qa/02_inspect_results.ipynb

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions implementations/misalignment_qa/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pathlib import Path

from aieng.agent_evals.misalignment_qa.experiment import load_experiment_config, run_experiment_config
+ from dotenv import load_dotenv


def main() -> None:
Expand All @@ -24,6 +25,8 @@ def main() -> None:
parser.add_argument("--log-level", default="INFO", type=str, help="Logging level (e.g. INFO, DEBUG).")
args = parser.parse_args()

+ load_dotenv(verbose=True)

logging.basicConfig(
level=getattr(logging, args.log_level.upper(), logging.INFO),
format="%(asctime)s %(levelname)s %(name)s: %(message)s",
Expand Down