Skip to content

Commit 8bb68f4

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: Add PromptTemplateData to support context and history columns when creating Evaluation run from dataframe
PiperOrigin-RevId: 872946786
1 parent 89d5723 commit 8bb68f4

2 files changed

Lines changed: 114 additions & 98 deletions

File tree

vertexai/_genai/_evals_common.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,77 @@ def _extract_contents_for_inference(
258258
return request_dict_or_raw_text
259259

260260

261+
def _resolve_dataset(
    dataset: Union[types.EvaluationRunDataSource, types.EvaluationDataset],
    agent_info_pydantic: Optional[types.evals.AgentInfo] = None,
    api_client: Optional[BaseApiClient] = None,
    dest: Optional[str] = None,
) -> types.EvaluationRunDataSource:
    """Resolves the dataset for an evaluation run.

    A raw ``EvaluationDataset`` (backed by a dataframe) is uploaded as an
    evaluation set and converted to an ``EvaluationRunDataSource`` that
    references it; an ``EvaluationRunDataSource`` is returned unchanged.

    Args:
        dataset: The dataset to resolve.
        agent_info_pydantic: Optional agent info used to pick the candidate
            name for the uploaded evaluation set.
        api_client: API client used to create the evaluation set. Required
            when ``dataset`` is an ``EvaluationDataset``.
        dest: Destination prefix for the uploaded evaluation set. Required
            when ``dataset`` is an ``EvaluationDataset``.

    Returns:
        An ``EvaluationRunDataSource`` pointing at the resolved dataset.

    Raises:
        ValueError: If an ``EvaluationDataset`` has no ``eval_dataset_df``,
            or ``api_client``/``dest`` were not provided for the upload.
    """
    if isinstance(dataset, types.EvaluationDataset):
        if dataset.eval_dataset_df is None:
            raise ValueError(
                "EvaluationDataset must have eval_dataset_df populated."
            )
        # This is a module-level helper: the client and destination cannot
        # come from ``self``/enclosing scope and must be passed explicitly.
        if api_client is None or dest is None:
            raise ValueError(
                "api_client and dest are required to resolve an"
                " EvaluationDataset."
            )
        candidate_name = _get_candidate_name(dataset, agent_info_pydantic)
        eval_set = _create_evaluation_set_from_dataframe(
            api_client, dest, dataset.eval_dataset_df, candidate_name
        )
        dataset = types.EvaluationRunDataSource(evaluation_set=eval_set.name)
    return dataset
273+
274+
275+
def _resolve_inference_configs(
276+
inference_configs: Optional[
277+
dict[str, types.EvaluationRunInferenceConfigOrDict]
278+
] = None,
279+
agent_info_pydantic: Optional[types.evals.AgentInfo] = None,
280+
) -> Optional[dict[str, types.EvaluationRunInferenceConfigOrDict]]:
281+
"""Resolves inference configs for the evaluation run."""
282+
if agent_info_pydantic and agent_info_pydantic.name:
283+
inference_configs = {}
284+
inference_configs[agent_info_pydantic.name] = (
285+
types.EvaluationRunInferenceConfig(
286+
agent_config=types.EvaluationRunAgentConfig(
287+
developer_instruction=genai_types.Content(
288+
parts=[
289+
genai_types.Part(text=agent_info_pydantic.instruction)
290+
]
291+
),
292+
tools=agent_info_pydantic.tool_declarations,
293+
)
294+
)
295+
)
296+
return inference_configs
297+
298+
299+
def _add_evaluation_run_labels(
300+
labels: Optional[dict[str, str]] = None,
301+
agent_info_pydantic: Optional[types.evals.AgentInfo] = None,
302+
):
303+
"""Adds labels to the evaluation run."""
304+
labels = labels or {}
305+
if agent_info_pydantic and agent_info_pydantic.agent_resource_name:
306+
labels["vertex-ai-evaluation-agent-engine-id"] = (
307+
agent_info_pydantic.agent_resource_name.split("reasoningEngines/")[-1]
308+
)
309+
return labels
310+
311+
312+
def _get_candidate_name(
313+
dataset: types.EvaluationDataset,
314+
agent_info_pydantic: Optional[types.evals.AgentInfo] = None,
315+
) -> Optional[str]:
316+
"""Internal helper to get candidate name."""
317+
if agent_info_pydantic is not None and (
318+
dataset.candidate_name
319+
and agent_info_pydantic
320+
and agent_info_pydantic.name
321+
and dataset.candidate_name != agent_info_pydantic.name
322+
):
323+
logger.warning(
324+
"Evaluation dataset candidate_name and agent_info.name are different."
325+
" Please make sure this is intended."
326+
)
327+
elif dataset.candidate_name is None and agent_info_pydantic:
328+
return agent_info_pydantic.name
329+
return dataset.candidate_name or None
330+
331+
261332
def _execute_inference_concurrently(
262333
api_client: BaseApiClient,
263334
prompt_dataset: pd.DataFrame,
@@ -1858,6 +1929,9 @@ def _object_to_dict(obj: Any) -> Union[dict[str, Any], Any]:
18581929
result[key] = value
18591930
elif isinstance(value, (list, tuple)):
18601931
result[key] = [_object_to_dict(item) for item in value]
1932+
# Add recursive handling for dictionaries
1933+
elif isinstance(value, dict):
1934+
result[key] = {k: _object_to_dict(v) for k, v in value.items()}
18611935
elif isinstance(value, bytes):
18621936
result[key] = base64.b64encode(value).decode("utf-8")
18631937
elif hasattr(value, "__dict__"): # Nested object

vertexai/_genai/evals.py

Lines changed: 40 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -1625,32 +1625,14 @@ def create_evaluation_run(
16251625
raise ValueError(
16261626
"At most one of agent_info or inference_configs can be provided."
16271627
)
1628-
agent_info_pydantic: types.evals.AgentInfo = types.evals.AgentInfo()
1629-
if agent_info:
1630-
if isinstance(agent_info, dict):
1631-
agent_info_pydantic = types.evals.AgentInfo.model_validate(agent_info)
1632-
else:
1633-
agent_info_pydantic = agent_info
1634-
if isinstance(dataset, types.EvaluationDataset):
1635-
if dataset.eval_dataset_df is None:
1636-
raise ValueError(
1637-
"EvaluationDataset must have eval_dataset_df populated."
1638-
)
1639-
if agent_info_pydantic is not None and (
1640-
dataset.candidate_name
1641-
and agent_info_pydantic
1642-
and agent_info_pydantic.name
1643-
and dataset.candidate_name != agent_info_pydantic.name
1644-
):
1645-
logger.warning(
1646-
"Evaluation dataset candidate_name and agent_info.name are different. Please make sure this is intended."
1647-
)
1648-
elif dataset.candidate_name is None and agent_info_pydantic:
1649-
dataset.candidate_name = agent_info_pydantic.name
1650-
eval_set = _evals_common._create_evaluation_set_from_dataframe(
1651-
self._api_client, dest, dataset.eval_dataset_df, dataset.candidate_name
1652-
)
1653-
dataset = types.EvaluationRunDataSource(evaluation_set=eval_set.name)
1628+
agent_info_pydantic = (
1629+
types.evals.AgentInfo.model_validate(agent_info)
1630+
if isinstance(agent_info, dict)
1631+
else (agent_info or types.evals.AgentInfo())
1632+
)
1633+
resolved_dataset = _evals_common._resolve_dataset(
1634+
dataset, agent_info_pydantic
1635+
)
16541636
output_config = genai_types.OutputConfig(
16551637
gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest)
16561638
)
@@ -1660,37 +1642,20 @@ def create_evaluation_run(
16601642
evaluation_config = types.EvaluationRunConfig(
16611643
output_config=output_config, metrics=resolved_metrics
16621644
)
1663-
if agent_info_pydantic and agent_info_pydantic.name is not None:
1664-
inference_configs = {}
1665-
inference_configs[agent_info_pydantic.name] = (
1666-
types.EvaluationRunInferenceConfig(
1667-
agent_config=types.EvaluationRunAgentConfig(
1668-
developer_instruction=genai_types.Content(
1669-
parts=[
1670-
genai_types.Part(text=agent_info_pydantic.instruction)
1671-
]
1672-
),
1673-
tools=agent_info_pydantic.tool_declarations,
1674-
)
1675-
)
1676-
)
1677-
if agent_info_pydantic.agent_resource_name:
1678-
labels = labels or {}
1679-
labels["vertex-ai-evaluation-agent-engine-id"] = (
1680-
agent_info_pydantic.agent_resource_name.split("reasoningEngines/")[
1681-
-1
1682-
]
1683-
)
1684-
if not name:
1685-
name = f"evaluation_run_{uuid.uuid4()}"
1686-
1645+
resolved_inference_configs = _evals_common._resolve_inference_configs(
1646+
inference_configs, agent_info_pydantic
1647+
)
1648+
resolved_labels = _evals_common._add_evaluation_run_labels(
1649+
labels, agent_info_pydantic
1650+
)
1651+
resolved_name = name or f"evaluation_run_{uuid.uuid4()}"
16871652
return self._create_evaluation_run(
1688-
name=name,
1689-
display_name=display_name or name,
1690-
data_source=dataset,
1653+
name=resolved_name,
1654+
display_name=display_name or resolved_name,
1655+
data_source=resolved_dataset,
16911656
evaluation_config=evaluation_config,
1692-
inference_configs=inference_configs,
1693-
labels=labels,
1657+
inference_configs=resolved_inference_configs,
1658+
labels=resolved_labels,
16941659
config=config,
16951660
)
16961661

@@ -2495,27 +2460,14 @@ async def create_evaluation_run(
24952460
raise ValueError(
24962461
"At most one of agent_info or inference_configs can be provided."
24972462
)
2498-
if agent_info and isinstance(agent_info, dict):
2499-
agent_info = types.evals.AgentInfo.model_validate(agent_info)
2500-
if isinstance(dataset, types.EvaluationDataset):
2501-
if dataset.eval_dataset_df is None:
2502-
raise ValueError(
2503-
"EvaluationDataset must have eval_dataset_df populated."
2504-
)
2505-
if agent_info is not None and (
2506-
dataset.candidate_name
2507-
and agent_info.name
2508-
and dataset.candidate_name != agent_info.name
2509-
):
2510-
logger.warning(
2511-
"Evaluation dataset candidate_name and agent_info.name are different. Please make sure this is intended."
2512-
)
2513-
elif dataset.candidate_name is None and agent_info:
2514-
dataset.candidate_name = agent_info.name
2515-
eval_set = _evals_common._create_evaluation_set_from_dataframe(
2516-
self._api_client, dest, dataset.eval_dataset_df, dataset.candidate_name
2517-
)
2518-
dataset = types.EvaluationRunDataSource(evaluation_set=eval_set.name)
2463+
agent_info_pydantic = (
2464+
types.evals.AgentInfo.model_validate(agent_info)
2465+
if isinstance(agent_info, dict)
2466+
else (agent_info or types.evals.AgentInfo())
2467+
)
2468+
resolved_dataset = _evals_common._resolve_dataset(
2469+
dataset, agent_info_pydantic
2470+
)
25192471
output_config = genai_types.OutputConfig(
25202472
gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest)
25212473
)
@@ -2525,31 +2477,21 @@ async def create_evaluation_run(
25252477
evaluation_config = types.EvaluationRunConfig(
25262478
output_config=output_config, metrics=resolved_metrics
25272479
)
2528-
if agent_info and agent_info.name is not None:
2529-
inference_configs = {}
2530-
inference_configs[agent_info.name] = types.EvaluationRunInferenceConfig(
2531-
agent_config=types.EvaluationRunAgentConfig(
2532-
developer_instruction=genai_types.Content(
2533-
parts=[genai_types.Part(text=agent_info.instruction)]
2534-
),
2535-
tools=agent_info.tool_declarations,
2536-
)
2537-
)
2538-
if agent_info.agent_resource_name:
2539-
labels = labels or {}
2540-
labels["vertex-ai-evaluation-agent-engine-id"] = (
2541-
agent_info.agent_resource_name.split("reasoningEngines/")[-1]
2542-
)
2543-
if not name:
2544-
name = f"evaluation_run_{uuid.uuid4()}"
2480+
resolved_inference_configs = _evals_common._resolve_inference_configs(
2481+
inference_configs, agent_info_pydantic
2482+
)
2483+
resolved_labels = _evals_common._add_evaluation_run_labels(
2484+
labels, agent_info_pydantic
2485+
)
2486+
resolved_name = name or f"evaluation_run_{uuid.uuid4()}"
25452487

25462488
result = await self._create_evaluation_run(
2547-
name=name,
2548-
display_name=display_name or name,
2549-
data_source=dataset,
2489+
name=resolved_name,
2490+
display_name=display_name or resolved_name,
2491+
data_source=resolved_dataset,
25502492
evaluation_config=evaluation_config,
2551-
inference_configs=inference_configs,
2552-
labels=labels,
2493+
inference_configs=resolved_inference_configs,
2494+
labels=resolved_labels,
25532495
config=config,
25542496
)
25552497

0 commit comments

Comments
 (0)