@@ -258,6 +258,78 @@ def _extract_contents_for_inference(
258258 return request_dict_or_raw_text
259259
260260
def _resolve_dataset(
    api_client: BaseApiClient,
    dest: str,
    dataset: Union[types.EvaluationRunDataSource, types.EvaluationDataset],
    agent_info_pydantic: Optional[types.evals.AgentInfo] = None,
) -> types.EvaluationRunDataSource:
    """Resolves a dataset into an ``EvaluationRunDataSource``.

    A raw ``EvaluationDataset`` (a dataframe-backed dataset) is uploaded as an
    evaluation set and wrapped in an ``EvaluationRunDataSource``; an existing
    ``EvaluationRunDataSource`` is returned unchanged.

    NOTE(review): the original body referenced undefined names ``self`` and
    ``dest`` at module scope (it could never run); they are now explicit
    ``api_client`` and ``dest`` parameters.

    Args:
        api_client: API client used to create the evaluation set.
        dest: GCS destination prefix for the uploaded evaluation set.
        dataset: The dataset or data source to resolve.
        agent_info_pydantic: Optional agent info used to derive the
            candidate name for a dataframe-backed dataset.

    Returns:
        An ``EvaluationRunDataSource`` referencing the (possibly newly
        created) evaluation set.
    """
    if isinstance(dataset, types.EvaluationDataset):
        candidate_name = _get_candidate_name(dataset, agent_info_pydantic)
        eval_set = _create_evaluation_set_from_dataframe(
            api_client, dest, dataset.eval_dataset_df, candidate_name
        )
        dataset = types.EvaluationRunDataSource(evaluation_set=eval_set.name)
    return dataset
273+
274+
275+ def _resolve_inference_configs (
276+ inference_configs : Optional [
277+ dict [str , types .EvaluationRunInferenceConfigOrDict ]
278+ ] = None ,
279+ agent_info_pydantic : Optional [types .evals .AgentInfo ] = None ,
280+ ) -> Optional [dict [str , types .EvaluationRunInferenceConfigOrDict ]]:
281+ """Resolves inference configs for the evaluation run."""
282+ if agent_info_pydantic and agent_info_pydantic .name :
283+ inference_configs = {}
284+ inference_configs [agent_info_pydantic .name ] = (
285+ types .EvaluationRunInferenceConfig (
286+ agent_config = types .EvaluationRunAgentConfig (
287+ developer_instruction = genai_types .Content (
288+ parts = [
289+ genai_types .Part (text = agent_info_pydantic .instruction )
290+ ]
291+ ),
292+ tools = agent_info_pydantic .tool_declarations ,
293+ )
294+ )
295+ )
296+ return inference_configs
297+
298+
299+ def _add_evaluation_run_labels (
300+ labels : Optional [dict [str , str ]] = None ,
301+ agent_info_pydantic : Optional [types .evals .AgentInfo ] = None ,
302+ ):
303+ """Adds labels to the evaluation run."""
304+ labels = labels or {}
305+ if agent_info_pydantic and agent_info_pydantic .agent_resource_name :
306+ labels ["vertex-ai-evaluation-agent-engine-id" ] = (
307+ agent_info_pydantic .agent_resource_name .split ("reasoningEngines/" )[
308+ - 1
309+ ]
310+ )
311+ return labels
312+
313+
314+ def _get_candidate_name (
315+ dataset : types .EvaluationDataset ,
316+ agent_info_pydantic : Optional [types .evals .AgentInfo ] = None ,
317+ ) -> Optional [str ]:
318+ """Internal helper to get candidate name."""
319+ if agent_info_pydantic is not None and (
320+ dataset .candidate_name
321+ and agent_info_pydantic
322+ and agent_info_pydantic .name
323+ and dataset .candidate_name != agent_info_pydantic .name
324+ ):
325+ logger .warning (
326+ "Evaluation dataset candidate_name and agent_info.name are different. Please make sure this is intended."
327+ )
328+ elif dataset .candidate_name is None and agent_info_pydantic :
329+ return agent_info_pydantic .name
330+ return dataset .candidate_name or None
331+
332+
261333def _execute_inference_concurrently (
262334 api_client : BaseApiClient ,
263335 prompt_dataset : pd .DataFrame ,
@@ -1858,6 +1930,9 @@ def _object_to_dict(obj: Any) -> Union[dict[str, Any], Any]:
18581930 result [key ] = value
18591931 elif isinstance (value , (list , tuple )):
18601932 result [key ] = [_object_to_dict (item ) for item in value ]
1933+ # Add recursive handling for dictionaries
1934+ elif isinstance (value , dict ):
1935+ result [key ] = {k : _object_to_dict (v ) for k , v in value .items ()}
18611936 elif isinstance (value , bytes ):
18621937 result [key ] = base64 .b64encode (value ).decode ("utf-8" )
18631938 elif hasattr (value , "__dict__" ): # Nested object
@@ -1871,9 +1946,15 @@ def _create_evaluation_set_from_dataframe(
18711946 api_client : BaseApiClient ,
18721947 gcs_dest_prefix : str ,
18731948 eval_df : pd .DataFrame ,
1874- candidate_name : Optional [str ] = None ,
18751949) -> Union [types .EvaluationSet , Any ]:
18761950 """Converts a dataframe to an EvaluationSet."""
1951+ if dataset .eval_dataset_df is None :
1952+ raise ValueError (
1953+ "EvaluationDataset must have eval_dataset_df populated."
1954+ )
1955+ candidate_name = _evals_common ._get_candidate_name (
1956+ dataset , agent_info_pydantic
1957+ )
18771958 eval_item_requests = []
18781959 for _ , row in eval_df .iterrows ():
18791960 intermediate_events = []
@@ -1885,13 +1966,31 @@ def _create_evaluation_set_from_dataframe(
18851966 for event in row [_evals_constant .INTERMEDIATE_EVENTS ]:
18861967 if CONTENT in event :
18871968 intermediate_events .append (event [CONTENT ])
1969+ if _evals_constant .CONTEXT in row or _evals_constant .HISTORY in row :
1970+ values = {}
1971+ if _evals_constant .CONTEXT in row :
1972+ values [_evals_constant .CONTEXT ] = genai_types .Content (
1973+ parts = [genai_types .Part (text = row [_evals_constant .CONTEXT ])],
1974+ role = _evals_constant .USER_AUTHOR ,
1975+ )
1976+ if _evals_constant .HISTORY in row :
1977+ values [_evals_constant .HISTORY ] = genai_types .Content (
1978+ parts = [genai_types .Part (text = row [_evals_constant .HISTORY ])],
1979+ role = _evals_constant .USER_AUTHOR ,
1980+ )
1981+ if _evals_constant .PROMPT in row :
1982+ values [_evals_constant .PROMPT ] = genai_types .Content (
1983+ parts = [genai_types .Part (text = row [_evals_constant .PROMPT ])],
1984+ role = _evals_constant .USER_AUTHOR ,
1985+ )
1986+ prompt = types .EvaluationPrompt (
1987+ prompt_template_data = types .PromptTemplateData (values = values )
1988+ )
1989+ elif _evals_constant .PROMPT in row :
1990+ prompt = types .EvaluationPrompt (text = row [_evals_constant .PROMPT ])
18881991 eval_item_requests .append (
18891992 types .EvaluationItemRequest (
1890- prompt = (
1891- types .EvaluationPrompt (text = row [_evals_constant .PROMPT ])
1892- if _evals_constant .PROMPT in row
1893- else None
1894- ),
1993+ prompt = prompt if prompt else None ,
18951994 golden_response = (
18961995 types .CandidateResponse (text = row [_evals_constant .REFERENCE ])
18971996 if _evals_constant .REFERENCE in row
0 commit comments