Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 69 additions & 3 deletions tests/unit/vertexai/genai/test_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@
# limitations under the License.
#
# pylint: disable=protected-access,bad-continuation,
import base64
import importlib
import json
import os
import re
import statistics
import sys
from unittest import mock
Expand Down Expand Up @@ -291,8 +293,72 @@ def test_display_evaluation_result_with_agent_trace_prefixes(self, mock_is_ipyth

mock_display_module.HTML.assert_called_once()
html_content = mock_display_module.HTML.call_args[0][0]
assert "my_function" in html_content
assert "this is model response" in html_content
match = re.search(r'atob\("([^"]+)"\)', html_content)
assert match
decoded_json = base64.b64decode(match.group(1)).decode("utf-8")
assert "my_function" in decoded_json
assert "this is model response" in decoded_json

del sys.modules["IPython"]
del sys.modules["IPython.display"]

@mock.patch(
    "vertexai._genai._evals_visualization._is_ipython_env",
    return_value=True,
)
def test_display_evaluation_result_with_non_ascii_character(self, mock_is_ipython):
    """Tests that non-ASCII characters are handled correctly.

    Round-trips an emoji-bearing dataset through
    ``display_evaluation_result`` and verifies that the HTML payload is
    Base64-encoded and decoded via ``TextDecoder``, so multi-byte UTF-8
    characters survive the trip into the browser.
    """
    # Stub out IPython so the visualization code believes it is running in
    # a notebook and routes its output through IPython.display.HTML.
    mock_display_module = mock.MagicMock()
    mock_ipython_module = mock.MagicMock()
    mock_ipython_module.display = mock_display_module
    sys.modules["IPython"] = mock_ipython_module
    sys.modules["IPython.display"] = mock_display_module

    # Dataset whose prompt/response contain a non-ASCII (4-byte UTF-8) emoji.
    dataset_df = pd.DataFrame(
        [
            {
                "prompt": "Test prompt with emoji 😊",
                "response": "Test response with emoji 😊",
            },
        ]
    )
    eval_dataset = vertexai_genai_types.EvaluationDataset(
        eval_dataset_df=dataset_df
    )
    # Minimal EvaluationResult: one case, one candidate, no metric results —
    # enough to drive the HTML renderer.
    eval_result = vertexai_genai_types.EvaluationResult(
        evaluation_dataset=[eval_dataset],
        eval_case_results=[
            vertexai_genai_types.EvalCaseResult(
                eval_case_index=0,
                response_candidate_results=[
                    vertexai_genai_types.ResponseCandidateResult(
                        response_index=0, metric_results={}
                    )
                ],
            )
        ],
    )

    _evals_visualization.display_evaluation_result(eval_result)

    mock_display_module.HTML.assert_called_once()
    html_content = mock_display_module.HTML.call_args[0][0]
    # Verify that the new decoding logic is present in the HTML
    assert "new TextDecoder().decode" in html_content

    # Extract the Base64 payload passed to atob(...) and decode it the same
    # way the embedded JavaScript would.
    match = re.search(r'atob\("([^"]+)"\)', html_content)
    assert match
    decoded_json = base64.b64decode(match.group(1)).decode("utf-8")

    # JSON serialization escapes non-ASCII characters (e.g. \uXXXX), so we
    # parse it back to check for the actual characters.
    parsed_json = json.loads(decoded_json)
    assert "Test prompt with emoji 😊" in json.dumps(
        parsed_json, ensure_ascii=False
    )
    assert "Test response with emoji 😊" in json.dumps(
        parsed_json, ensure_ascii=False
    )

    # Clean up the fake IPython modules so later tests see a clean state.
    del sys.modules["IPython"]
    del sys.modules["IPython.display"]
Expand Down Expand Up @@ -1290,7 +1356,7 @@ def test_run_inference_with_agent_engine_with_response_column_raises_error(
) in str(excinfo.value)

@mock.patch.object(_evals_utils, "EvalDatasetLoader")
@mock.patch("vertexai._genai._evals_common.InMemorySessionService")
@mock.patch("vertexai._genai._evals_common.InMemorySessionService") # fmt: skip
@mock.patch("vertexai._genai._evals_common.Runner")
@mock.patch("vertexai._genai._evals_common.LlmAgent")
def test_run_inference_with_local_agent(
Expand Down
46 changes: 31 additions & 15 deletions vertexai/_genai/_evals_visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@
#
"""Visualization utilities for GenAI Evaluation SDK."""

import base64
import json
import logging
import textwrap
from typing import Any, Optional

import pandas as pd
Expand Down Expand Up @@ -78,9 +80,16 @@ def stringify_cell(cell: Any) -> Optional[str]:
return df_copy


def _encode_to_base64(data: str) -> str:
"""Encodes a string to a web-safe Base64 string."""
return base64.b64encode(data.encode("utf-8")).decode("utf-8")


def _get_evaluation_html(eval_result_json: str) -> str:
"""Returns a self-contained HTML for single evaluation visualization."""
return f"""
payload_b64 = _encode_to_base64(eval_result_json)
return textwrap.dedent(
f"""
<!DOCTYPE html>
<html>
<head>
Expand Down Expand Up @@ -249,12 +258,11 @@ def _get_evaluation_html(eval_result_json: str) -> str:
<body>
<div class="container">
<h1>Evaluation Report</h1>
<div id="summary-section"></div>
<div id="agent-info-section"></div>
<div id="details-section"></div>
</div>
<div id="summary-section"></div>
<div id="agent-info-section"></div>
<div id="details-section"></div>
<script>
var vizData_vertex_eval_sdk = {eval_result_json};
var vizData_vertex_eval_sdk = JSON.parse(new TextDecoder().decode(Uint8Array.from(atob("{payload_b64}"), c => c.charCodeAt(0))));
function formatDictVals(obj) {{
if (typeof obj === 'string') return obj;
if (obj === undefined || obj === null) return '';
Expand Down Expand Up @@ -552,11 +560,14 @@ def _get_evaluation_html(eval_result_json: str) -> str:
</body>
</html>
"""
)


def _get_comparison_html(eval_result_json: str) -> str:
"""Returns a self-contained HTML for a side-by-side eval comparison."""
return f"""
payload_b64 = _encode_to_base64(eval_result_json)
return textwrap.dedent(
f"""
<!DOCTYPE html>
<html>
<head>
Expand Down Expand Up @@ -612,11 +623,10 @@ def _get_comparison_html(eval_result_json: str) -> str:
<body>
<div class="container">
<h1>Eval Comparison Report</h1>
<div id="summary-section"></div>
<div id="details-section"></div>
</div>
<div id="summary-section"></div>
<div id="details-section"></div>
<script>
var vizData_vertex_eval_sdk = {eval_result_json};
var vizData_vertex_eval_sdk = JSON.parse(new TextDecoder().decode(Uint8Array.from(atob("{payload_b64}"), c => c.charCodeAt(0))));
function renderSummary(summaryMetrics, metadata) {{
const container = document.getElementById('summary-section');
if (!summaryMetrics || summaryMetrics.length === 0) {{ container.innerHTML = '<h2>Summary Metrics</h2><p>No summary metrics.</p>'; return; }}
Expand Down Expand Up @@ -692,11 +702,14 @@ def _get_comparison_html(eval_result_json: str) -> str:
</body>
</html>
"""
)


def _get_inference_html(dataframe_json: str) -> str:
"""Returns a self-contained HTML for displaying inference results."""
return f"""
payload_b64 = _encode_to_base64(dataframe_json)
return textwrap.dedent(
f"""
<!DOCTYPE html>
<html>
<head>
Expand Down Expand Up @@ -741,12 +754,12 @@ def _get_inference_html(dataframe_json: str) -> str:
</style>
</head>
<body>
<div class="container">
<div class="container">
<h1>Evaluation Dataset</h1>
<div id="results-table"></div>
</div>
<script>
var vizData_vertex_eval_sdk = {dataframe_json};
var vizData_vertex_eval_sdk = JSON.parse(new TextDecoder().decode(Uint8Array.from(atob("{payload_b64}"), c => c.charCodeAt(0))));
var container_vertex_eval_sdk = document.getElementById('results-table');

function renderRubrics(cellValue) {{
Expand Down Expand Up @@ -822,6 +835,7 @@ def _get_inference_html(dataframe_json: str) -> str:
</body>
</html>
"""
)


def _extract_text_and_raw_json(content: Any) -> dict[str, str]:
Expand Down Expand Up @@ -1086,12 +1100,14 @@ def _get_status_html(status: str, error_message: Optional[str] = None) -> str:
</p>
"""

return f"""
return textwrap.dedent(
f"""
<div>
<p><b>Status:</b> {status}</p>
{error_html}
</div>
"""
)


def display_evaluation_run_status(eval_run_obj: "types.EvaluationRun") -> None:
Expand Down
Loading