diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py index ec6c50a456..a9b62bebfe 100644 --- a/tests/unit/vertexai/genai/test_evals.py +++ b/tests/unit/vertexai/genai/test_evals.py @@ -13,9 +13,11 @@ # limitations under the License. # # pylint: disable=protected-access,bad-continuation, +import base64 import importlib import json import os +import re import statistics import sys from unittest import mock @@ -291,8 +293,72 @@ def test_display_evaluation_result_with_agent_trace_prefixes(self, mock_is_ipyth mock_display_module.HTML.assert_called_once() html_content = mock_display_module.HTML.call_args[0][0] - assert "my_function" in html_content - assert "this is model response" in html_content + match = re.search(r'atob\("([^"]+)"\)', html_content) + assert match + decoded_json = base64.b64decode(match.group(1)).decode("utf-8") + assert "my_function" in decoded_json + assert "this is model response" in decoded_json + + del sys.modules["IPython"] + del sys.modules["IPython.display"] + + @mock.patch( + "vertexai._genai._evals_visualization._is_ipython_env", + return_value=True, + ) + def test_display_evaluation_result_with_non_ascii_character(self, mock_is_ipython): + """Tests that non-ASCII characters are handled correctly.""" + mock_display_module = mock.MagicMock() + mock_ipython_module = mock.MagicMock() + mock_ipython_module.display = mock_display_module + sys.modules["IPython"] = mock_ipython_module + sys.modules["IPython.display"] = mock_display_module + + dataset_df = pd.DataFrame( + [ + { + "prompt": "Test prompt with emoji 😊", + "response": "Test response with emoji 😊", + }, + ] + ) + eval_dataset = vertexai_genai_types.EvaluationDataset( + eval_dataset_df=dataset_df + ) + eval_result = vertexai_genai_types.EvaluationResult( + evaluation_dataset=[eval_dataset], + eval_case_results=[ + vertexai_genai_types.EvalCaseResult( + eval_case_index=0, + response_candidate_results=[ + vertexai_genai_types.ResponseCandidateResult( + response_index=0, metric_results={} + ) + ], + ) + ], + ) + + _evals_visualization.display_evaluation_result(eval_result) + + mock_display_module.HTML.assert_called_once() + html_content = mock_display_module.HTML.call_args[0][0] + # Verify that the new decoding logic is present in the HTML + assert "new TextDecoder().decode" in html_content + + match = re.search(r'atob\("([^"]+)"\)', html_content) + assert match + decoded_json = base64.b64decode(match.group(1)).decode("utf-8") + + # JSON serialization escapes non-ASCII characters (e.g. \uXXXX), so we + # parse it back to check for the actual characters. + parsed_json = json.loads(decoded_json) + assert "Test prompt with emoji 😊" in json.dumps( + parsed_json, ensure_ascii=False + ) + assert "Test response with emoji 😊" in json.dumps( + parsed_json, ensure_ascii=False + ) del sys.modules["IPython"] del sys.modules["IPython.display"] @@ -1290,7 +1356,7 @@ def test_run_inference_with_agent_engine_with_response_column_raises_error( ) in str(excinfo.value) @mock.patch.object(_evals_utils, "EvalDatasetLoader") - @mock.patch("vertexai._genai._evals_common.InMemorySessionService") + @mock.patch("vertexai._genai._evals_common.InMemorySessionService") # fmt: skip @mock.patch("vertexai._genai._evals_common.Runner") @mock.patch("vertexai._genai._evals_common.LlmAgent") def test_run_inference_with_local_agent( diff --git a/vertexai/_genai/_evals_visualization.py b/vertexai/_genai/_evals_visualization.py index 04012c9394..049f7f95de 100644 --- a/vertexai/_genai/_evals_visualization.py +++ b/vertexai/_genai/_evals_visualization.py @@ -14,8 +14,10 @@ # """Visualization utilities for GenAI Evaluation SDK.""" +import base64 import json import logging +import textwrap from typing import Any, Optional import pandas as pd @@ -78,9 +80,16 @@ def stringify_cell(cell: Any) -> Optional[str]: return df_copy +def _encode_to_base64(data: str) -> str: + """Encodes a string to a web-safe Base64 string.""" + return base64.b64encode(data.encode("utf-8")).decode("utf-8") + + def _get_evaluation_html(eval_result_json: str) -> str: """Returns a self-contained HTML for single evaluation visualization.""" - return f""" + payload_b64 = _encode_to_base64(eval_result_json) + return textwrap.dedent( + f""" @@ -249,12 +258,11 @@ def _get_evaluation_html(eval_result_json: str) -> str:

Evaluation Report

-
-
-
-
+ <
+
+