# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Visualization utilities for GenAI Evaluation SDK."""
import json
import logging
from typing import Optional
from pydantic import errors
import pandas as pd
from . import types
logger = logging.getLogger(__name__)
def _is_ipython_env() -> bool:
    """Reports whether an active IPython shell is available.

    Returns:
        True when IPython can be imported and `get_ipython()` returns a shell
        instance; False when IPython is not installed or no shell is active.
    """
    try:
        from IPython import get_ipython

        active_shell = get_ipython()
    except ImportError:
        # IPython is an optional dependency; without it nothing can be shown.
        return False
    return active_shell is not None
def _stringify_cell_for_json(cell):
    """Converts a single DataFrame cell into a JSON-friendly value.

    Containers are handled before the missing-value check: `pd.isna` returns an
    element-wise array for list input, and using that array as an `if`
    condition raises ValueError for lists with more than one element.

    Args:
        cell: The raw cell value.

    Returns:
        A JSON string for dicts and lists (or `str(cell)` when they cannot be
        encoded), None for missing scalars, the value itself for
        str/int/float/bool, and `str(cell)` for anything else.
    """
    if isinstance(cell, (dict, list)):
        try:
            return json.dumps(cell, ensure_ascii=False)
        except (TypeError, ValueError):
            # Non-encodable or self-referencing contents: fall back to text.
            return str(cell)
    # Only scalars are asked for missingness; array-likes would yield an array
    # whose truth value is ambiguous.
    if pd.api.types.is_scalar(cell) and pd.isna(cell):
        return None
    if isinstance(cell, (str, int, float, bool)):
        return cell
    return str(cell)


def _preprocess_df_for_json(df: Optional[pd.DataFrame]) -> Optional[pd.DataFrame]:
    """Prepares a DataFrame for JSON serialization by converting complex objects to strings.

    Args:
        df: The DataFrame to prepare, or None.

    Returns:
        None when `df` is None; otherwise a copy of `df` in which every
        object-dtype column (or column holding dicts/lists) has had its cells
        converted: dicts and lists become JSON strings, missing values become
        None, and other non-primitive objects become `str(obj)`. The input
        DataFrame is not modified.
    """
    if df is None:
        return None
    df_copy = df.copy()
    for col in df_copy.columns:
        column = df_copy[col]
        if (
            column.dtype == "object"
            or column.apply(lambda x: isinstance(x, (dict, list))).any()
        ):
            df_copy[col] = column.apply(_stringify_cell_for_json)
    return df_copy
def _get_evaluation_html(eval_result_json: str) -> str:
    """Returns a self-contained HTML for single evaluation visualization.

    Args:
        eval_result_json: JSON text of the evaluation result;
            `display_evaluation_result` passes `json.dumps(result_dump)`.

    Returns:
        The report markup as a string.
    """
    # NOTE(review): as visible here the template has no `{...}` placeholder, so
    # `eval_result_json` is never interpolated and the f-prefix is a no-op. The
    # markup looks truncated in this view -- confirm against the full template.
    return f"""
Evaluation Report
"""
def _get_comparison_html(eval_result_json: str) -> str:
    """Returns a self-contained HTML for a side-by-side eval comparison.

    Args:
        eval_result_json: JSON text of the evaluation result;
            `display_evaluation_result` passes `json.dumps(result_dump)` when
            more than one dataset is being compared.

    Returns:
        The comparison report markup as a string.
    """
    # NOTE(review): as visible here the template has no `{...}` placeholder, so
    # `eval_result_json` is never interpolated and the f-prefix is a no-op. The
    # markup looks truncated in this view -- confirm against the full template.
    return f"""
Eval Comparison Report
"""
def _get_inference_html(dataframe_json: str) -> str:
    """Returns a self-contained HTML for displaying inference results.

    Args:
        dataframe_json: JSON text describing the dataset rows;
            `display_evaluation_dataset` passes the `json.dumps`-encoded output
            of `DataFrame.to_json(orient="records")`.

    Returns:
        The inference results markup as a string.
    """
    # NOTE(review): as visible here the template has no `{...}` placeholder, so
    # `dataframe_json` is never interpolated and the f-prefix is a no-op. The
    # markup looks truncated in this view -- confirm against the full template.
    return f"""
Inference Results
"""
def _attach_response_text(
    candidate_result: dict,
    processed_df: Optional[pd.DataFrame],
    case_idx: Optional[int],
) -> None:
    """Copies a dataset row's "response" value onto a candidate result dict.

    Nothing is written when the frame or index is missing, or when the index
    falls outside the frame. Negative indices are rejected so they cannot wrap
    around to an unrelated row.

    Args:
        candidate_result: The candidate result dict to annotate in place.
        processed_df: The preprocessed dataset frame, or None.
        case_idx: Positional row index of the eval case, or None.
    """
    if processed_df is None or case_idx is None:
        return
    if not 0 <= case_idx < len(processed_df):
        return
    candidate_result["response_text"] = processed_df.iloc[case_idx].get("response")


def display_evaluation_result(
    eval_result_obj: types.EvaluationResult,
    candidate_names: Optional[list[str]] = None,
) -> None:
    """Displays evaluation result in an IPython environment.

    The result is dumped to a JSON-compatible dict, enriched with the input
    dataset rows and each candidate's response text, and rendered as HTML.
    When the result carries more than one evaluation dataset the side-by-side
    comparison report is used and win rates are merged into the summary
    metrics; otherwise the single evaluation report is used.

    Args:
        eval_result_obj: The evaluation result to visualize.
        candidate_names: Optional display names for the candidates. When
            omitted, any `candidate_names` already present in the result
            metadata is kept.

    Raises:
        Exception: Any error raised by `model_dump` other than the pydantic
            serialization error is logged and re-raised.
    """
    if not _is_ipython_env():
        logger.warning("Skipping display: not in an IPython environment.")
        return

    from IPython import display

    try:
        result_dump = eval_result_obj.model_dump(
            mode="json", exclude_none=True, exclude={"evaluation_dataset"}
        )
    # NOTE(review): confirm that `pydantic.errors` exposes
    # PydanticSerializationError on the supported pydantic versions; the
    # attribute is only looked up while an exception is being handled.
    except errors.PydanticSerializationError as e:
        logger.error(
            "Serialization Error: %s\nCould not display the evaluation "
            "result due to a data serialization issue. Please check the "
            "content of the EvaluationResult object.",
            e,
        )
        return
    except Exception as e:
        logger.error("Failed to serialize EvaluationResult: %s", e, exc_info=True)
        raise

    datasets = eval_result_obj.evaluation_dataset
    is_comparison = bool(datasets) and len(datasets) > 1

    metadata_payload = result_dump.get("metadata", {})
    metadata_payload["candidate_names"] = candidate_names or metadata_payload.get(
        "candidate_names"
    )
    case_results = result_dump.get("eval_case_results") or []

    if is_comparison:
        # Preprocessing copies the whole frame, so do it at most once per
        # candidate dataset instead of once per (case, candidate) pair.
        processed_frames: dict[int, Optional[pd.DataFrame]] = {}

        def processed_frame(idx: int) -> Optional[pd.DataFrame]:
            if idx not in processed_frames:
                processed_frames[idx] = _preprocess_df_for_json(
                    datasets[idx].eval_dataset_df
                )
            return processed_frames[idx]

        # The first dataset supplies the shared dataset rows for the report.
        if datasets[0] and datasets[0].eval_dataset_df is not None:
            metadata_payload["dataset"] = processed_frame(0).to_dict(
                orient="records"
            )

        for case_res in case_results:
            case_idx = case_res.get("eval_case_index")
            candidate_results = case_res.get("response_candidate_results", [])
            # zip() stops at the shorter sequence, so candidates without a
            # matching dataset are left untouched.
            for resp_idx, (cand_res, dataset) in enumerate(
                zip(candidate_results, datasets)
            ):
                if dataset is None or dataset.eval_dataset_df is None:
                    continue
                _attach_response_text(cand_res, processed_frame(resp_idx), case_idx)

        win_rates = eval_result_obj.win_rates or {}
        for summary in result_dump.get("summary_metrics") or []:
            metric_name = summary.get("metric_name")
            if metric_name in win_rates:
                summary.update(win_rates[metric_name])

        render_html = _get_comparison_html
    else:
        single_dataset = datasets[0] if datasets else None
        if (
            isinstance(single_dataset, types.EvaluationDataset)
            and single_dataset.eval_dataset_df is not None
        ):
            processed_df = _preprocess_df_for_json(single_dataset.eval_dataset_df)
            metadata_payload["dataset"] = processed_df.to_dict(orient="records")
            for case_res in case_results:
                candidate_results = case_res.get("response_candidate_results")
                if candidate_results:
                    # Single-dataset runs annotate the first candidate only.
                    _attach_response_text(
                        candidate_results[0],
                        processed_df,
                        case_res.get("eval_case_index"),
                    )

        render_html = _get_evaluation_html

    result_dump["metadata"] = metadata_payload
    html_content = render_html(json.dumps(result_dump))
    display.display(display.HTML(html_content))
def display_evaluation_dataset(eval_dataset_obj: types.EvaluationDataset) -> None:
    """Renders an evaluation dataset as HTML inside an IPython session.

    A warning is logged and nothing is shown when not running under IPython or
    when the dataset has no (or an empty) DataFrame.

    Args:
        eval_dataset_obj: The dataset whose `eval_dataset_df` is displayed.
    """
    if not _is_ipython_env():
        logger.warning("Skipping display: not in an IPython environment.")
        return

    from IPython import display

    dataset_df = eval_dataset_obj.eval_dataset_df
    if dataset_df is None or dataset_df.empty:
        logger.warning("No inference data to display.")
        return

    records_json = _preprocess_df_for_json(dataset_df).to_json(orient="records")
    # The records JSON is itself JSON-encoded before being handed to the
    # template, so the template receives a quoted string literal.
    rendered_html = _get_inference_html(json.dumps(records_json))
    display.display(display.HTML(rendered_html))