# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Visualization utilities for GenAI Evaluation SDK."""

import json
import logging
from typing import Optional

from pydantic import errors
import pandas as pd

from . import types


logger = logging.getLogger(__name__)


def _is_ipython_env() -> bool:
    """Checks if the code is running in an IPython environment.

    Returns:
        True when IPython is importable and `get_ipython()` returns an active
        shell; False when IPython is not installed or no shell is active.
    """
    try:
        from IPython import get_ipython

        return get_ipython() is not None
    except ImportError:
        return False


def _stringify_cell(cell):
    """Converts one DataFrame cell into a JSON-friendly value.

    Args:
        cell: Any object stored in a DataFrame cell.

    Returns:
        - a JSON string for dicts and lists (falling back to `str(cell)` when
          the container cannot be serialized),
        - None for scalar missing values (None, NaN, NaT, ...),
        - the value unchanged for str / int / float / bool,
        - `str(cell)` for everything else.
    """
    # Containers must be handled BEFORE the missing-value check: pd.isna() on a
    # list returns an element-wise array, and using that array in an `if`
    # raises ValueError for any list with more than one element.
    if isinstance(cell, (dict, list)):
        try:
            return json.dumps(cell, ensure_ascii=False)
        except (TypeError, ValueError):
            # TypeError: non-serializable member; ValueError: circular reference.
            return str(cell)
    # Only scalars are tested for missingness so array-like cells (e.g. numpy
    # arrays) fall through to str() instead of producing an ambiguous truth value.
    if pd.api.types.is_scalar(cell) and pd.isna(cell):
        return None
    if isinstance(cell, (str, int, float, bool)):
        return cell
    return str(cell)


def _preprocess_df_for_json(df: Optional[pd.DataFrame]) -> Optional[pd.DataFrame]:
    """Prepares a DataFrame for JSON serialization by converting complex objects to strings.

    Args:
        df: The DataFrame to prepare, or None.

    Returns:
        None when `df` is None; otherwise a copy of `df` in which every
        object-dtype column (or column holding dict/list cells) has been passed
        through `_stringify_cell`. The input DataFrame is not modified.
    """
    if df is None:
        return None
    df_copy = df.copy()

    for col in df_copy.columns:
        column = df_copy[col]
        if (
            column.dtype == "object"
            or column.apply(lambda x: isinstance(x, (dict, list))).any()
        ):
            df_copy[col] = column.apply(_stringify_cell)
    return df_copy


def _get_evaluation_html(eval_result_json: str) -> str:
    """Returns a self-contained HTML for single evaluation visualization.

    Args:
        eval_result_json: JSON string of the evaluation result, as produced by
            the caller.

    Returns:
        The HTML document as a string.
    """
    # NOTE(review): the template text visible here contains no placeholder for
    # `eval_result_json` — confirm the markup of this template is intact.
    return f""" Evaluation Report

Evaluation Report

"""


def _get_comparison_html(eval_result_json: str) -> str:
    """Returns a self-contained HTML for a side-by-side eval comparison.

    Args:
        eval_result_json: JSON string of the evaluation result, as produced by
            the caller.

    Returns:
        The HTML document as a string.
    """
    # NOTE(review): the template text visible here contains no placeholder for
    # `eval_result_json` — confirm the markup of this template is intact.
    return f""" Eval Comparison Report

Eval Comparison Report

"""


def _get_inference_html(dataframe_json: str) -> str:
    """Returns a self-contained HTML for displaying inference results.

    Args:
        dataframe_json: JSON string of the inference DataFrame, as produced by
            the caller.

    Returns:
        The HTML document as a string.
    """
    # NOTE(review): the template text visible here contains no placeholder for
    # `dataframe_json` — confirm the markup of this template is intact.
    return f""" Inference Results

Inference Results

"""


def display_evaluation_result(
    eval_result_obj: types.EvaluationResult,
    candidate_names: Optional[list[str]] = None,
) -> None:
    """Displays evaluation result in an IPython environment.

    The result is dumped to a JSON-compatible dict (without the evaluation
    datasets), enriched with the dataset rows and each candidate's response
    text, and rendered with the comparison template when more than one dataset
    is attached, otherwise with the single-evaluation template.

    Args:
        eval_result_obj: The evaluation result to render.
        candidate_names: Optional display names for the candidates. When
            omitted (or empty), any `candidate_names` already present in the
            result metadata is kept.

    Raises:
        Exception: Any error raised by `model_dump` other than a pydantic
            serialization error is logged and re-raised.
    """
    if not _is_ipython_env():
        logger.warning("Skipping display: not in an IPython environment.")
        return

    from IPython import display

    # NOTE(review): `errors.PydanticSerializationError` is only looked up when
    # an exception actually reaches the handler — confirm that `pydantic.errors`
    # exposes that name in the supported pydantic versions.
    try:
        result_dump = eval_result_obj.model_dump(
            mode="json", exclude_none=True, exclude={"evaluation_dataset"}
        )
    except errors.PydanticSerializationError as e:
        logger.error(
            "Serialization Error: %s\nCould not display the evaluation "
            "result due to a data serialization issue. Please check the "
            "content of the EvaluationResult object.",
            e,
        )
        return
    except Exception as e:
        logger.error("Failed to serialize EvaluationResult: %s", e, exc_info=True)
        raise

    input_dataset_list = eval_result_obj.evaluation_dataset
    is_comparison = bool(input_dataset_list) and len(input_dataset_list) > 1

    metadata_payload = result_dump.get("metadata", {})
    metadata_payload["candidate_names"] = candidate_names or metadata_payload.get(
        "candidate_names"
    )

    if is_comparison:
        # Preprocess every candidate dataset exactly once. Doing this inside the
        # case/candidate loops would copy and re-stringify the whole DataFrame
        # for each (case, candidate) pair.
        processed_frames = [
            _preprocess_df_for_json(dataset.eval_dataset_df)
            if dataset is not None
            else None
            for dataset in input_dataset_list
        ]

        # The first dataset supplies the rows shown as the shared dataset.
        if processed_frames[0] is not None:
            metadata_payload["dataset"] = processed_frames[0].to_dict(
                orient="records"
            )

        for case_res in result_dump.get("eval_case_results", []):
            case_idx = case_res.get("eval_case_index")
            if case_idx is None:
                continue
            candidate_results = case_res.get("response_candidate_results", [])
            # zip() pairs candidate i with dataset i and stops at the shorter
            # sequence, so candidates without a matching dataset are skipped.
            for cand_res, frame in zip(candidate_results, processed_frames):
                if frame is not None and case_idx < len(frame):
                    cand_res["response_text"] = frame.iloc[case_idx].get("response")

        # Merge per-metric win rates into the matching summary entries.
        win_rates = eval_result_obj.win_rates if eval_result_obj.win_rates else {}
        for summary in result_dump.get("summary_metrics", []):
            if summary.get("metric_name") in win_rates:
                summary.update(win_rates[summary["metric_name"]])

        result_dump["metadata"] = metadata_payload
        html_content = _get_comparison_html(json.dumps(result_dump))
    else:
        single_dataset = input_dataset_list[0] if input_dataset_list else None

        if (
            isinstance(single_dataset, types.EvaluationDataset)
            and single_dataset.eval_dataset_df is not None
        ):
            processed_df = _preprocess_df_for_json(single_dataset.eval_dataset_df)
            metadata_payload["dataset"] = processed_df.to_dict(orient="records")

            for case_res in result_dump.get("eval_case_results", []):
                case_idx = case_res.get("eval_case_index")
                candidate_results = case_res.get("response_candidate_results")
                if (
                    case_idx is not None
                    and case_idx < len(processed_df)
                    and candidate_results
                ):
                    # Single evaluation: only the first candidate is annotated.
                    candidate_results[0]["response_text"] = processed_df.iloc[
                        case_idx
                    ].get("response")

        result_dump["metadata"] = metadata_payload
        html_content = _get_evaluation_html(json.dumps(result_dump))

    display.display(display.HTML(html_content))


def display_evaluation_dataset(eval_dataset_obj: types.EvaluationDataset) -> None:
    """Displays an evaluation dataset in an IPython environment.

    Logs a warning and returns without rendering when not running under
    IPython, or when the dataset has no DataFrame or an empty one.

    Args:
        eval_dataset_obj: The dataset whose `eval_dataset_df` is rendered.
    """
    if not _is_ipython_env():
        logger.warning("Skipping display: not in an IPython environment.")
        return

    from IPython import display

    if (
        eval_dataset_obj.eval_dataset_df is None
        or eval_dataset_obj.eval_dataset_df.empty
    ):
        logger.warning("No inference data to display.")
        return

    processed_df = _preprocess_df_for_json(eval_dataset_obj.eval_dataset_df)
    # to_json() already returns a JSON string; json.dumps() then wraps it in a
    # quoted string literal. Presumably the template parses that literal on the
    # client side — confirm against the template before simplifying.
    dataframe_json_string = json.dumps(processed_df.to_json(orient="records"))
    html_content = _get_inference_html(dataframe_json_string)
    display.display(display.HTML(html_content))