{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "intro",
   "metadata": {},
   "source": [
    "# Data Validation Results via SpecklePy\n",
    "\n",
    "This notebook lists saved Data Validation checks for a project and builds a KPI\n",
    "DataFrame from the latest aggregate results.\n",
    "\n",
    "It follows [Get Data Validation Results with GraphQL](https://docs.speckle.systems/developers/api/guides/data-validation-results).\n",
    "\n",
    "## Setup with `.env`\n",
    "\n",
    "Create a `.env` file next to this notebook:\n",
    "\n",
    "```bash\n",
    "SPECKLE_HOST=https://app.speckle.systems\n",
    "SPECKLE_TOKEN=your_personal_access_token\n",
    "SPECKLE_PROJECT_ID=your_project_id\n",
    "```\n",
    "\n",
    "If you need to create a token first, see [Building with PATs](https://docs.speckle.systems/developers/authentication/pats)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "install-dependencies",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install -q specklepy pandas python-dotenv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "imports",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from collections import defaultdict\n",
    "\n",
    "import pandas as pd\n",
    "from dotenv import load_dotenv\n",
    "from gql import gql\n",
    "from specklepy.api.client import SpeckleClient\n",
    "\n",
    "load_dotenv()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "config-and-auth",
   "metadata": {},
   "outputs": [],
   "source": [
    "HOST = os.getenv(\"SPECKLE_HOST\", \"https://app.speckle.systems\")\n",
    "TOKEN = os.getenv(\"SPECKLE_TOKEN\")\n",
    "PROJECT_ID = os.getenv(\"SPECKLE_PROJECT_ID\")\n",
    "\n",
    "if not TOKEN:\n",
    "    raise ValueError(\"Set SPECKLE_TOKEN in your .env file.\")\n",
    "if not PROJECT_ID:\n",
    "    raise ValueError(\"Set SPECKLE_PROJECT_ID in your .env file.\")\n",
    "\n",
    "client = SpeckleClient(host=HOST)\n",
    "client.authenticate_with_token(TOKEN)\n",
    "print(f\"Authenticated. project={PROJECT_ID}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "helpers-heading",
   "metadata": {},
   "source": [
    "## Query and scoring helpers\n",
    "\n",
    "The cell below defines the GraphQL query plus the functions that turn a check's\n",
    "pass/fail summary into a score and a status. The type hints use `X | None`, so\n",
    "Python 3.10 or newer is required."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "helpers",
   "metadata": {},
   "outputs": [],
   "source": [
    "DEFAULT_DISPLAY_CONFIG = {\n",
    "    \"passThreshold\": 0.9,\n",
    "    \"warningThreshold\": None,\n",
    "    \"rulePassThreshold\": {},\n",
    "    \"ruleWarningThreshold\": {},\n",
    "    \"ruleSeverity\": {},\n",
    "}\n",
    "\n",
    "# Column order of the KPI table; also gives an empty result a stable schema.\n",
    "KPI_COLUMNS = [\n",
    "    \"name\",\n",
    "    \"insight_id\",\n",
    "    \"pass\",\n",
    "    \"fail\",\n",
    "    \"score_pct\",\n",
    "    \"status\",\n",
    "    \"evaluated_at\",\n",
    "]\n",
    "\n",
    "LIST_CHECKS_QUERY = gql(\"\"\"\n",
    "query ProjectValidationChecks($projectId: String!) {\n",
    "  projectInsights(projectId: $projectId, type: \"model_validation\") {\n",
    "    id\n",
    "    name\n",
    "    metadata\n",
    "    aggregateResults(limit: 1) {\n",
    "      timestamp\n",
    "      summary\n",
    "    }\n",
    "  }\n",
    "}\n",
    "\"\"\")\n",
    "\n",
    "\n",
    "def run_query(query, variables: dict | None = None) -> dict:\n",
    "    \"\"\"Execute a parsed GraphQL document and return the response data.\n",
    "\n",
    "    Args:\n",
    "        query: Document produced by `gql(...)`.\n",
    "        variables: Optional GraphQL variables; None or an empty dict sends none.\n",
    "\n",
    "    Queries with and without variables go through the same gql client call,\n",
    "    so there is a single execution path to reason about.\n",
    "    \"\"\"\n",
    "    return client.httpclient.execute(query, variable_values=variables or None)\n",
    "\n",
    "\n",
    "def resolve_thresholds(display_config: dict, rule_name: str | None = None) -> dict:\n",
    "    \"\"\"Merge a check's display config with DEFAULT_DISPLAY_CONFIG.\n",
    "\n",
    "    Args:\n",
    "        display_config: The check's stored display config; may be empty or None.\n",
    "        rule_name: When given, per-rule overrides win over the check-wide values.\n",
    "\n",
    "    Returns:\n",
    "        A dict with the keys `passThreshold`, `warningThreshold` and `severity`.\n",
    "\n",
    "    Keys stored as an explicit null are treated like missing keys. The one\n",
    "    exception is a per-rule warning threshold of None, which still overrides\n",
    "    the check-wide warning threshold for that rule.\n",
    "    \"\"\"\n",
    "    cfg = {**DEFAULT_DISPLAY_CONFIG, **(display_config or {})}\n",
    "\n",
    "    pass_default = cfg.get(\"passThreshold\")\n",
    "    if pass_default is None:\n",
    "        # A null must not replace the default: comparing a float with None raises.\n",
    "        pass_default = DEFAULT_DISPLAY_CONFIG[\"passThreshold\"]\n",
    "    warn_default = cfg.get(\"warningThreshold\")\n",
    "\n",
    "    if not rule_name:\n",
    "        return {\n",
    "            \"passThreshold\": pass_default,\n",
    "            \"warningThreshold\": warn_default,\n",
    "            \"severity\": \"error\",\n",
    "        }\n",
    "\n",
    "    # `or {}` guards against maps that were stored as null.\n",
    "    rule_pass = cfg.get(\"rulePassThreshold\") or {}\n",
    "    rule_warn = cfg.get(\"ruleWarningThreshold\") or {}\n",
    "    rule_severity = cfg.get(\"ruleSeverity\") or {}\n",
    "\n",
    "    pass_t = rule_pass.get(rule_name)\n",
    "    if pass_t is None:\n",
    "        pass_t = pass_default\n",
    "    warn_t = rule_warn[rule_name] if rule_name in rule_warn else warn_default\n",
    "    return {\n",
    "        \"passThreshold\": pass_t,\n",
    "        \"warningThreshold\": warn_t,\n",
    "        \"severity\": rule_severity.get(rule_name) or \"error\",\n",
    "    }\n",
    "\n",
    "\n",
    "def compute_pass_rate(summary: dict) -> float | None:\n",
    "    \"\"\"Return pass / (pass + fail), or None when nothing was evaluated.\n",
    "\n",
    "    Missing or null counts are treated as 0.\n",
    "    \"\"\"\n",
    "    pass_n = summary.get(\"pass\", 0) or 0\n",
    "    fail_n = summary.get(\"fail\", 0) or 0\n",
    "    total = pass_n + fail_n\n",
    "    if total == 0:\n",
    "        return None\n",
    "    return pass_n / total\n",
    "\n",
    "\n",
    "def compute_score_pct(summary: dict) -> int | None:\n",
    "    \"\"\"Return the pass rate as a whole-number percentage, or None if undefined.\"\"\"\n",
    "    rate = compute_pass_rate(summary)\n",
    "    if rate is None:\n",
    "        return None\n",
    "    return round(rate * 100)\n",
    "\n",
    "\n",
    "def compute_status(\n",
    "    pass_rate: float | None,\n",
    "    display_config: dict,\n",
    "    rule_name: str | None = None,\n",
    ") -> str:\n",
    "    \"\"\"Classify a pass rate as `na`, `info`, `pass`, `warning` or `fail`.\n",
    "\n",
    "    `na` means there is no pass rate. `info` is only returned for a named rule\n",
    "    whose severity is `info`. Otherwise the rate is compared with the pass\n",
    "    threshold first and then, if one is set, with the warning threshold.\n",
    "    \"\"\"\n",
    "    if pass_rate is None:\n",
    "        return \"na\"\n",
    "    thresholds = resolve_thresholds(display_config, rule_name)\n",
    "    if rule_name and thresholds.get(\"severity\") == \"info\":\n",
    "        return \"info\"\n",
    "    pass_t = thresholds[\"passThreshold\"]\n",
    "    warn_t = thresholds.get(\"warningThreshold\")\n",
    "    if pass_rate >= pass_t:\n",
    "        return \"pass\"\n",
    "    if warn_t is not None and pass_rate >= warn_t:\n",
    "        return \"warning\"\n",
    "    return \"fail\"\n",
    "\n",
    "\n",
    "def checks_to_kpi_df(checks: list[dict]) -> pd.DataFrame:\n",
    "    \"\"\"Build one KPI row per check from its first aggregate result.\n",
    "\n",
    "    The query requests `aggregateResults(limit: 1)`, so at most one result per\n",
    "    check is present. Checks without a result get counts of 0, a `score_pct`\n",
    "    of None and the status `na`. The returned frame always has the columns\n",
    "    listed in KPI_COLUMNS, even when `checks` is empty.\n",
    "    \"\"\"\n",
    "    rows = []\n",
    "    for check in checks or []:\n",
    "        results = check.get(\"aggregateResults\") or []\n",
    "        latest = (results[0] if results else None) or {}\n",
    "        summary = latest.get(\"summary\") or {}\n",
    "        metadata = check.get(\"metadata\") or {}\n",
    "        display_config = metadata.get(\"displayConfig\") or {}\n",
    "        rows.append(\n",
    "            {\n",
    "                \"name\": check.get(\"name\"),\n",
    "                \"insight_id\": check.get(\"id\"),\n",
    "                # Same null handling as compute_pass_rate, so counts and score agree.\n",
    "                \"pass\": summary.get(\"pass\") or 0,\n",
    "                \"fail\": summary.get(\"fail\") or 0,\n",
    "                \"score_pct\": compute_score_pct(summary),\n",
    "                \"status\": compute_status(compute_pass_rate(summary), display_config),\n",
    "                \"evaluated_at\": latest.get(\"timestamp\"),\n",
    "            }\n",
    "        )\n",
    "    return pd.DataFrame(rows, columns=KPI_COLUMNS)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "results-heading",
   "metadata": {},
   "source": [
    "## Fetch checks and build the KPI table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "kpi-table",
   "metadata": {},
   "outputs": [],
   "source": [
    "result = run_query(LIST_CHECKS_QUERY, {\"projectId\": PROJECT_ID})\n",
    "checks = result.get(\"projectInsights\") or []\n",
    "kpi_df = checks_to_kpi_df(checks)\n",
    "kpi_df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}