{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Data Validation Results via SpecklePy\n",
        "\n",
        "This notebook lists saved Data Validation checks for a project and builds a KPI\n",
        "DataFrame from the latest aggregate results.\n",
        "\n",
        "It follows [Get Data Validation Results with GraphQL](https://docs.speckle.systems/developers/api/guides/data-validation-results).\n",
        "\n",
        "## Setup with `.env`\n",
        "\n",
        "Create a `.env` file next to this notebook:\n",
        "\n",
        "```bash\n",
        "SPECKLE_HOST=https://app.speckle.systems\n",
        "SPECKLE_TOKEN=your_personal_access_token\n",
        "SPECKLE_PROJECT_ID=your_project_id\n",
        "```\n",
        "\n",
        "If you need to create a token first, see [Building with PATs](https://docs.speckle.systems/developers/authentication/pats)."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "%pip install -q specklepy pandas python-dotenv"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "import os\n",
        "from collections import defaultdict\n",
        "\n",
        "import pandas as pd\n",
        "from dotenv import load_dotenv\n",
        "from gql import gql\n",
        "from specklepy.api.client import SpeckleClient\n",
        "\n",
        "load_dotenv()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "HOST = os.getenv(\"SPECKLE_HOST\", \"https://app.speckle.systems\")\n",
        "TOKEN = os.getenv(\"SPECKLE_TOKEN\")\n",
        "PROJECT_ID = os.getenv(\"SPECKLE_PROJECT_ID\")\n",
        "\n",
        "if not TOKEN:\n",
        "    raise ValueError(\"Set SPECKLE_TOKEN in your .env file.\")\n",
        "if not PROJECT_ID:\n",
        "    raise ValueError(\"Set SPECKLE_PROJECT_ID in your .env file.\")\n",
        "\n",
        "client = SpeckleClient(host=HOST)\n",
        "client.authenticate_with_token(TOKEN)\n",
        "print(f\"Authenticated. project={PROJECT_ID}\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "DEFAULT_DISPLAY_CONFIG = {\n",
        "    \"passThreshold\": 0.9,\n",
        "    \"warningThreshold\": None,\n",
        "    \"rulePassThreshold\": {},\n",
        "    \"ruleWarningThreshold\": {},\n",
        "    \"ruleSeverity\": {},\n",
        "}\n",
        "\n",
        "LIST_CHECKS_QUERY = gql(\"\"\"\n",
        "query ProjectValidationChecks($projectId: String!) {\n",
        "  projectInsights(projectId: $projectId, type: \"model_validation\") {\n",
        "    id\n",
        "    name\n",
        "    metadata\n",
        "    aggregateResults(limit: 1) {\n",
        "      timestamp\n",
        "      summary\n",
        "    }\n",
        "  }\n",
        "}\n",
        "\"\"\")\n",
        "\n",
        "\n",
        "def run_query(query, variables: dict | None = None) -> dict:\n",
        "    if variables:\n",
        "        return client.httpclient.execute(query, variable_values=variables)\n",
        "    return client.execute_query(query)\n",
        "\n",
        "\n",
        "def resolve_thresholds(display_config: dict, rule_name: str | None = None) -> dict:\n",
        "    cfg = {**DEFAULT_DISPLAY_CONFIG, **(display_config or {})}\n",
        "    if rule_name:\n",
        "        pass_t = cfg[\"rulePassThreshold\"].get(rule_name, cfg[\"passThreshold\"])\n",
        "        warn_map = cfg.get(\"ruleWarningThreshold\") or {}\n",
        "        warn_t = warn_map[rule_name] if rule_name in warn_map else cfg.get(\"warningThreshold\")\n",
        "        severity = cfg.get(\"ruleSeverity\", {}).get(rule_name, \"error\")\n",
        "        return {\"passThreshold\": pass_t, \"warningThreshold\": warn_t, \"severity\": severity}\n",
        "    return {\n",
        "        \"passThreshold\": cfg[\"passThreshold\"],\n",
        "        \"warningThreshold\": cfg.get(\"warningThreshold\"),\n",
        "        \"severity\": \"error\",\n",
        "    }\n",
        "\n",
        "\n",
        "def compute_pass_rate(summary: dict) -> float | None:\n",
        "    pass_n = summary.get(\"pass\", 0) or 0\n",
        "    fail_n = summary.get(\"fail\", 0) or 0\n",
        "    total = pass_n + fail_n\n",
        "    if total == 0:\n",
        "        return None\n",
        "    return pass_n / total\n",
        "\n",
        "\n",
        "def compute_score_pct(summary: dict) -> int | None:\n",
        "    rate = compute_pass_rate(summary)\n",
        "    if rate is None:\n",
        "        return None\n",
        "    return round(rate * 100)\n",
        "\n",
        "\n",
        "def compute_status(\n",
        "    pass_rate: float | None,\n",
        "    display_config: dict,\n",
        "    rule_name: str | None = None,\n",
        ") -> str:\n",
        "    if pass_rate is None:\n",
        "        return \"na\"\n",
        "    thresholds = resolve_thresholds(display_config, rule_name)\n",
        "    if rule_name and thresholds.get(\"severity\") == \"info\":\n",
        "        return \"info\"\n",
        "    pass_t = thresholds[\"passThreshold\"]\n",
        "    warn_t = thresholds.get(\"warningThreshold\")\n",
        "    if pass_rate >= pass_t:\n",
        "        return \"pass\"\n",
        "    if warn_t is not None and pass_rate >= warn_t:\n",
        "        return \"warning\"\n",
        "    return \"fail\"\n",
        "\n",
        "\n",
        "def checks_to_kpi_df(checks: list[dict]) -> pd.DataFrame:\n",
        "    rows = []\n",
        "    for check in checks:\n",
        "        agg_list = check.get(\"aggregateResults\") or []\n",
        "        agg = agg_list[0] if agg_list else None\n",
        "        summary = (agg or {}).get(\"summary\") or {}\n",
        "        metadata = check.get(\"metadata\") or {}\n",
        "        display_config = metadata.get(\"displayConfig\") or {}\n",
        "        pass_rate = compute_pass_rate(summary)\n",
        "        rows.append(\n",
        "            {\n",
        "                \"name\": check.get(\"name\"),\n",
        "                \"insight_id\": check.get(\"id\"),\n",
        "                \"pass\": summary.get(\"pass\", 0),\n",
        "                \"fail\": summary.get(\"fail\", 0),\n",
        "                \"score_pct\": compute_score_pct(summary),\n",
        "                \"status\": compute_status(pass_rate, display_config),\n",
        "                \"evaluated_at\": (agg or {}).get(\"timestamp\"),\n",
        "            }\n",
        "        )\n",
        "    return pd.DataFrame(rows)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "result = run_query(LIST_CHECKS_QUERY, {\"projectId\": PROJECT_ID})\n",
        "checks = result.get(\"projectInsights\") or []\n",
        "kpi_df = checks_to_kpi_df(checks)\n",
        "kpi_df"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "pygments_lexer": "ipython3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
}