package mcp

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"regexp"
	"sort"
	"strings"
	"time"

	"github.com/modelcontextprotocol/go-sdk/mcp"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/runtime"

	"github.com/skyhook-io/radar/internal/filter"
	"github.com/skyhook-io/radar/internal/helm"
	"github.com/skyhook-io/radar/internal/issues"
	"github.com/skyhook-io/radar/internal/k8s"
	"github.com/skyhook-io/radar/internal/resourcecontextrefs"
	"github.com/skyhook-io/radar/internal/search"
	"github.com/skyhook-io/radar/internal/summarycontext"
	"github.com/skyhook-io/radar/internal/timeline"
	aicontext "github.com/skyhook-io/radar/pkg/ai/context"
	"github.com/skyhook-io/radar/pkg/k8score"
	"github.com/skyhook-io/radar/pkg/resourcecontext"
	topology "github.com/skyhook-io/radar/pkg/topology"
)

// registerTools registers every MCP tool exposed at /mcp. The user-facing
// setup dialog catalog (web/src/components/home/mcpToolCatalog.ts) must list
// the same set — TestSetupDialogCoversAllTools fails CI when they diverge, so
// add/remove the catalog entry alongside any change here.
//
// Each tool is added with mcp.AddTool and its handler is wrapped in
// logToolCall, which receives the same name string as Tool.Name — keep the two
// in sync when renaming a tool. Every tool carries one of two shared
// annotation sets defined at the top of the function: readOnly or writeTool.
func registerTools(server *mcp.Server) {
	// boolPtr returns a pointer to b; the hint fields set through it take *bool.
	boolPtr := func(b bool) *bool { return &b }
	// All radar tools operate against the connected cluster (closed world),
	// not the open internet — set OpenWorldHint=false so MCP clients that
	// gate on this hint don't treat radar like a web-search tool.
	readOnly := &mcp.ToolAnnotations{
		ReadOnlyHint:  true,
		OpenWorldHint: boolPtr(false),
	}
	// writeTool reflects worst-case action across the tools that share it:
	// apply_resource can force SSA ownership; manage_node drains evict pods; manage_workload
	// rollback/restart overwrites desired state or terminates pods;
	// manage_gitops terminate/rollback aborts or overwrites; manage_cronjob
	// suspend mutates schedule state (not additive). patch_resource shares
	// this annotation set as well. ReadOnlyHint is not set here, so it keeps
	// its zero value (false).
	writeTool := &mcp.ToolAnnotations{
		DestructiveHint: boolPtr(true),
		OpenWorldHint:   boolPtr(false),
	}

	// --- Overview, inventory, and drill-down tools (read-only) ---

	mcp.AddTool(server, &mcp.Tool{
		Name: "get_dashboard",
		Description: "Use for inventory-style cluster or namespace health triage, like " +
			"`kubectl get all` plus detected problems and warning events in one call. " +
			"Returns resource counts, failing pods, unhealthy workloads, recent Warning " +
			"events, and Helm release status so you can rank likely suspects before " +
			"calling get_resource or logs. Routing: unknown broken thing -> issues; " +
			"content/name search -> search; service routing/dependencies -> get_topology " +
			"or get_neighborhood; inventory/counts/Helm/events overview -> get_dashboard. " +
			"Use namespace for app-local triage; omit it when the root may be cluster-scoped.",
		Annotations: readOnly,
	}, logToolCall("get_dashboard", handleGetDashboard))

	mcp.AddTool(server, &mcp.Tool{
		Name: "top_resources",
		Description: "Use when investigating high CPU, memory pressure, OOMKills, " +
			"slow services, noisy pods, or uneven node load. Returns live metrics " +
			"ranked like `kubectl top pods|nodes | sort`, joined with Kubernetes " +
			"context: pod status, readiness, restarts, owner workload, requests, and " +
			"limits. kind=pods ranks individual Pods, kind=workloads aggregates Pods " +
			"to Deployments/StatefulSets/DaemonSets/Jobs, and kind=nodes ranks Nodes. " +
			"Use before reading logs when the symptom mentions CPU, memory, GC, OOM, " +
			"latency, or load.",
		Annotations: readOnly,
	}, logToolCall("top_resources", handleTopResources))

	mcp.AddTool(server, &mcp.Tool{
		Name: "list_resources",
		Description: "Use for a jq-like namespace sweep when you know the resource kind " +
			"(pods/po, deployments/deploy, services/svc, configmaps/cm, CRDs). Returns compact Kubernetes-shaped " +
			"rows plus summaryContext by default (managedBy, health, issueCount) so you can " +
			"compare many similar resources and pick suspects before calling get_resource. " +
			"For unknown kind/name searches, use search. For broad health triage, use " +
			"get_dashboard or issues first.",
		Annotations: readOnly,
	}, logToolCall("list_resources", handleListResources))

	mcp.AddTool(server, &mcp.Tool{
		Name: "get_resource",
		Description: "Use AFTER narrowing to one resource. Returns the resource's " +
			"Kubernetes-shaped spec/status/metadata plus resourceContext when available " +
			"(relationships, refs, issue/audit/policy rollups). This is the drill-down " +
			"tool, not the best first call for broad incidents. Start with issues, " +
			"get_dashboard, search, or list_resources to rank candidates; then call " +
			"get_resource for the exact object. If you are looking for a string across " +
			"ConfigMaps, CRD specs, env refs, or object content, use search instead of " +
			"fetching resources one by one. Use the group parameter for ambiguous " +
			"kinds such as Knative Service vs core Service.",
		Annotations: readOnly,
	}, logToolCall("get_resource", handleGetResource))

	mcp.AddTool(server, &mcp.Tool{
		Name: "get_topology",
		Description: "Use to map a multi-service incident or dependency graph, preferably " +
			"scoped to a namespace. " +
			"Returns Kubernetes resource nodes and edges (Services, workloads, Pods, " +
			"Ingresses, ConfigMaps, Secrets, owners) so you can see service-to-workload " +
			"traffic and ownership relationships instead of inspecting resources one by one. " +
			"Use view=traffic for routing/connectivity questions and view=resources for " +
			"ownership/deployment hierarchy. Always specify namespace unless you specifically " +
			"need a cross-namespace graph. If you already know the suspicious root, use " +
			"get_neighborhood for a smaller focused graph.",
		Annotations: readOnly,
	}, logToolCall("get_topology", handleGetTopology))

	mcp.AddTool(server, &mcp.Tool{
		Name: "get_neighborhood",
		Description: "Use when investigating cross-resource failures around a known " +
			"resource: service routing, targetPort/selector/endpoints problems, dependency " +
			"timeouts, config/secret refs, owner chains, or traffic not reaching pods. " +
			"Returns the BFS-expanded topology neighborhood around one root, which is " +
			"usually cheaper and clearer than get_topology once you have a suspect. " +
			"Typical flow: issues/search/list_resources identify a Service or workload, " +
			"then get_neighborhood traces its upstream/downstream Services, workloads, " +
			"Pods, refs, and owners. Profile auto (default) picks a bounded edge set " +
			"from the root kind; profile all expands every edge type and is heavier, " +
			"use it only when auto produced a too-narrow neighborhood. Hops defaults to " +
			"1 and maxes at 2. Nodes are RBAC-filtered; denied neighbors appear only as " +
			"aggregate omitted counts.",
		Annotations: readOnly,
	}, logToolCall("get_neighborhood", handleGetNeighborhood))

	mcp.AddTool(server, &mcp.Tool{
		Name: "get_events",
		Description: "Use for recent Kubernetes Warning events after an overview points " +
			"at a namespace or resource, or when the symptom is scheduling, pulling images, " +
			"restarts, failed mounts, readiness, or controller errors. Events are deduplicated " +
			"and sorted by recency with reason, message, and count. For a ranked issue list " +
			"that includes problems/conditions, use issues first.",
		Annotations: readOnly,
	}, logToolCall("get_events", handleGetEvents))

	mcp.AddTool(server, &mcp.Tool{
		Name: "get_pod_logs",
		Description: "Use only after narrowing to a specific Pod/container. Returns " +
			"diagnostically relevant log lines (errors, panics, stack traces, warnings) " +
			"or falls back to recent tail lines. Set grep to server-side filter like " +
			"`kubectl logs | grep PATTERN` when you know an error string, request path, " +
			"service name, or trace id. For broad incidents, first use issues, " +
			"get_dashboard, search, list_resources, or get_neighborhood to avoid reading " +
			"logs from many unrelated pods. If the target is a config value, feature flag, " +
			"CRD field, env ref, or YAML/spec content, use search rather than logs.",
		Annotations: readOnly,
	}, logToolCall("get_pod_logs", handleGetPodLogs))

	// NOTE(review): the description below spells the manual sequence as
	// "get_resource → events → get_pod_logs"; no tool named "events" is
	// registered in this function, so this presumably means get_events —
	// confirm and align the wording if so.
	mcp.AddTool(server, &mcp.Tool{
		Name: "diagnose",
		Description: "Use when the agent's decision is 'this workload is broken — find the " +
			"root cause / localize the failure'. Bundles for a single Pod/Deployment/" +
			"StatefulSet/DaemonSet: the resource (Kubernetes-shaped detail) + diagnostic " +
			"resourceContext (managedBy, exposes, selectedBy, uses, runsOn, " +
			"issue/audit/policy rollups) + current AND previous container logs across the " +
			"workload's pods + recent Warning events filtered to this resource + a " +
			"startupBlockers section when the workload can't reach Running (unschedulable " +
			"with the offending node constraint named, admission/quota rejection, or a " +
			"post-bind CNI/volume stall). Use for " +
			"CrashLoopBackOff, OOMKills, failed deploys, image-pull errors, readiness " +
			"flaps, scheduling failures, error-spewing services, or any workload " +
			"root-causing where you would otherwise call get_resource → events → " +
			"get_pod_logs → get_pod_logs(previous=true) in sequence — this returns the " +
			"same data in one round-trip. If you only need ONE facet (e.g. just spec, " +
			"just logs), prefer the targeted tool. Not for CRDs or non-workload kinds; " +
			"use get_resource (with optional include=events) for those.",
		Annotations: readOnly,
	}, logToolCall("diagnose", handleDiagnose))

	mcp.AddTool(server, &mcp.Tool{
		Name: "list_namespaces",
		Description: "List all Kubernetes namespaces with their status. " +
			"Use to discover available namespaces before filtering other queries.",
		Annotations: readOnly,
	}, logToolCall("list_namespaces", handleListNamespaces))

	mcp.AddTool(server, &mcp.Tool{
		Name: "get_changes",
		Description: "Use when the symptom is 'this worked earlier' or 'something broke " +
			"after a deploy/config change.' Returns a chronological feed of resource " +
			"creates, updates, and deletes such as image changes, ConfigMap edits, scale " +
			"events, label edits, and rollout churn. This is often faster than reading " +
			"ReplicaSet histories or individual audit/log streams. Pair with since to " +
			"bound the window; filter by namespace, kind, or name when you know the scope. " +
			"Omit namespace when the relevant change may be outside the app namespace.",
		Annotations: readOnly,
	}, logToolCall("get_changes", handleGetChanges))

	// --- Audit tool (read-only) ---

	mcp.AddTool(server, &mcp.Tool{
		Name: "get_cluster_audit",
		Description: "Use when the agent's decision is 'is this cluster well-configured / " +
			"compliant?' — STATIC CONFIG POSTURE, not live operational state. Returns " +
			"best-practice findings: Security (runAsRoot, privileged containers, dangerous " +
			"capabilities, hostPath/hostNetwork, secret-in-ConfigMap), Reliability (single " +
			"replicas, missing PDB, missing TopologySpread, podHARisk, Service/Ingress " +
			"without matching backends, stuckTerminating, deprecatedAPIVersion), and " +
			"Efficiency (missing resource requests/limits, orphaned ConfigMaps/Secrets, " +
			"under/over-utilization). Each finding has remediation guidance. " +
			"INDEPENDENT of operational health: a healthy pod can have many audit findings " +
			"(badly configured but working), a crashing pod can have zero (cleanly " +
			"configured but failing). For 'what's broken right now?' use the issues tool. " +
			"Respects user's audit settings (ignored namespaces, disabled checks). Filter " +
			"by namespace, category, or severity. Resources absent from findings should " +
			"NOT be reported as non-compliant — empty findings for a scope means no " +
			"violations, not a failed check.",
		Annotations: readOnly,
	}, logToolCall("get_cluster_audit", handleGetAudit))

	// --- Helm tools (read-only) ---

	mcp.AddTool(server, &mcp.Tool{
		Name: "list_helm_releases",
		Description: "List all Helm releases in the cluster with their status and health. " +
			"Returns release name, namespace, chart, version, status (deployed/failed/pending), " +
			"and resource health (healthy/degraded/unhealthy). " +
			"Use to get an overview of what's deployed via Helm before inspecting individual releases.",
		Annotations: readOnly,
	}, logToolCall("list_helm_releases", handleListHelmReleases))

	mcp.AddTool(server, &mcp.Tool{
		Name: "get_helm_release",
		Description: "Get detailed information about a specific Helm release including owned resources " +
			"and their status. Optionally include values, revision history, or manifest diff between revisions " +
			"using the 'include' parameter (comma-separated: values, history, diff). " +
			"diff_revision_1 and diff_revision_2 are only used when include contains diff.",
		Annotations: readOnly,
	}, logToolCall("get_helm_release", handleGetHelmRelease))

	// --- Packages tool (read-only) ---
	//
	// Higher-level than list_helm_releases: collapses Helm releases,
	// workload labels, CRD registrations, and GitOps declarations
	// (Argo Applications + Flux HelmReleases/Kustomizations) into a
	// unified "what's installed in this cluster" view with source
	// provenance. Each row's `sources` field shows which detection
	// channels voted "this is installed" — H, L, C, A, F.
	mcp.AddTool(server, &mcp.Tool{
		Name: "list_packages",
		Description: "List installed packages (Helm releases, label-managed workloads, CRDs, " +
			"Argo Applications, Flux HelmReleases + Kustomizations) with their sources, " +
			"versions, and health. Each row carries a `sources` array (H=Helm API, " +
			"L=workload labels, C=CRD registrations, A=Argo declaration, F=Flux declaration) " +
			"so the caller can see WHY this package is detected, plus a `contributors` " +
			"array with per-source detail (each source's view of health/version, plus the " +
			"GitOps controller resource identity in declarationName/declarationNamespace " +
			"for sources A and F). Aggregated row-level health is worst-of contributors; " +
			"row-level version is first-source-priority — read `contributors` to detect " +
			"same-cluster disagreement. Use to answer 'what's installed?' / 'what version " +
			"of cert-manager is running?' / 'are there orphaned operators?' in a single " +
			"call instead of combining list_helm_releases + list_resources + manual merge. " +
			"Filter by namespace, source, or chart substring. Response includes " +
			"`sourcesErrored` listing any sources that failed (e.g. RBAC denied for Helm " +
			"release secrets, Helm client not initialized, GitOps informer errors other " +
			"than the controller's CRDs being absent). When this is non-empty, results " +
			"are still returned but are partial — fewer rows than expected may indicate a " +
			"dropped source rather than nothing installed. ArgoCD/FluxCD CRDs that are " +
			"simply not installed in the cluster do NOT appear in sourcesErrored.",
		Annotations: readOnly,
	}, logToolCall("list_packages", handleListPackages))

	// --- Issues (read-only) ---

	mcp.AddTool(server, &mcp.Tool{
		Name: "issues",
		Description: "Use when the agent's decision is 'what's broken right now?' — LIVE " +
			"OPERATIONAL STATE, not config posture. Returns a ranked list of currently " +
			"failing resources: failing Deployments/StatefulSets/CronJobs/HPAs/Nodes/Jobs/" +
			"PVCs, dangling-reference errors like Pod→missing PVC/CM/Secret/SA, HPA→missing " +
			"scaleTargetRef, Ingress→missing backend Service, RoleBinding→missing Role, " +
			"webhook→missing Service, pod startup blockers — why a Pod can't reach Running: " +
			"unschedulable (arch/taint/resources/affinity), admission-rejected " +
			"(quota/PodSecurity/webhook), or stuck post-bind (CNI/volume), and False " +
			".status.conditions on CRDs from Argo/Flux/Knative/Crossplane/cert-manager/KEDA. " +
			"Severity normalized to critical/warning. This is one curated stream — there is " +
			"no source filter; each row carries a `source` label (problem|missing_ref|" +
			"scheduling|condition) you can slice on via the CEL filter= if needed. Some " +
			"rows include `diagnostic_context`: deterministic facts such as explicit " +
			"missing refs, selected backend issues, or workload rollups; treat these as " +
			"triage context, not proof of root cause. " +
			"For raw Kubernetes Warning events use get_events; for static best-practice / " +
			"security-posture findings (runAsRoot, missing PDB, no probes, missing resource " +
			"limits) use get_cluster_audit — a separate axis that must never be conflated (a " +
			"healthy pod can have many audit findings; a crashing pod can have zero). Kyverno " +
			"PolicyReport violations are not in either — they surface per-resource via " +
			"get_resource's resourceContext policy rollup. " +
			"After identifying a suspect issue, call diagnose when the affected resource " +
			"is a workload (Pod/Deployment/StatefulSet/DaemonSet) — it bundles spec + " +
			"logs + events + context in one call. For non-workload kinds, call " +
			"get_resource. Use get_neighborhood when the failure likely crosses " +
			"Services/workloads/Pods/dependencies. Use namespace for app-local triage; " +
			"omit it when the root may be cluster-scoped or outside the app namespace.",
		Annotations: readOnly,
	}, logToolCall("issues", handleIssuesTool))

	// --- Search (read-only) ---

	mcp.AddTool(server, &mcp.Tool{
		Name: "search",
		Description: "Find resources by content/term match when you do not know which object " +
			"contains a string, config key, env ref, image, label/annotation value, " +
			"ConfigMap data, CRD field, or status message. Tokens are AND'd. " +
			"Secret content is intentionally NOT indexed — Secret names match by " +
			"metadata, but data values won't appear in snippets to avoid leaking " +
			"secret material through search results. " +
			"Examples: `readinessProbe user-service`, `image:flagd`, `kind:Pod label:app=cart error`. " +
			"Modifiers such as kind:Pod, ns:foo, label:app=bar, and image:redis narrow a " +
			"term match; modifier-only queries are enumeration, so use list_resources when " +
			"you already know the kind/namespace. Returns ranked hits with snippets and " +
			"summaryContext. Use CEL filter for structural predicates. Searches typed kinds " +
			"plus warmed CRDs; cold CRDs need list_resources first.",
		Annotations: readOnly,
	}, logToolCall("search", handleSearch))

	// --- RBAC reverse-lookup (read-only) ---

	mcp.AddTool(server, &mcp.Tool{
		Name: "get_subject_permissions",
		Description: "Get the effective RBAC permissions of a Kubernetes subject " +
			"(ServiceAccount, User, or Group) — what can this principal do across " +
			"the cluster. Returns: the bindings that grant access (each pointing at " +
			"its Role/ClusterRole), a deduplicated flat rule list, and (for " +
			"ServiceAccounts) the Pods running as this SA. " +
			"Use this to answer 'is this SA over-privileged?', 'why can X do Y?', " +
			"or 'what's the blast radius if this Pod is compromised?'. " +
			"For ServiceAccount, namespace is required. For User/Group, omit namespace " +
			"(those are external identities, not namespaced resources). " +
			"Inherited grants from implicit group memberships (system:authenticated, " +
			"system:serviceaccounts) are included for ServiceAccount subjects with the " +
			"`inheritedFromGroup` field set per binding so you can distinguish direct " +
			"from inherited grants.",
		Annotations: readOnly,
	}, logToolCall("get_subject_permissions", handleGetSubjectPermissions))

	// --- Workload logs (read-only) ---

	mcp.AddTool(server, &mcp.Tool{
		Name: "get_workload_logs",
		Description: "Get aggregated logs from all pods of a workload (Deployment, StatefulSet, " +
			"or DaemonSet). Logs are collected from all matching pods concurrently, then " +
			"server-side filtered to errors, warnings, panics, and stack traces using " +
			"deterministic regex patterns and deduplicated. Set grep for additional " +
			"server-side filtering before that summary stage, like `kubectl logs | grep PATTERN`. " +
			"More useful than get_pod_logs when you need logs across all replicas of a workload. " +
			"If the target is a config value, feature flag, CRD field, env ref, or YAML/spec " +
			"content, use search rather than logs.",
		Annotations: readOnly,
	}, logToolCall("get_workload_logs", handleGetWorkloadLogs))

	// --- Write tools (workload, cronjob, gitops, apply/patch, node) ---
	//
	// Everything from here on uses the writeTool annotations.

	mcp.AddTool(server, &mcp.Tool{
		Name: "manage_workload",
		Description: "Perform operations on a Kubernetes workload (Deployment, StatefulSet, or DaemonSet). " +
			"Supported actions: 'restart' triggers a rolling restart, 'scale' changes the replica count " +
			"(requires 'replicas' parameter), 'rollback' reverts to a previous revision " +
			"(requires 'revision' parameter). Use list_resources or get_dashboard first to identify the target.",
		Annotations: writeTool,
	}, logToolCall("manage_workload", handleManageWorkload))

	mcp.AddTool(server, &mcp.Tool{
		Name: "manage_cronjob",
		Description: "Perform operations on a Kubernetes CronJob. Supported actions: " +
			"'trigger' creates a manual Job run from the CronJob's template, " +
			"'suspend' pauses the CronJob schedule (no new Jobs will be created), " +
			"'resume' re-enables a suspended CronJob's schedule.",
		Annotations: writeTool,
	}, logToolCall("manage_cronjob", handleManageCronJob))

	mcp.AddTool(server, &mcp.Tool{
		Name: "manage_gitops",
		Description: "Perform operations on GitOps resources (ArgoCD or FluxCD). " +
			"For ArgoCD: actions are 'sync' (trigger deployment), 'refresh', 'terminate', 'rollback', " +
			"'suspend' (disable auto-sync), 'resume' (re-enable auto-sync). Resource kind is always Application. " +
			"For FluxCD: actions are 'reconcile' (trigger sync), 'sync-with-source', 'suspend', 'resume'. " +
			"Requires 'kind' parameter (kustomization, helmrelease, gitrepository, etc.).",
		Annotations: writeTool,
	}, logToolCall("manage_gitops", handleManageGitOps))

	mcp.AddTool(server, &mcp.Tool{
		Name: "apply_resource",
		Description: "Create or update a Kubernetes resource from a YAML manifest. " +
			"In 'apply' mode (default), performs a server-side apply with FieldManager=radar " +
			"and reports field ownership conflicts instead of taking ownership by default. " +
			"Set force=true only when you intend to take field ownership from other managers " +
			"(Helm, Flux, GitOps controllers, kubectl). " +
			"In 'create' mode, performs a strict create that fails if the resource already exists. " +
			"Supports multi-document YAML separated by '---'. " +
			"Use dry_run to validate without persisting changes and preview the server-side result. " +
			"Multi-document failures return per-document status because earlier documents may already be applied. " +
			"By default returns compact " +
			"post-mutation state, submitted-vs-live spec differences, rollout/pod status " +
			"for workloads, and current related issues; " +
			"set verify=false only when you need a terse write result.",
		Annotations: writeTool,
	}, logToolCall("apply_resource", handleApplyResource))

	mcp.AddTool(server, &mcp.Tool{
		Name: "patch_resource",
		Description: "Patch one existing Kubernetes resource with JSON Patch, JSON Merge Patch, or strategic merge patch. " +
			"Use this for precise field/list mutations such as removing a bad dnsConfig, hostPort, " +
			"initContainers field, sidecar container, nodeSelector, or replacing one scalar value. " +
			"Prefer this over apply_resource when you know the exact field to mutate and do not want " +
			"to rewrite the full manifest or take broad server-side-apply ownership. " +
			"For patch_type=json, patch must be an RFC 6902 JSON Patch array. For patch_type=merge, " +
			"patch must be a JSON object. For patch_type=strategic, use a JSON object against built-in " +
			"Kubernetes kinds when you need name-keyed list merging, such as editing one container. " +
			"By default returns compact post-patch state and " +
			"dry-run preview diffs; JSON Patch calls also include per-operation field checks. Set " +
			"verify=false only when you need a terse write result.",
		Annotations: writeTool,
	}, logToolCall("patch_resource", handlePatchResource))

	mcp.AddTool(server, &mcp.Tool{
		Name: "manage_node",
		Description: "Perform operations on a Kubernetes node. " +
			"Supported actions: 'cordon' marks the node as unschedulable (no new pods will be scheduled), " +
			"'uncordon' marks the node as schedulable again, " +
			"'drain' cordons the node and evicts all non-DaemonSet pods. " +
			"Drain options: 'delete_empty_dir_data' (allow evicting pods with emptyDir volumes), " +
			"'force' (evict pods not managed by a controller), 'timeout' (seconds, default 60).",
		Annotations: writeTool,
	}, logToolCall("manage_node", handleManageNode))
}

// Tool input types

// dashboardInput is the argument struct received by handleGetDashboard (the
// get_dashboard tool). Its only field is optional; an empty Namespace makes
// the handler require cluster-wide namespace access instead of a
// per-namespace check.
type dashboardInput struct {
	Namespace string `json:"namespace,omitempty" jsonschema:"filter to a specific namespace. Use when triaging one app/tenant namespace before drilling into individual resources."`
}

// topResourcesInput describes the ranking arguments (kind, namespace, sort
// key, row limit). Judging by the field docs this is the input of the
// top_resources tool; its handler is outside this view — confirm there.
// All fields are optional (omitempty); defaults are stated in the tag text
// and are presumably applied by the handler.
type topResourcesInput struct {
	Kind      string `json:"kind,omitempty" jsonschema:"what to rank: pods (default), workloads, or nodes"`
	Namespace string `json:"namespace,omitempty" jsonschema:"filter pods/workloads to a namespace. Required for namespace-restricted users unless they have cluster-wide namespace access."`
	Sort      string `json:"sort,omitempty" jsonschema:"sort by cpu (default) or memory"`
	Limit     int    `json:"limit,omitempty" jsonschema:"max rows returned, default 20, max 100"`
}

// listResourcesInput describes a same-kind listing request. Judging by the
// field docs this is the input of the list_resources tool; its handler is
// outside this view — confirm there. Kind is the only field without
// omitempty, so it is presumably the one required argument (verify against
// the SDK's schema inference).
type listResourcesInput struct {
	Kind      string `json:"kind" jsonschema:"resource kind to list for a broad sweep, e.g. pods/po, deployments/deploy, services/svc, configmaps/cm. Prefer this before get_resource when comparing many same-kind objects."`
	Group     string `json:"group,omitempty" jsonschema:"API group when the kind is ambiguous (e.g. serving.knative.dev for Knative Service vs core Service)"`
	Namespace string `json:"namespace,omitempty" jsonschema:"filter to a specific namespace for app-scoped triage"`
	Context   string `json:"context,omitempty" jsonschema:"per-row context: default attaches summaryContext (managedBy + health + issueCount) for suspect ranking; 'none' returns bare rows"`
}

// getResourceInput identifies one object by kind, optional API group,
// optional namespace, and name, plus two optional knobs (Include, Context)
// controlling extra data attached to the response. Judging by the field docs
// this is the input of the get_resource tool; its handler is outside this
// view — confirm there. Kind and Name lack omitempty and are presumably the
// required arguments.
type getResourceInput struct {
	Kind      string `json:"kind" jsonschema:"resource kind, e.g. pod, deployment, service"`
	Group     string `json:"group,omitempty" jsonschema:"API group when the kind is ambiguous (e.g. cluster.x-k8s.io for CAPI Cluster vs CNPG Cluster)"`
	Namespace string `json:"namespace,omitempty" jsonschema:"namespace for namespaced kinds. Leave empty for cluster-scoped kinds (Node, ClusterRole, ClusterRoleBinding, IngressClass, PriorityClass, StorageClass, etc.)."`
	Name      string `json:"name" jsonschema:"resource name"`
	Include   string `json:"include,omitempty" jsonschema:"optional sidecar data after narrowing to this object: events, metrics. Separate from context. For logs use get_pod_logs / get_workload_logs (container, previous, since, grep) or diagnose for the full workload bundle."`
	Context   string `json:"context,omitempty" jsonschema:"resourceContext tier: 'basic' (default; attaches managedBy / exposes / selectedBy / uses / runsOn / issueSummary / auditSummary rollups) or 'none' (bare minified resource). For full diagnostic tier with logs + events bundled, use the diagnose tool instead."`
}

// topologyInput carries the optional namespace scope, view mode, and output
// format for a topology request. Judging by the field docs this is the input
// of the get_topology tool; its handler is outside this view — confirm there.
type topologyInput struct {
	Namespace string `json:"namespace,omitempty" jsonschema:"filter to a specific namespace for a multi-service incident map; recommended unless you need cross-namespace topology"`
	View      string `json:"view,omitempty" jsonschema:"view mode: traffic for service routing/connectivity or resources for ownership hierarchy"`
	Format    string `json:"format,omitempty" jsonschema:"output format: graph (default, full node/edge data) or summary (text description of resource chains)"`
}

// eventsInput carries optional filters (namespace, involved kind/name) and a
// result limit for an events query. Judging by the field docs this is the
// input of the get_events tool; its handler is outside this view — confirm
// there. Every field is optional.
type eventsInput struct {
	Namespace string `json:"namespace,omitempty" jsonschema:"filter to a specific namespace"`
	Limit     int    `json:"limit,omitempty" jsonschema:"max 100, default 20"`
	Kind      string `json:"kind,omitempty" jsonschema:"filter to events involving this resource kind (e.g. Pod, Deployment)"`
	Name      string `json:"name,omitempty" jsonschema:"filter to events involving this resource name"`
}

// getChangesInput carries optional scope filters (namespace, kind, name), a
// look-back window, and a result limit for a change-feed query. Judging by
// the field docs this is the input of the get_changes tool; its handler is
// outside this view — confirm there. Since is a string; per its tag it holds
// a duration such as 1h or 30m (parsing is not visible here).
type getChangesInput struct {
	Namespace string `json:"namespace,omitempty" jsonschema:"filter to a specific namespace"`
	Kind      string `json:"kind,omitempty" jsonschema:"filter to a resource kind (e.g. Deployment, Pod)"`
	Name      string `json:"name,omitempty" jsonschema:"filter to a specific resource name"`
	Since     string `json:"since,omitempty" jsonschema:"duration to look back, e.g. 1h, 30m, 24h (default 1h)"`
	Limit     int    `json:"limit,omitempty" jsonschema:"max changes to return (default 20, max 50)"`
}

// podLogsInput identifies one Pod (namespace + name) and carries optional log
// selection knobs: container, tail length, grep pattern, time window, and
// previous-instance flag. Judging by the field docs this is the input of the
// get_pod_logs tool; its handler is outside this view — confirm there.
// Namespace and Name lack omitempty and are presumably the required
// arguments.
type podLogsInput struct {
	Namespace string `json:"namespace" jsonschema:"pod namespace"`
	Name      string `json:"name" jsonschema:"pod name"`
	Container string `json:"container,omitempty" jsonschema:"container name, defaults to first container"`
	TailLines int    `json:"tail_lines,omitempty" jsonschema:"number of lines to fetch from the end (default 200)"`
	Grep      string `json:"grep,omitempty" jsonschema:"optional regular expression to keep matching log lines before diagnostic filtering, like kubectl logs | grep PATTERN"`
	Since     string `json:"since,omitempty" jsonschema:"only return logs newer than this duration (e.g. 30s, 10m, 1h), like kubectl logs --since"`
	Previous  bool   `json:"previous,omitempty" jsonschema:"return logs from the previous terminated container instance (e.g. for CrashLoopBackOff diagnosis), like kubectl logs -p"`
}

// searchInput carries the query string plus optional hit limit, per-hit
// detail level, CEL filter expression, and per-hit context mode. Judging by
// the field docs this is the input of the search tool; its handler is outside
// this view — confirm there. Query lacks omitempty and is presumably the one
// required argument.
type searchInput struct {
	Query   string `json:"query" jsonschema:"search query for unknown resources or broad content scans. Free tokens AND'd. Matches identity plus searchable object content. Examples: adServiceFailure, kind:NetworkChaos delay, kind:ConfigMap flagd, image:flagd. Modifiers: kind:Pod, kind:NetworkChaos, ns:foo, label:k=v, image:redis"`
	Limit   int    `json:"limit,omitempty" jsonschema:"max hits returned (default 50, max 500)"`
	Include string `json:"include,omitempty" jsonschema:"per-hit detail: summary (default), raw, or none"`
	Filter  string `json:"filter,omitempty" jsonschema:"optional CEL boolean expression run against each candidate K8s object. Bindings: kind, apiVersion, metadata, spec, status, labels, annotations. Use has(x.y) before optional fields. Examples: 'kind == \"Pod\" && status.phase == \"Failed\"', 'labels[\"app\"] == \"cart\"', 'has(status.readyReplicas) && status.readyReplicas == 0'"`
	Context string `json:"context,omitempty" jsonschema:"per-hit context: default attaches summaryContext (managedBy + health + issueCount) for suspect ranking; 'none' returns bare hits"`
}

// issuesInput carries optional filters for an issues query: namespace,
// comma-separated severity and kind lists, a result limit, and a CEL filter
// expression whose available bindings are enumerated in the Filter tag.
// Judging by the field docs this is the input of the issues tool; its handler
// is outside this view — confirm there. Every field is optional.
//
// NOTE(review): the Filter tag mentions a clusters= argument for
// cross-cluster scoping, but this struct declares no such field — confirm
// whether that argument is handled elsewhere or the tag text is stale.
type issuesInput struct {
	Namespace string `json:"namespace,omitempty" jsonschema:"filter to one namespace"`
	Severity  string `json:"severity,omitempty" jsonschema:"comma-separated: critical,warning"`
	Kind      string `json:"kind,omitempty" jsonschema:"comma-separated kind filter (e.g. Deployment,Pod)"`
	Limit     int    `json:"limit,omitempty" jsonschema:"max issues returned (default 200, max 1000)"`
	Filter    string `json:"filter,omitempty" jsonschema:"optional CEL boolean expression run against each composed Issue. Bindings: severity (critical|warning), category (e.g. crashloop, image_pull_failed, missing_config_ref, gitops_sync_failed), category_group (startup|runtime|scheduling|configuration|networking|storage|scaling|security|control_plane), source (problem|missing_ref|scheduling|condition), kind, group, ns (the namespace — use 'ns', not 'namespace' which is a CEL reserved word), name, reason, message, count (int, the affected-resource fan-out), grouping_scope (workload|service|node|…), restart_count (int), last_terminated_reason, first_seen + last_seen (unix seconds — prefer first_seen for onset/age; last_seen churns to compose-time). For cross-cluster scoping use clusters= (not a CEL predicate). Examples: 'severity == \"critical\" && count > 5', 'category_group == \"startup\"', 'restart_count > 10', 'first_seen < timestamp(\"2026-05-01T00:00:00Z\").getSeconds()'"`
}

// Tool handlers

// handleGetDashboard serves the dashboard summary. It refuses the call when
// the cluster cache is unavailable or the caller lacks access to the requested
// scope, and otherwise returns the dashboard — wrapped with a narrowHint when
// the problem list was capped below TotalProblems.
func handleGetDashboard(ctx context.Context, req *mcp.CallToolRequest, input dashboardInput) (*mcp.CallToolResult, any, error) {
	cache := k8s.GetResourceCache()
	if cache == nil {
		return nil, nil, fmt.Errorf("not connected to cluster")
	}

	// The summary takes a single namespace (no multi-namespace input). A
	// pinned namespace must be readable by the caller; an unpinned call needs
	// cluster-wide namespaced access. Cluster-scoped dashboard fields are
	// gated per kind further down.
	switch {
	case input.Namespace != "":
		if !checkNamespaceAccess(ctx, input.Namespace) {
			return nil, nil, fmt.Errorf("forbidden: no access to namespace %q", input.Namespace)
		}
	case filterNamespacesForUser(ctx, nil) != nil:
		return nil, nil, fmt.Errorf("forbidden: dashboard summary requires cluster-wide namespace access — pass a specific namespace you have access to")
	}

	nodesReadable := canReadClusterScopedKind(ctx, "nodes", "", "list")
	namespacesReadable := canReadClusterScopedKind(ctx, "namespaces", "", "list")
	dashboard := buildDashboard(ctx, cache, input.Namespace, nodesReadable, namespacesReadable)

	shown, total := len(dashboard.Problems), dashboard.TotalProblems
	if total <= shown {
		return toJSONResult(dashboard)
	}

	// The list was capped: say so explicitly so the agent doesn't read the
	// subset as the full set. TotalProblems + ProblemsBySeverity already carry
	// the counts; the hint is the steering signal to narrow with namespace=.
	hint := fmt.Sprintf(
		"showing %d of %d problems (sorted by severity then recency) — narrow with namespace= for the full list at that scope",
		shown, total,
	)
	return toJSONResult(getDashboardResponseMCP{
		mcpDashboard: dashboard,
		NarrowHint:   hint,
	})
}

// getDashboardResponseMCP wraps mcpDashboard so the JSON shape stays
// identical except for the added narrowHint field when the dashboard cap
// truncated the problem list. Same pattern as get_changes / get_events.
//
// mcpDashboard is embedded without a tag so its fields stay at the top level
// of the encoded object; NarrowHint is dropped from the output when empty.
type getDashboardResponseMCP struct {
	mcpDashboard
	NarrowHint string `json:"narrowHint,omitempty"`
}

// handleTopResources serves the top-consumers view for pods, workloads, or
// nodes. Access problems on a known scope are reported in-band through the
// response's Reason field; an unknown kind, or a namespace-restricted caller
// who did not pin a namespace, gets an error instead.
func handleTopResources(ctx context.Context, _ *mcp.CallToolRequest, input topResourcesInput) (*mcp.CallToolResult, any, error) {
	opts := k8s.NormalizeTopMetricsOptions(k8s.TopMetricsOptions{
		Kind:      input.Kind,
		Namespace: input.Namespace,
		Sort:      input.Sort,
		Limit:     input.Limit,
	})

	switch opts.Kind {
	case k8s.TopMetricsKindNodes:
		// Nodes are cluster-scoped: gate on the per-kind check only.
		if !canReadClusterScopedKind(ctx, "nodes", "", "list") {
			return toJSONResult(k8s.TopMetricsResponse{
				Kind:   opts.Kind,
				Sort:   opts.Sort,
				Reason: "no access to nodes (cluster-scoped resource requires explicit RBAC)",
			})
		}
	case k8s.TopMetricsKindPods, k8s.TopMetricsKindWorkloads:
		// Namespaced kinds: a pinned namespace must be readable; an unpinned
		// call is only allowed for callers without a namespace restriction.
		if opts.Namespace == "" {
			if filterNamespacesForUser(ctx, nil) != nil {
				return nil, nil, fmt.Errorf("namespace is required when access is namespace-restricted")
			}
		} else if !checkNamespaceAccess(ctx, opts.Namespace) {
			return toJSONResult(k8s.TopMetricsResponse{
				Kind:      opts.Kind,
				Sort:      opts.Sort,
				Namespace: opts.Namespace,
				Reason:    "no access to namespace",
			})
		}
	default:
		return nil, nil, fmt.Errorf("unknown kind %q (want pods, workloads, or nodes)", input.Kind)
	}

	resp := k8s.BuildTopMetrics(opts)

	// Rows live in Items or, when that is empty, in Workloads.
	rows := len(resp.Items)
	if rows == 0 {
		rows = len(resp.Workloads)
	}

	// Two independent hints may apply at once:
	//   1. the row count reached the limit, so the cap was probably hit;
	//   2. some resources had no metrics samples and were skipped, so an
	//      empty/short list isn't necessarily "no data".
	var hints []string
	if opts.Limit > 0 && rows >= opts.Limit {
		capHint := fmt.Sprintf(
			"returned %d rows at limit — narrow with namespace= (for pods/workloads), tighten sort by switching cpu/memory, or raise limit (cap 100)",
			rows,
		)
		hints = append(hints, capHint)
	}
	if resp.SkippedNoMetrics > 0 {
		skipHint := fmt.Sprintf(
			"%d %s skipped (no metrics samples yet — typically new/Pending pods or a metrics-server scrape gap)",
			resp.SkippedNoMetrics, opts.Kind,
		)
		hints = append(hints, skipHint)
	}
	if len(hints) == 0 {
		return toJSONResult(resp)
	}
	return toJSONResult(topResourcesResponseMCP{
		TopMetricsResponse: resp,
		NarrowHint:         strings.Join(hints, "; "),
	})
}

// topResourcesResponseMCP embeds TopMetricsResponse so the JSON shape stays
// identical except for the added narrowHint field. handleTopResources only
// uses this wrapper when at least one hint applies; NarrowHint is omitted
// from the output when empty.
type topResourcesResponseMCP struct {
	k8s.TopMetricsResponse
	NarrowHint string `json:"narrowHint,omitempty"`
}

// handleListResources lists resources of one kind (optionally qualified by
// API group and namespace) as summary-level minified rows, each with a
// summaryContext sidecar unless the caller passes context=none.
//
// Access failures are reported as an empty list rather than an error, so a
// caller without access cannot distinguish "forbidden" from "nothing there".
// Unknown typed kinds fall through to the dynamic (CRD) cache.
func handleListResources(ctx context.Context, req *mcp.CallToolRequest, input listResourcesInput) (*mcp.CallToolResult, any, error) {
	cache := k8s.GetResourceCache()
	if cache == nil {
		return nil, nil, fmt.Errorf("not connected to cluster")
	}

	kind := strings.ToLower(input.Kind)
	group := input.Group
	// Normalize the opt-out token the same way get_resource does, so
	// "None" / " none " behave identically across both tools.
	contextMode := strings.ToLower(strings.TrimSpace(input.Context))
	var requested []string
	if input.Namespace != "" {
		requested = []string{input.Namespace}
	}

	// Cluster-scoped kinds (static cluster-only list + cluster-scoped CRDs
	// from discovery) are gated per-kind via SAR. Run BEFORE the namespace
	// filter check so users with explicit cluster-scoped RBAC but no
	// namespace access can still read those resources.
	//
	// "namespaces" is cluster-scoped at the K8s API. Full Namespace objects
	// require explicit list-namespaces SAR. Read access to resources IN a
	// namespace (list pods etc.) does not imply read access to the Namespace
	// resource itself. Restricted users use the dedicated list_namespaces
	// MCP tool, which serves a synthesized {name, status} view.
	isNamespacesKind := kind == "namespaces" || kind == "namespace"
	clusterScoped, _, _ := k8s.ClassifyKindScope(kind, group)
	if clusterScoped && !isNamespacesKind {
		if !canReadClusterScopedKind(ctx, kind, group, "list") {
			return toJSONResult([]any{})
		}
	}
	if isNamespacesKind && !canReadClusterScopedKind(ctx, "namespaces", "", "list") {
		return toJSONResult([]any{})
	}

	// Filter the requested namespaces against the user's RBAC. Returns:
	//   nil       — auth off or cluster-wide namespaced access: pass through
	//   []string{} — user has no namespace access
	//   [...]     — restrict reads to these namespaces
	allowed := filterNamespacesForUser(ctx, requested)
	if !clusterScoped && allowed != nil && len(allowed) == 0 {
		return toJSONResult([]any{})
	}

	// Per-kind RBAC inside a namespace for Secrets. The cache reads as the SA,
	// and the chart can grant the SA cluster-wide secrets (rbac.secrets,
	// rbac.helm, auth.mode != "none", or cloud.enabled — Helm release
	// visibility), so per-user RBAC must gate the read. canReadInNamespace
	// and filterNamespacesByCanRead pass through when no user is on context
	// (auth-mode=none — SA RBAC at the cache layer is the only gate). Other
	// namespaced kinds are deferred.
	if kind == "secrets" || kind == "secret" {
		if allowed == nil {
			if !canReadInNamespace(ctx, "", "secrets", "", "list") {
				return toJSONResult([]any{})
			}
		} else {
			allowed = filterNamespacesByCanRead(ctx, "", "secrets", "list", allowed)
			if len(allowed) == 0 {
				return toJSONResult([]any{})
			}
		}
	}

	// For cluster-scoped reads, force a cluster-wide list (don't iterate
	// per allowed namespace — cluster-scoped resources don't live there).
	listScope := allowed
	if clusterScoped {
		listScope = nil
	}

	// When a group is specified, route to the dynamic cache so CRDs whose
	// plural collides with a core kind (e.g. Knative serving.knative.dev/Service
	// vs corev1 ""/Service) reach the right resource. FetchResourceList is
	// group-blind — it would silently return the core typed list, dropping the
	// caller's group filter. But a built-in addressed by its own group
	// (deployments?group=apps) is a typed lookup — the dynamic cache has no
	// informer for built-ins — so only true CRDs / plural collisions go dynamic.
	// Mirrors the group-aware dispatch in the REST handlers.
	if group != "" && !k8s.TypedKindOwnsGroup(kind, group) {
		return listDynamicResources(ctx, cache, kind, group, listScope, clusterScoped, contextMode)
	}

	// Try typed cache first (group=="" → core/built-in lookup).
	objs, err := k8s.FetchResourceList(cache, kind, listScope)
	if err == k8s.ErrUnknownKind {
		// Fall through to dynamic cache for CRDs. ClassifyKindScope/SAR
		// above already authorized cluster-scoped CRDs; namespaced CRDs
		// are scoped via listScope. Pass clusterScoped through so the
		// issue index drops the namespace filter for cluster-scoped
		// CRDs — those issues live at namespace="" and would otherwise
		// be filtered out by the user's namespaced-access set.
		return listDynamicResources(ctx, cache, kind, group, listScope, clusterScoped, contextMode)
	}
	if err != nil {
		return nil, nil, fmt.Errorf("failed to list %s: %w", kind, err)
	}

	// Post-filter for namespace-restricted users. Namespaced kinds and
	// "namespaces" are always trimmed to the allowed set; other
	// cluster-scoped kinds were authorized by the per-kind SAR above and are
	// returned in full.
	if allowed != nil && (!clusterScoped || isNamespacesKind) {
		objs = retainAllowedObjects(objs, allowed, kind)
	}

	results, err := aicontext.MinifyList(objs, aicontext.LevelSummary)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to minify: %w", err)
	}

	// Attach summaryContext per row unless caller opted out. Issue index
	// is scoped to the listed kind so the per-row count reflects only
	// the resource being listed (not unrelated noise in the namespace).
	//
	// Cluster-scoped kinds (Node, PV, cluster-scoped CRDs) emit issues
	// at namespace="" — scoping the index to the user's namespaced
	// access set would silently zero issueCount on every row. The
	// cluster-scoped RBAC gate above (canReadClusterScopedKind) already
	// authorized the read, so we pass nil here to compose cluster-wide.
	if contextMode != "none" {
		idxNamespaces := allowed
		if clusterScoped {
			idxNamespaces = nil
		}
		if builder := newResourceSummaryContextBuilder(idxNamespaces); builder != nil {
			summarycontext.AttachToTypedList(results, objs, builder)
		}
	}

	return toJSONResult(results)
}

// listDynamicResources lists a kind from the dynamic cache and returns the
// items minified at summary level. A non-empty namespaces slice is queried
// one namespace at a time; an empty or nil slice triggers a single query with
// namespace "". Unless contextMode is "none", each row gets a summaryContext
// attached; for cluster-scoped kinds the context index is built without a
// namespace restriction.
func listDynamicResources(ctx context.Context, cache *k8s.ResourceCache, kind, group string, namespaces []string, clusterScoped bool, contextMode string) (*mcp.CallToolResult, any, error) {
	// fetch wraps the per-scope cache read so both paths share one error shape.
	fetch := func(ns string) ([]*unstructured.Unstructured, error) {
		found, err := cache.ListDynamicWithGroup(ctx, kind, ns, group)
		if err != nil {
			return nil, fmt.Errorf("failed to list %s: %w", kind, err)
		}
		return found, nil
	}

	var rawItems []*unstructured.Unstructured
	if len(namespaces) == 0 {
		found, err := fetch("")
		if err != nil {
			return nil, nil, err
		}
		rawItems = found
	} else {
		for i := range namespaces {
			found, err := fetch(namespaces[i])
			if err != nil {
				return nil, nil, err
			}
			rawItems = append(rawItems, found...)
		}
	}

	rows := make([]any, 0, len(rawItems))
	for i := range rawItems {
		rows = append(rows, aicontext.MinifyUnstructured(rawItems[i], aicontext.LevelSummary))
	}

	if contextMode == "none" {
		return toJSONResult(rows)
	}

	// Cluster-scoped CRDs emit issues at namespace="" — a namespace-
	// restricted index would silently zero issueCount on every row. The
	// caller has already gated cluster-scoped reads, so composing
	// cluster-wide is safe here.
	indexScope := namespaces
	if clusterScoped {
		indexScope = nil
	}
	if builder := newResourceSummaryContextBuilder(indexScope); builder != nil {
		summarycontext.AttachToUnstructuredList(rows, rawItems, builder)
	}
	return toJSONResult(rows)
}

// handleGetResource fetches one resource at detail level. After the RBAC gate
// it reads from the typed cache, or from the dynamic cache when a group
// qualifier points away from the typed kind or the kind is unknown to the
// typed cache. By default it wraps the object with a resourceContext sidecar
// and advisory warnings; context=none with no includes returns the bare
// minified resource.
func handleGetResource(ctx context.Context, req *mcp.CallToolRequest, input getResourceInput) (*mcp.CallToolResult, any, error) {
	cache := k8s.GetResourceCache()
	if cache == nil {
		return nil, nil, fmt.Errorf("not connected to cluster")
	}

	kind := strings.ToLower(input.Kind)
	group, namespace, name := input.Group, input.Namespace, input.Name

	// Access gate, first match wins:
	//   - Namespace objects need an explicit get-namespaces SAR; read access
	//     to resources IN a namespace does not imply access to the object.
	//   - Other cluster-scoped kinds (static list + discovered CRDs) are
	//     gated per-kind via SAR, ahead of any namespace check, so users
	//     with cluster-scoped RBAC but no namespace access can still read.
	//   - Namespaced kinds need access to the namespace.
	//   - Secrets additionally need a per-kind get check: the chart can
	//     grant the SA cluster-wide secrets (Helm release visibility), so
	//     namespace access alone is not a sufficient gate.
	isNamespacesKind := kind == "namespaces" || kind == "namespace"
	clusterScoped, _, _ := k8s.ClassifyKindScope(kind, group)
	switch {
	case isNamespacesKind:
		if !canReadClusterScopedKind(ctx, "namespaces", "", "get") {
			return nil, nil, fmt.Errorf("forbidden: no access to namespace %q", name)
		}
	case clusterScoped:
		if !canReadClusterScopedKind(ctx, kind, group, "get") {
			return nil, nil, fmt.Errorf("forbidden: %s requires explicit cluster-scoped RBAC", kind)
		}
	case !checkNamespaceAccess(ctx, namespace):
		return nil, nil, fmt.Errorf("forbidden: no access to namespace %q", namespace)
	case kind == "secrets" || kind == "secret":
		if !canReadInNamespace(ctx, "", "secrets", namespace, "get") {
			return nil, nil, fmt.Errorf("forbidden: no access to secrets in namespace %q", namespace)
		}
	}

	var (
		resourceData any
		rawObj       runtime.Object
	)

	// loadDynamic reads from the dynamic cache and fills both outputs.
	loadDynamic := func() error {
		u, err := cache.GetDynamicWithGroup(ctx, kind, namespace, name, group)
		if err != nil {
			return fmt.Errorf("resource not found: %w", err)
		}
		resourceData = aicontext.MinifyUnstructured(u, aicontext.LevelDetail)
		rawObj = u
		return nil
	}

	// A group that the typed kind does not own must skip the typed cache:
	// typed FetchResource is group-blind (kind=services always yields the
	// core Service), so a call like serving.knative.dev/Service would
	// otherwise silently return the wrong object.
	if group != "" && !k8s.TypedKindOwnsGroup(kind, group) {
		if err := loadDynamic(); err != nil {
			return nil, nil, err
		}
	} else {
		obj, err := k8s.FetchResource(cache, kind, namespace, name)
		switch {
		case err == k8s.ErrUnknownKind:
			if dynErr := loadDynamic(); dynErr != nil {
				return nil, nil, dynErr
			}
		case err != nil:
			return nil, nil, fmt.Errorf("resource not found: %w", err)
		default:
			k8s.SetTypeMeta(obj)
			minified, minErr := aicontext.Minify(obj, aicontext.LevelDetail)
			if minErr != nil {
				return nil, nil, fmt.Errorf("failed to minify: %w", minErr)
			}
			resourceData = minified
			rawObj = obj
		}
	}

	// The resourceContext sidecar is on by default at the basic tier
	// (managedBy / exposes / selectedBy / runsOn / uses / issueSummary /
	// auditSummary / policySummary). context=none yields a bare minified
	// resource for bulk scans and raw jq work.
	includes := parseIncludes(input.Include)
	var (
		resourceCtx *resourcecontext.ResourceContext
		warnings    []string
	)
	if strings.ToLower(strings.TrimSpace(input.Context)) != "none" {
		resourceCtx = buildMCPResourceContext(ctx, rawObj, kind, namespace, name, resourcecontext.TierBasic)

		// State-derived advisory warnings (deletionTimestamp, external
		// manager, terminating namespace, workload health-condition history,
		// PVC stuck Pending). Cheap — computed from the object already in
		// hand. Skipped for context=none so that mode stays a bare object.
		warnings = k8score.EnrichRuntimeObjectWarnings(rawObj)
	}

	// Three shapes:
	//   - bare resource: no includes, context=none
	//   - resource + resourceContext: no includes, default context
	//   - resource + resourceContext + extras: includes set
	wantsExtras := len(includes) > 0
	if !wantsExtras && resourceCtx == nil && len(warnings) == 0 {
		return toJSONResult(resourceData)
	}

	envelope := map[string]any{"resource": resourceData}
	if resourceCtx != nil {
		envelope["resourceContext"] = resourceCtx
	}
	if len(warnings) > 0 {
		envelope["warnings"] = warnings
	}
	if wantsExtras {
		attachResourceExtras(ctx, cache, envelope, includes, kind, namespace, name)
	}
	return toJSONResult(envelope)
}

// buildMCPResourceContext produces the resourceContext sidecar that
// get_resource attaches to its result. It returns nil for a nil object.
//
// The issue and audit summaries are computed here and handed to Build via
// Options, together with a request-scoped access checker, app references
// derived from env service checks, and a service-backend lookup. The
// PolicyReport index and the topology providers are only wired in when they
// are available.
func buildMCPResourceContext(ctx context.Context, obj runtime.Object, kind, namespace, name string, tier resourcecontext.ContextTier) *resourcecontext.ResourceContext {
	if obj == nil {
		return nil
	}
	cache := k8s.GetResourceCache()

	// Prefer the kind recorded on the object; fall back to the caller's
	// (lowercased) kind when the object carries no TypeMeta kind.
	gvk := obj.GetObjectKind().GroupVersionKind()
	canonicalKind := kind
	if gvk.Kind != "" {
		canonicalKind = gvk.Kind
	}

	opts := resourcecontext.Options{Tier: tier}
	opts.IssueSummary = computeMCPIssueSummary(cache, gvk.Group, canonicalKind, namespace, name)
	opts.AuditSummary = computeMCPAuditSummary(cache, gvk.Group, canonicalKind, namespace, name)
	opts.AccessChecker = newMCPRequestScopedChecker(ctx)
	opts.AppReferences = resourcecontextrefs.AppReferencesFromEnvServiceChecks(
		k8s.FindEnvServiceRefChecksForObject(cache, obj),
	)
	opts.ServiceBackends = mcpServiceBackendLookup{cache: cache}

	if policyIdx := k8s.GetPolicyReportIndex(); policyIdx != nil {
		opts.PolicyReports = mcpPolicyReportLookupAdapter{idx: policyIdx}
	}

	topo, provider, dynamicProvider, available := mcpTopologyForContext(namespace)
	if available {
		opts.Topology = topo
		opts.Provider = provider
		opts.DynamicProv = dynamicProvider
	}

	return resourcecontext.Build(ctx, obj, opts)
}

// attachResourceExtras populates optional extras on the result map based on
// the includes set. Relationship synthesis moved to resourceContext via Build
// and is no longer routed through this function.
//
// Keys written to result:
//   - events / eventsError: up to 10 deduplicated events whose InvolvedObject
//     is this kind+name; eventsError when the lister is missing or fails.
//   - metrics / metricsError: pod metrics; metricsError when the fetch fails
//     or the kind is not a pod.
//   - logsError: always set for include=logs (no longer supported here).
//   - includeError: lists any unrecognized include tokens, sorted.
//
// Every requested token that cannot be honoured produces an explicit *Error
// key, so the caller never mistakes an ignored include for an empty success.
func attachResourceExtras(ctx context.Context, cache *k8s.ResourceCache, result map[string]any, includes map[string]bool, kind, namespace, name string) {
	if includes["events"] {
		if eventLister := cache.Events(); eventLister != nil {
			var events []*corev1.Event
			var listErr error
			if namespace != "" {
				events, listErr = eventLister.Events(namespace).List(labels.Everything())
			} else {
				events, listErr = eventLister.List(labels.Everything())
			}
			if listErr != nil {
				log.Printf("[mcp] Failed to list events for %s/%s/%s: %v", kind, namespace, name, listErr)
				result["eventsError"] = listErr.Error()
			} else {
				// Sidecar include — controller-level events only. Pod-level
				// events on a workload's pods (CrashLoopBackOff, etc.) require
				// resolving the pod set; that's the diagnose tool's job, not
				// this sidecar's. nil podNames intentionally restricts to
				// InvolvedObject == this kind+name.
				matched := filterEventsByInvolvedObject(events, normalizeDisplayKind(kind), name, nil)
				if len(matched) > 0 {
					deduplicated := aicontext.DeduplicateEvents(matched)
					if len(deduplicated) > 10 {
						deduplicated = deduplicated[:10]
					}
					result["events"] = deduplicated
				}
			}
		} else {
			result["eventsError"] = "events lister unavailable (insufficient permissions or cache cold)"
		}
	}

	if includes["metrics"] {
		if isPodKind(kind) {
			if metrics, err := k8s.GetPodMetrics(ctx, namespace, name); err == nil {
				result["metrics"] = metrics
			} else {
				log.Printf("[mcp] Failed to get pod metrics for %s/%s: %v", namespace, name, err)
				result["metricsError"] = err.Error()
			}
		} else {
			// Metrics are only fetched for pods. Say so instead of silently
			// returning nothing — same reasoning as logsError/includeError.
			result["metricsError"] = fmt.Sprintf("include=metrics is only supported for pods (got kind %q)", kind)
		}
	}

	// include=logs was dropped from get_resource (it was Pod-only and lacked
	// container/previous/since/grep). Signal it explicitly rather than silently
	// no-op'ing, so a client on a stale schema is redirected instead of seeing
	// an empty success.
	if includes["logs"] {
		result["logsError"] = "include=logs is no longer supported here; use get_pod_logs or get_workload_logs (container, previous, since, grep) or diagnose for the full workload bundle"
	}

	// Any other token (typo, or a value like "relationships" that moved to
	// resourceContext) is silently dropped by the branches above. Surface it
	// so the caller learns the token did nothing rather than seeing an empty
	// success — the same reason logs gets an explicit error.
	var unknown []string
	for tok := range includes {
		switch tok {
		case "events", "metrics", "logs":
		default:
			unknown = append(unknown, tok)
		}
	}
	if len(unknown) > 0 {
		// Map iteration order is random; sort for a stable message.
		sort.Strings(unknown)
		result["includeError"] = fmt.Sprintf("unknown include value(s): %s (valid: events, metrics)", strings.Join(unknown, ", "))
	}
}

// normalizeDisplayKind converts a lowercase singular or plural kind to its
// display form for matching against InvolvedObject.Kind and topology node
// kinds (e.g. "pod" → "Pod", "ingresses" → "Ingress").
//
// The match is exact and case-sensitive. Any input that is not one of the
// listed lowercase spellings — CRD kinds, already-capitalized kinds, the
// empty string — is returned unchanged.
//
// Implemented as a switch rather than a map literal so no table is allocated
// on each call.
func normalizeDisplayKind(kind string) string {
	switch kind {
	case "pod", "pods":
		return "Pod"
	case "service", "services":
		return "Service"
	case "deployment", "deployments":
		return "Deployment"
	case "daemonset", "daemonsets":
		return "DaemonSet"
	case "statefulset", "statefulsets":
		return "StatefulSet"
	case "replicaset", "replicasets":
		return "ReplicaSet"
	case "ingress", "ingresses":
		return "Ingress"
	case "configmap", "configmaps":
		return "ConfigMap"
	case "secret", "secrets":
		return "Secret"
	case "job", "jobs":
		return "Job"
	case "cronjob", "cronjobs":
		return "CronJob"
	case "node", "nodes":
		return "Node"
	case "namespace", "namespaces":
		return "Namespace"
	case "persistentvolumeclaim", "persistentvolumeclaims":
		return "PersistentVolumeClaim"
	case "persistentvolume", "persistentvolumes":
		return "PersistentVolume"
	case "storageclass", "storageclasses":
		return "StorageClass"
	case "horizontalpodautoscaler", "horizontalpodautoscalers":
		return "HorizontalPodAutoscaler"
	case "poddisruptionbudget", "poddisruptionbudgets":
		return "PodDisruptionBudget"
	case "role", "roles":
		return "Role"
	case "clusterrole", "clusterroles":
		return "ClusterRole"
	case "rolebinding", "rolebindings":
		return "RoleBinding"
	case "clusterrolebinding", "clusterrolebindings":
		return "ClusterRoleBinding"
	case "serviceaccount", "serviceaccounts":
		return "ServiceAccount"
	case "ingressclass", "ingressclasses":
		return "IngressClass"
	case "priorityclass", "priorityclasses":
		return "PriorityClass"
	case "runtimeclass", "runtimeclasses":
		return "RuntimeClass"
	case "lease", "leases":
		return "Lease"
	case "mutatingwebhookconfiguration", "mutatingwebhookconfigurations":
		return "MutatingWebhookConfiguration"
	case "validatingwebhookconfiguration", "validatingwebhookconfigurations":
		return "ValidatingWebhookConfiguration"
	default:
		return kind
	}
}

// isPodKind reports whether kind is exactly "pod" or "pods". The comparison
// is case-sensitive.
func isPodKind(kind string) bool {
	switch kind {
	case "pod", "pods":
		return true
	}
	return false
}

// handleGetChanges returns recent timeline changes, optionally filtered by
// namespace, kind, and name.
//
// Defaults: since=1h, limit=20 (capped at 50). The response is always wrapped
// as {changes: [...], narrowHint?: "..."} so capped, uncapped, and denied
// calls agree on wire shape. narrowHint is set when the store query hit its
// cap, or when the name filter matched more entries than limit allows.
//
// Errors: uninitialized timeline store, an unparsable or non-positive since,
// or a failed store query. A caller with no namespace access gets an empty
// changes list, not an error.
func handleGetChanges(ctx context.Context, req *mcp.CallToolRequest, input getChangesInput) (*mcp.CallToolResult, any, error) {
	store := timeline.GetStore()
	if store == nil {
		return nil, nil, fmt.Errorf("timeline store not initialized")
	}

	since := 1 * time.Hour
	if input.Since != "" {
		parsed, err := time.ParseDuration(input.Since)
		if err != nil {
			return nil, nil, fmt.Errorf("invalid duration %q: %w", input.Since, err)
		}
		if parsed <= 0 {
			return nil, nil, fmt.Errorf("duration must be positive, got %q", input.Since)
		}
		since = parsed
	}

	limit := 20
	if input.Limit > 0 {
		limit = min(input.Limit, 50)
	}

	var requested []string
	if input.Namespace != "" {
		requested = []string{input.Namespace}
	}
	allowed := filterNamespacesForUser(ctx, requested)
	if allowed != nil && len(allowed) == 0 {
		// Wrap the empty result so capped + uncapped + denied agree on wire shape.
		return toJSONResult(getChangesResponseMCP{Changes: []mcpChange{}})
	}

	queryOpts := timeline.QueryOptions{
		Since:        time.Now().Add(-since),
		FilterPreset: "default",
	}
	switch {
	case input.Namespace != "":
		queryOpts.Namespaces = []string{input.Namespace}
	case allowed != nil:
		queryOpts.Namespaces = allowed
	}
	if input.Kind != "" {
		queryOpts.Kinds = []string{input.Kind}
	}
	// When name filtering is needed client-side, fetch more to compensate for post-filter reduction
	if input.Name != "" {
		queryOpts.Limit = limit * 10
	} else {
		queryOpts.Limit = limit
	}

	events, err := store.Query(ctx, queryOpts)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to query timeline: %w", err)
	}
	// Track whether the upstream store query hit its cap — if so there may
	// be more changes outside the window even after client-side filtering.
	upstreamCapped := len(events) >= queryOpts.Limit

	// Client-side name filter (QueryOptions doesn't support name filtering).
	// matchedByName records the hit count BEFORE the limit is applied so a
	// truncation that happens here can be reported to the caller.
	matchedByName := 0
	if input.Name != "" {
		filtered := events[:0]
		for _, e := range events {
			if e.Name == input.Name {
				filtered = append(filtered, e)
			}
		}
		matchedByName = len(filtered)
		events = filtered
		if len(events) > limit {
			events = events[:limit]
		}
	}

	changes := make([]mcpChange, 0, len(events))
	for _, e := range events {
		summary := ""
		if e.Diff != nil && e.Diff.Summary != "" {
			summary = e.Diff.Summary
		} else if e.Message != "" {
			summary = k8s.Truncate(e.Message, 100)
		}
		changes = append(changes, mcpChange{
			Kind:       e.Kind,
			Namespace:  e.Namespace,
			Name:       e.Name,
			ChangeType: string(e.EventType),
			Summary:    summary,
			Timestamp:  e.Timestamp.Format(time.RFC3339),
		})
	}

	// Always wrap so capped + uncapped agree on wire shape
	// ({changes: [...], narrowHint?: "..."}).
	resp := getChangesResponseMCP{Changes: changes}
	switch {
	case upstreamCapped:
		// queryOpts.Limit is the actual store-side cap (10x when a name
		// filter triggered fetch-extra). Citing `limit` would mislead
		// the agent on the name-filter path where the real cap is higher.
		resp.NarrowHint = fmt.Sprintf(
			"upstream feed capped at %d entries — narrow with namespace=, kind=, name=, shorten since= (e.g. 15m), or raise limit (cap 50)",
			queryOpts.Limit,
		)
	case matchedByName > limit:
		// The store was not capped, but the name filter matched more
		// entries than limit allows — the list above is a subset.
		resp.NarrowHint = fmt.Sprintf(
			"showing %d of %d changes matching name=%q — narrow with namespace=, kind=, shorten since= (e.g. 15m), or raise limit (cap 50)",
			len(changes), matchedByName, input.Name,
		)
	}
	return toJSONResult(resp)
}

// getChangesResponseMCP is the wire shape of get_changes:
// {changes: [...], narrowHint?: "..."}. Changes has no omitempty, so the key
// is always present; NarrowHint is omitted from the output when empty.
type getChangesResponseMCP struct {
	Changes    []mcpChange `json:"changes"`
	NarrowHint string      `json:"narrowHint,omitempty"`
}

// handleGetTopology builds the resource topology for the requested namespace
// (or the caller's allowed namespaces) and returns either the full graph or,
// with format=summary, a compact text-oriented summary.
//
// view=traffic selects the traffic view; format and view are matched
// case-insensitively. Cluster-scoped node kinds the caller cannot list are
// stripped from the graph before it is returned.
//
// Errors: no cluster connection, no access to the requested scope, or a
// failed topology build.
func handleGetTopology(ctx context.Context, req *mcp.CallToolRequest, input topologyInput) (*mcp.CallToolResult, any, error) {
	// Same guard as the other cache-backed handlers: without a resource cache
	// there is nothing to build a graph from.
	cache := k8s.GetResourceCache()
	if cache == nil {
		return nil, nil, fmt.Errorf("not connected to cluster")
	}

	// Topology weaves cluster-scoped resources (Nodes, IngressClasses, …)
	// with namespaced ones, so we gate on access to whatever the caller
	// requested. Namespace-restricted users must scope to a namespace they
	// can see; otherwise we refuse rather than build a partial graph.
	var requested []string
	if input.Namespace != "" {
		requested = []string{input.Namespace}
	}
	allowed := filterNamespacesForUser(ctx, requested)
	if allowed != nil && len(allowed) == 0 {
		if input.Namespace != "" {
			return nil, nil, fmt.Errorf("forbidden: no access to namespace %q", input.Namespace)
		}
		return nil, nil, fmt.Errorf("forbidden: topology requires either a namespace you can access or cluster-wide access")
	}

	opts := topology.DefaultBuildOptions()
	if input.Namespace != "" {
		opts.Namespaces = []string{input.Namespace}
	} else if allowed != nil {
		// Namespace-restricted user with no explicit pick — scope to their allowed set.
		opts.Namespaces = allowed
	}
	if strings.ToLower(input.View) == "traffic" {
		opts.ViewMode = topology.ViewModeTraffic
	}

	builder := topology.NewBuilder(k8s.NewTopologyResourceProvider(cache)).WithDynamic(k8s.NewTopologyDynamicProvider(k8s.GetDynamicResourceCache(), k8s.GetResourceDiscovery()))
	topo, err := builder.Build(opts)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to build topology: %w", err)
	}

	// Topology pulls cluster-scoped resources (Nodes, Karpenter NodePool /
	// NodeClaim, GatewayClass, PV, StorageClass, …) regardless of the
	// namespace scope. Strip the kinds the user can't list so a namespace-
	// restricted user can't enumerate cluster infrastructure via the
	// topology tool. Cluster-wide pod access does NOT imply cluster-scoped
	// reads; per-kind SAR is the gate.
	if deny := deniedClusterScopedTopoKinds(ctx); len(deny) > 0 {
		topo.StripNodeKinds(deny)
	}

	if strings.ToLower(input.Format) == "summary" {
		return toJSONResult(buildTopologySummary(topo))
	}

	return toJSONResult(topo)
}

// deniedClusterScopedTopoKinds collects the cluster-scoped topology NodeKinds
// that the calling user is not allowed to list. It checks every entry of
// topology.ClusterScopedKinds with canReadClusterScopedKind (verb "list") and
// records the ones that fail. The returned map is never nil; it is empty when
// nothing is denied.
func deniedClusterScopedTopoKinds(ctx context.Context) map[topology.NodeKind]bool {
	denied := map[topology.NodeKind]bool{}
	for _, entry := range topology.ClusterScopedKinds {
		if canReadClusterScopedKind(ctx, entry.Resource, entry.Group, "list") {
			continue
		}
		denied[entry.Kind] = true
	}
	return denied
}

// topologySummary is an LLM-friendly text representation of the topology.
// buildTopologySummary fills it: Namespaces holds one entry per namespace,
// sorted by namespace name; Problems lists unhealthy or degraded nodes as
// "Kind Name: Status" (omitted from JSON when empty); Stats carries the raw
// node and edge counts of the source graph.
type topologySummary struct {
	Namespaces []nsSummary   `json:"namespaces"`
	Problems   []string      `json:"problems,omitempty"`
	Stats      topologyStats `json:"stats"`
}

// nsSummary groups the rendered resource chains of one namespace. Nodes
// without a namespace are grouped under the placeholder "(cluster)" (see
// nodeNamespace). Each chain is one string produced by walkChain or, for
// nodes not reached from a root, by describeNode.
type nsSummary struct {
	Namespace string   `json:"namespace"`
	Chains    []string `json:"chains"`
}

// topologyStats reports the size of the topology graph the summary was built
// from: total node count and total edge count.
type topologyStats struct {
	Nodes int `json:"nodes"`
	Edges int `json:"edges"`
}

// buildTopologySummary condenses a topology graph into per-namespace chain
// strings plus a problem list and size stats.
//
// Chains start at root nodes (no incoming edge) and follow edges via
// walkChain. Nodes that no root walk reached — members of cycles, or nodes
// beyond walkChain's depth limit — are appended as standalone entries.
// Namespaces are sorted by name; chains within a namespace keep topo.Nodes
// order. Namespaces is always a non-nil slice so an empty topology encodes
// as "namespaces": [] rather than null.
func buildTopologySummary(topo *topology.Topology) topologySummary {
	// Index nodes by ID, pointing into topo.Nodes (no copies).
	nodeByID := make(map[string]*topology.Node, len(topo.Nodes))
	for i := range topo.Nodes {
		nodeByID[topo.Nodes[i].ID] = &topo.Nodes[i]
	}

	// Adjacency: source → targets, plus which nodes have an incoming edge.
	children := make(map[string][]string)
	hasIncoming := make(map[string]bool)
	for i := range topo.Edges {
		e := &topo.Edges[i]
		children[e.Source] = append(children[e.Source], e.Target)
		hasIncoming[e.Target] = true
	}

	visited := make(map[string]bool, len(topo.Nodes))
	nsChains := make(map[string][]string)

	// Walk chains from roots, grouped by the root's namespace.
	for i := range topo.Nodes {
		id := topo.Nodes[i].ID
		if hasIncoming[id] || visited[id] {
			continue
		}
		chain := walkChain(id, nodeByID, children, visited, 0)
		if chain == "" {
			continue
		}
		ns := nodeNamespace(nodeByID[id])
		nsChains[ns] = append(nsChains[ns], chain)
	}

	// Anything still unvisited gets a standalone entry.
	for i := range topo.Nodes {
		n := &topo.Nodes[i]
		if visited[n.ID] {
			continue
		}
		ns := nodeNamespace(n)
		nsChains[ns] = append(nsChains[ns], describeNode(n))
		visited[n.ID] = true
	}

	// Problems: every unhealthy or degraded node, in topo.Nodes order.
	var problems []string
	for i := range topo.Nodes {
		n := &topo.Nodes[i]
		if n.Status == topology.StatusUnhealthy || n.Status == topology.StatusDegraded {
			problems = append(problems, fmt.Sprintf("%s %s: %s", n.Kind, n.Name, n.Status))
		}
	}

	// Map iteration order is random; sort namespace names for stable output.
	sortedNs := make([]string, 0, len(nsChains))
	for ns := range nsChains {
		sortedNs = append(sortedNs, ns)
	}
	sort.Strings(sortedNs)

	// Allocated (not nil) so the field never serializes as JSON null.
	namespaces := make([]nsSummary, 0, len(sortedNs))
	for _, ns := range sortedNs {
		namespaces = append(namespaces, nsSummary{
			Namespace: ns,
			Chains:    nsChains[ns],
		})
	}

	return topologySummary{
		Namespaces: namespaces,
		Problems:   problems,
		Stats:      topologyStats{Nodes: len(topo.Nodes), Edges: len(topo.Edges)},
	}
}

// walkChain renders the resource chain rooted at nodeID as one string.
//
// The node's own description comes first. A single rendered child is joined
// with " → "; several rendered children are joined as " → [a, b, ...]".
// Returns "" when the node was already visited, is deeper than 10 levels, or
// is missing from nodeByID. Every node that passes the visited/depth guard is
// recorded in visited, including IDs that turn out to be unknown.
func walkChain(nodeID string, nodeByID map[string]*topology.Node, children map[string][]string, visited map[string]bool, depth int) string {
	if visited[nodeID] || depth > 10 {
		return ""
	}
	visited[nodeID] = true

	node, found := nodeByID[nodeID]
	if !found {
		return ""
	}
	label := describeNode(node)

	// Render each child subtree, dropping the ones that produce nothing
	// (already visited, too deep, or unknown).
	var branches []string
	for _, childID := range children[nodeID] {
		if sub := walkChain(childID, nodeByID, children, visited, depth+1); sub != "" {
			branches = append(branches, sub)
		}
	}

	switch len(branches) {
	case 0:
		return label
	case 1:
		return label + " → " + branches[0]
	default:
		return label + " → [" + strings.Join(branches, ", ") + "]"
	}
}

// describeNode formats a node as "Kind/Name", followed by optional suffixes:
// " (unhealthy)" or " (degraded)" for those statuses, " (R/D ready)" when
// Data carries both "readyReplicas" and "replicas", and " [host]" when Data
// carries a "host" value that is not the empty string.
func describeNode(n *topology.Node) string {
	var out strings.Builder
	fmt.Fprintf(&out, "%s/%s", n.Kind, n.Name)

	if n.Status == topology.StatusUnhealthy {
		out.WriteString(" (unhealthy)")
	} else if n.Status == topology.StatusDegraded {
		out.WriteString(" (degraded)")
	}

	// Reading from a nil map is safe, so no explicit nil check is needed.
	ready, hasReady := n.Data["readyReplicas"]
	desired, hasDesired := n.Data["replicas"]
	if hasReady && hasDesired {
		fmt.Fprintf(&out, " (%v/%v ready)", ready, desired)
	}
	if host, hasHost := n.Data["host"]; hasHost && host != "" {
		fmt.Fprintf(&out, " [%v]", host)
	}

	return out.String()
}

// nodeNamespace returns the node's Data["namespace"] when it is a non-empty
// string, and the placeholder "(cluster)" otherwise.
func nodeNamespace(n *topology.Node) string {
	// Indexing a nil map and asserting on a missing value are both safe with
	// the comma-ok form, so a nil Data falls through to the placeholder.
	ns, isString := n.Data["namespace"].(string)
	if !isString || ns == "" {
		return "(cluster)"
	}
	return ns
}

// handleGetEvents serves the events tool.
//
// It lists events from the shared resource cache — scoped to input.Namespace
// when set, otherwise to the namespaces filterNamespacesForUser allows (or
// the whole cluster when that filter is nil) — optionally narrows them by
// involved-object kind (case-insensitive) and name (exact), deduplicates
// them, and returns at most limit entries (default 20, capped at 100).
//
// The result is always a getEventsResponseMCP whose "events" field is a JSON
// array, never null; narrowHint is added only when the list was truncated.
// An error is returned when the cache or event lister is unavailable or a
// list call fails.
func handleGetEvents(ctx context.Context, req *mcp.CallToolRequest, input eventsInput) (*mcp.CallToolResult, any, error) {
	cache := k8s.GetResourceCache()
	if cache == nil {
		return nil, nil, fmt.Errorf("not connected to cluster")
	}

	eventLister := cache.Events()
	if eventLister == nil {
		return nil, nil, fmt.Errorf("insufficient permissions to list events")
	}

	var requested []string
	if input.Namespace != "" {
		requested = []string{input.Namespace}
	}
	allowed := filterNamespacesForUser(ctx, requested)
	if allowed != nil && len(allowed) == 0 {
		// Wrap the empty result so capped + uncapped + denied agree on wire shape.
		return toJSONResult(getEventsResponseMCP{Events: []aicontext.DeduplicatedEvent{}})
	}

	var events []*corev1.Event
	var err error
	switch {
	case input.Namespace != "":
		events, err = eventLister.Events(input.Namespace).List(labels.Everything())
	case allowed != nil:
		// Namespace-restricted user, no explicit pick: aggregate across allowed.
		for _, ns := range allowed {
			items, listErr := eventLister.Events(ns).List(labels.Everything())
			if listErr != nil {
				err = listErr
				break
			}
			events = append(events, items...)
		}
	default:
		events, err = eventLister.List(labels.Everything())
	}
	if err != nil {
		return nil, nil, fmt.Errorf("failed to list events: %w", err)
	}

	// Filter by InvolvedObject kind/name if specified. The filter reuses the
	// listed slice's backing array; only the slice of pointers is rewritten,
	// the events themselves are not modified.
	if input.Kind != "" || input.Name != "" {
		filtered := events[:0]
		for _, e := range events {
			if input.Kind != "" && !strings.EqualFold(e.InvolvedObject.Kind, input.Kind) {
				continue
			}
			if input.Name != "" && e.InvolvedObject.Name != input.Name {
				continue
			}
			filtered = append(filtered, e)
		}
		events = filtered
	}

	// Convert to non-pointer slice for DeduplicateEvents
	eventValues := make([]corev1.Event, len(events))
	for i, e := range events {
		eventValues[i] = *e
	}

	deduplicated := aicontext.DeduplicateEvents(eventValues)
	if deduplicated == nil {
		// A nil slice marshals as JSON null. Normalize it so an empty result
		// has the same {"events": []} shape as the denied path above.
		deduplicated = []aicontext.DeduplicatedEvent{}
	}

	limit := 20
	if input.Limit > 0 {
		limit = min(input.Limit, 100)
	}
	preCap := len(deduplicated)
	if preCap > limit {
		deduplicated = deduplicated[:limit]
	}

	// Always wrap into the response struct so capped + uncapped agree on
	// wire shape ({events: [...], narrowHint?: "..."}).
	resp := getEventsResponseMCP{Events: deduplicated}
	if preCap > limit {
		resp.NarrowHint = fmt.Sprintf(
			"returned %d of %d events — narrow with namespace=, kind=, name=, or raise limit (cap 100)",
			limit, preCap,
		)
	}
	return toJSONResult(resp)
}

// getEventsResponseMCP is the JSON envelope returned by handleGetEvents.
// Events holds the deduplicated events after the limit is applied;
// NarrowHint is set only when the list was truncated and is omitted from the
// JSON otherwise.
type getEventsResponseMCP struct {
	Events     []aicontext.DeduplicatedEvent `json:"events"`
	NarrowHint string                        `json:"narrowHint,omitempty"`
}

// handleGetPodLogs serves the pod-logs tool: it streams the tail of a pod's
// logs (default 200 lines), optionally restricted by container, since= and
// previous=, then applies the grep pattern to what was fetched.
//
// When neither a truncation hint nor a pod-status warning applies, the
// filtered logs are returned as-is; otherwise they are wrapped in a
// podLogsResponseMCP carrying narrowHint and/or warnings.
func handleGetPodLogs(ctx context.Context, req *mcp.CallToolRequest, input podLogsInput) (*mcp.CallToolResult, any, error) {
	if !checkNamespaceAccess(ctx, input.Namespace) {
		return nil, nil, fmt.Errorf("forbidden: no access to namespace %q", input.Namespace)
	}

	client := k8s.ClientFromContext(ctx)
	if client == nil {
		return nil, nil, fmt.Errorf("not connected to cluster")
	}

	// Resolve and validate the inputs before opening the log stream.
	var tail int64 = 200
	if input.TailLines > 0 {
		tail = int64(input.TailLines)
	}
	if pattern := input.Grep; strings.TrimSpace(pattern) != "" {
		if _, compileErr := regexp.Compile(pattern); compileErr != nil {
			return nil, nil, fmt.Errorf("invalid grep regex: %w", compileErr)
		}
	}
	since, err := parseLogsSince(input.Since)
	if err != nil {
		return nil, nil, err
	}

	// An empty Container is the field's zero value, so it can be set
	// unconditionally.
	logOpts := &corev1.PodLogOptions{
		Container:    input.Container,
		TailLines:    &tail,
		SinceSeconds: since,
		Previous:     input.Previous,
	}

	logStream, err := client.CoreV1().Pods(input.Namespace).GetLogs(input.Name, logOpts).Stream(ctx)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to get logs for %s/%s: %w", input.Namespace, input.Name, err)
	}
	defer logStream.Close()

	payload, err := io.ReadAll(logStream)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to read logs: %w", err)
	}
	raw := string(payload)

	// Count lines BEFORE grep: a post-grep count could fall below the tail
	// size even on a capped stream and hide the hint when it matters most.
	fetchedLines := countLines(raw)
	filtered, err := aicontext.FilterLogsByPattern(raw, input.Grep)
	if err != nil {
		return nil, nil, fmt.Errorf("invalid grep regex: %w", err)
	}

	warnings := computePodLogsWarnings(input.Namespace, input.Name, input.Container, input.Previous, fetchedLines)

	// Heuristic: receiving at least `tail` raw lines suggests the stream was
	// cut at the tail limit and older lines exist.
	hint := ""
	if int64(fetchedLines) >= tail {
		hint = fmt.Sprintf(
			"log stream tailed to %d lines (cap reached) — narrow with since= (e.g. 10m), grep= regex, container=, or raise tail_lines",
			fetchedLines,
		)
	}

	if hint != "" || len(warnings) > 0 {
		return toJSONResult(podLogsResponseMCP{
			FilteredLogs: filtered,
			NarrowHint:   hint,
			Warnings:     warnings,
		})
	}
	return toJSONResult(filtered)
}

// computePodLogsWarnings looks up the pod in the resource cache and returns
// human-readable caveats that help interpret an empty or short logs result:
//
//   - the pod's phase is neither Running nor Succeeded, so application logs
//     are usually not available yet;
//   - a container has restarted and has a recorded previous termination, but
//     the caller did not ask for previous=true — the error that killed the
//     last container is in the previous container's logs.
//
// When container is non-empty only that container's status is considered for
// the restart warning. The function is best-effort: if the cache is missing,
// the pod cannot be fetched, or the object is not a Pod, it returns nil
// instead of failing the logs call.
func computePodLogsWarnings(namespace, name, container string, previous bool, rawLines int) []string {
	cache := k8s.GetResourceCache()
	if cache == nil {
		return nil
	}
	fetched, fetchErr := k8s.FetchResource(cache, "pods", namespace, name)
	if fetchErr != nil {
		return nil
	}
	pod, isPod := fetched.(*corev1.Pod)
	if !isPod {
		return nil
	}

	var warnings []string

	switch pod.Status.Phase {
	case corev1.PodRunning, corev1.PodSucceeded:
		// Containers have started (or finished); no phase warning.
	default:
		warnings = append(warnings, fmt.Sprintf(
			"Pod is in phase `%s`; application logs are typically unavailable until containers start. Inspect scheduling/pull state via `diagnose` (kind=pod), `get_resource` with include=events, or check pod conditions for the underlying blocker.",
			pod.Status.Phase,
		))
	}

	// The caller already asked for the previous container's logs, so the
	// restart pointer would be redundant.
	if previous {
		return warnings
	}

	candidates := pod.Status.ContainerStatuses
	if container != "" {
		candidates = filterContainerStatuses(candidates, container)
	}
	crashed := pickCrashIndicator(candidates)
	if crashed == nil {
		return warnings
	}

	terminated := crashed.LastTerminationState.Terminated
	reason := terminated.Reason
	if reason == "" {
		reason = "(reason unset)"
	}
	return append(warnings, fmt.Sprintf(
		"Container `%s` has restarted %d time(s); last termination: `%s` (exit code %d). The error that triggered the previous crash is in the previous container's logs — call again with `previous: true`%s.",
		crashed.Name,
		crashed.RestartCount,
		reason,
		terminated.ExitCode,
		crashloopSuffix(rawLines),
	))
}

// pickCrashIndicator scans statuses in order and returns a pointer to the
// first entry that has a non-zero restart count and a recorded previous
// termination. It returns nil when no entry qualifies. The termination's
// reason may be empty; rendering that case is left to the caller.
func pickCrashIndicator(statuses []corev1.ContainerStatus) *corev1.ContainerStatus {
	for idx := range statuses {
		candidate := &statuses[idx]
		if candidate.RestartCount != 0 && candidate.LastTerminationState.Terminated != nil {
			return candidate
		}
	}
	return nil
}

// filterContainerStatuses returns a one-element sub-slice of statuses holding
// the first entry whose Name equals name, or nil when there is no match. The
// result aliases the input slice's backing array.
func filterContainerStatuses(statuses []corev1.ContainerStatus, name string) []corev1.ContainerStatus {
	for idx := range statuses {
		if statuses[idx].Name != name {
			continue
		}
		return statuses[idx : idx+1]
	}
	return nil
}

// crashloopSuffix returns an extra parenthetical for the restart warning when
// the current log fetch came back with zero lines, and "" otherwise.
func crashloopSuffix(rawLines int) string {
	if rawLines != 0 {
		return ""
	}
	return " (current logs returned empty — likely captured between restarts)"
}

// countLines reports how many newline-delimited lines s contains. A single
// trailing newline terminates the last line rather than starting a new empty
// one, and the empty string has zero lines. It is used to detect stream
// truncation on the raw log text, before grep filtering changes line counts.
func countLines(s string) int {
	if len(s) == 0 {
		return 0
	}
	// Dropping one trailing terminator leaves exactly (lines - 1) separators.
	body := strings.TrimSuffix(s, "\n")
	return strings.Count(body, "\n") + 1
}

// podLogsResponseMCP is the wrapped response handleGetPodLogs returns when it
// has a truncation hint or pod-status warnings to report. It embeds
// FilteredLogs so the JSON shape stays identical to the unwrapped response
// except for the added narrowHint + warnings fields, both of which are
// omitted when empty.
type podLogsResponseMCP struct {
	aicontext.FilteredLogs
	NarrowHint string   `json:"narrowHint,omitempty"`
	Warnings   []string `json:"warnings,omitempty"`
}

// handleListNamespaces serves the list-namespaces tool. It returns one entry
// per namespace visible to the caller, each with "name", "status" (the
// namespace phase) and, when present, "labels". The result is always a JSON
// array, empty when the caller has no access to any namespace.
func handleListNamespaces(ctx context.Context, req *mcp.CallToolRequest, input struct{}) (*mcp.CallToolResult, any, error) {
	cache := k8s.GetResourceCache()
	if cache == nil {
		return nil, nil, fmt.Errorf("not connected to cluster")
	}

	nsLister := cache.Namespaces()
	if nsLister == nil {
		return nil, nil, fmt.Errorf("insufficient permissions to list namespaces")
	}

	all, err := nsLister.List(labels.Everything())
	if err != nil {
		return nil, nil, fmt.Errorf("failed to list namespaces: %w", err)
	}

	// nil = no filter (auth off / cluster-admin); non-nil (even empty) means
	// only the listed namespaces may be returned.
	allowed := filterNamespacesForUser(ctx, nil)
	restricted := allowed != nil
	permitted := make(map[string]struct{}, len(allowed))
	for _, name := range allowed {
		permitted[name] = struct{}{}
	}

	entries := make([]map[string]any, 0, len(all))
	for _, ns := range all {
		if restricted {
			if _, ok := permitted[ns.Name]; !ok {
				continue
			}
		}
		row := map[string]any{
			"name":   ns.Name,
			"status": string(ns.Status.Phase),
		}
		if len(ns.Labels) > 0 {
			row["labels"] = ns.Labels
		}
		entries = append(entries, row)
	}

	return toJSONResult(entries)
}

// Dashboard builder for MCP (simplified version of server/dashboard.go)

// mcpDashboard is the JSON snapshot produced by buildDashboard. Problems is
// the severity-sorted list after the dashboard cap, while TotalProblems and
// ProblemsBySeverity describe the full uncapped set. Nodes, VersionSkew and
// Metrics are only populated when buildDashboard is called with
// includeNodes=true.
type mcpDashboard struct {
	Cluster            mcpClusterInfo         `json:"cluster"`
	Nodes              mcpNodeSummary         `json:"nodes"`
	VersionSkew        []string               `json:"versionSkew,omitempty"`
	Health             mcpHealthSummary       `json:"health"`
	Problems           []mcpProblem           `json:"problems"`
	TotalProblems      int                    `json:"totalProblems"`                // count before the dashboard cap was applied
	ProblemsBySeverity map[string]int         `json:"problemsBySeverity,omitempty"` // critical/high/medium/warning counts across the full set
	RecentChanges      []mcpChange            `json:"recentChanges,omitempty"`
	WarningEvents      int                    `json:"warningEvents"`
	TopWarnings        []mcpWarning           `json:"topWarnings"`
	HelmReleases       mcpHelmSummary         `json:"helmReleases"`
	Metrics            *mcpMetrics            `json:"metrics,omitempty"`
	TopologyNodes      int                    `json:"topologyNodes"`
	TopologyEdges      int                    `json:"topologyEdges"`
	ResourceCounts     map[string]int         `json:"resourceCounts"`
	Visibility         *k8s.VisibilitySummary `json:"visibility,omitempty"`
}

// mcpChange is one timeline change listed in mcpDashboard.RecentChanges.
// buildDashboard fills Timestamp in RFC 3339 format and takes Summary from
// the change's diff summary or, failing that, its truncated message.
type mcpChange struct {
	Kind       string `json:"kind"`
	Namespace  string `json:"namespace,omitempty"`
	Name       string `json:"name"`
	ChangeType string `json:"changeType"`
	Summary    string `json:"summary,omitempty"`
	Timestamp  string `json:"timestamp"`
}

// mcpMetrics reports CPU and memory usage and pod resource requests as
// integer percentages of the summed node capacity, as computed by
// buildDashboard. Because every field is omitempty, a value of 0 is dropped
// from the JSON output.
type mcpMetrics struct {
	CPUUsagePercent   int `json:"cpuUsagePercent,omitempty"`
	CPURequestPercent int `json:"cpuRequestPercent,omitempty"`
	MemUsagePercent   int `json:"memUsagePercent,omitempty"`
	MemRequestPercent int `json:"memRequestPercent,omitempty"`
}

// mcpClusterInfo identifies the connected cluster on the dashboard: its
// name, platform and Kubernetes version, copied by buildDashboard from
// k8s.GetClusterInfo. All fields stay empty when that lookup fails.
type mcpClusterInfo struct {
	Name     string `json:"name"`
	Platform string `json:"platform"`
	Version  string `json:"version"`
}

// mcpNodeSummary counts cluster nodes by state as classified in
// buildDashboard: Ready is ready and schedulable, Cordoned is ready but
// unschedulable, and NotReady is every node that is not ready.
type mcpNodeSummary struct {
	Total    int `json:"total"`
	Ready    int `json:"ready"`
	NotReady int `json:"notReady"`
	Cordoned int `json:"cordoned"`
}

// mcpHealthSummary counts pods by the "healthy" / "warning" / "error"
// classification that k8s.ClassifyPodHealth returns in buildDashboard.
type mcpHealthSummary struct {
	HealthyPods int `json:"healthyPods"`
	WarningPods int `json:"warningPods"`
	ErrorPods   int `json:"errorPods"`
}

// mcpProblem is one row of the dashboard problem list. Rows come from
// several detectors in buildDashboard (pod health, workload/node problems,
// scheduling, missing references, CAPI). Severity uses the vocabulary ranked
// by problemSeverityRank.
type mcpProblem struct {
	Kind                 string `json:"kind"`
	Namespace            string `json:"namespace,omitempty"`
	Name                 string `json:"name"`
	Group                string `json:"group,omitempty"`
	Severity             string `json:"severity,omitempty"`
	Reason               string `json:"reason"`
	Message              string `json:"message,omitempty"`
	Age                  string `json:"age"`
	RestartCount         int32  `json:"restartCount,omitempty"`         // Pod problems only
	LastTerminatedReason string `json:"lastTerminatedReason,omitempty"` // Pod problems only (OOMKilled / Error / Completed)
	// ageSeconds is for sort tiebreak; not serialized so it doesn't widen
	// the wire shape callers depend on.
	ageSeconds int64 `json:"-"`
}

// problemSeverityRank converts a Problem.Severity string into a sort rank
// where a lower number sorts first: critical=0, high=1, medium=2, warning=3.
// Any other value — including the empty string — gets rank 99 so that an
// unrecognized severity sorts last instead of silently taking rank 0 and
// outranking critical.
func problemSeverityRank(s string) int {
	// Ordered most to least severe; the index is the rank.
	ordered := [...]string{"critical", "high", "medium", "warning"}
	for rank, severity := range ordered {
		if s == severity {
			return rank
		}
	}
	return 99
}

// sortMCPDashboardProblems orders the dashboard problem list in place: most
// severe first (per problemSeverityRank), and within one severity the
// newest problem first (lowest ageSeconds). The sort is stable, so rows that
// tie on both keys keep their original relative order. It runs before the
// dashboard cap so a critical row is not cut in favour of a less severe row
// that happened to be appended earlier.
func sortMCPDashboardProblems(problems []mcpProblem) {
	less := func(i, j int) bool {
		a, b := &problems[i], &problems[j]
		if ra, rb := problemSeverityRank(a.Severity), problemSeverityRank(b.Severity); ra != rb {
			return ra < rb
		}
		// Equal severity: newly failed resources are usually more useful
		// for triage than chronic crash-loopers, which are already flagged
		// through the row's RestartCount.
		return a.ageSeconds < b.ageSeconds
	}
	sort.SliceStable(problems, less)
}

// countBySeverity returns how many problems carry each Severity value. It is
// meant to run on the full, uncapped list so callers can see the real scale
// even when only the capped rows are returned. An empty input yields a nil
// map.
func countBySeverity(problems []mcpProblem) map[string]int {
	var tally map[string]int
	for i := range problems {
		if tally == nil {
			// Allocate lazily so an empty input still returns nil.
			tally = make(map[string]int, 4)
		}
		tally[problems[i].Severity]++
	}
	return tally
}

// mcpWarning is one deduplicated Warning event shown in
// mcpDashboard.TopWarnings. buildDashboard truncates Message to 200
// characters and copies Count from the deduplicated event.
type mcpWarning struct {
	Reason  string `json:"reason"`
	Message string `json:"message"`
	Count   int    `json:"count"`
}

// mcpHelmSummary is the Helm section of the dashboard. Total is the number
// of releases the list call returned, while Releases holds only the first
// few after buildDashboard sorts them by helm.StatusPriority.
type mcpHelmSummary struct {
	Total    int              `json:"total"`
	Releases []mcpHelmRelease `json:"releases,omitempty"`
	// Unavailable + UnavailableReason are populated when the Helm read
	// failed (Helm client not initialized, RBAC denied, network error,
	// etc.) — distinguishes "this cluster has zero Helm releases" from
	// "Helm is broken; results aren't an honest count." LLM consumers
	// should surface UnavailableReason to the user instead of confidently
	// reporting Total=0.
	Unavailable       bool   `json:"unavailable,omitempty"`
	UnavailableReason string `json:"unavailableReason,omitempty"`
}

// mcpHelmRelease is the per-release row in mcpHelmSummary.Releases, copied
// field-for-field from the Helm client's release listing in buildDashboard.
// ResourceHealth is omitted from the JSON when empty.
type mcpHelmRelease struct {
	Name           string `json:"name"`
	Namespace      string `json:"namespace"`
	Chart          string `json:"chart"`
	ChartVersion   string `json:"chartVersion"`
	Status         string `json:"status"`
	ResourceHealth string `json:"resourceHealth,omitempty"`
}

// buildDashboard assembles the MCP dashboard snapshot for namespace (the
// empty string means all namespaces).
//
// It gathers, in order: visibility and cluster info, pod health counts, a
// problem list merged from several detectors (sorted, then capped at 30),
// resource counts, the top 5 deduplicated warning events, up to 5 Helm
// releases, node metrics, topology size, and up to 5 recent timeline changes
// that touch a reported problem.
//
// includeNodes gates the node-related sections: Node problem rows, the node
// summary, version skew and metrics. It also enables the topology build when
// no namespace is given. includeNamespaces is forwarded to countResources.
//
// Every section is best-effort: a nil lister skips its section, list errors
// are ignored (treated as an empty list), and Helm/metrics/topology/timeline
// failures are logged or recorded in the result rather than returned.
func buildDashboard(ctx context.Context, cache *k8s.ResourceCache, namespace string, includeNodes bool, includeNamespaces bool) mcpDashboard {
	d := mcpDashboard{
		ResourceCounts: make(map[string]int),
	}
	if result := k8s.GetCachedPermissionResult(); result != nil {
		d.Visibility = k8s.BuildVisibilitySummary(result, namespace)
	}

	// Cluster info
	if info, err := k8s.GetClusterInfo(ctx); err == nil {
		d.Cluster = mcpClusterInfo{
			Name:     info.Cluster,
			Platform: info.Platform,
			Version:  info.KubernetesVersion,
		}
	}

	now := time.Now()

	// Collect all problems first (uncapped), sort by severity+age, then
	// apply the dashboard cap so higher-severity findings cannot be displaced
	// by lower-severity findings appended earlier in the sequence.
	var allProblems []mcpProblem

	// Pod health
	if podLister := cache.Pods(); podLister != nil {
		var pods []*corev1.Pod
		// List errors are ignored: a failed list is treated as zero pods.
		if namespace != "" {
			pods, _ = podLister.Pods(namespace).List(labels.Everything())
		} else {
			pods, _ = podLister.List(labels.Everything())
		}
		d.ResourceCounts["pods"] = len(pods)
		for _, pod := range pods {
			switch k8s.ClassifyPodHealth(pod, now) {
			case "healthy":
				d.Health.HealthyPods++
			case "warning":
				d.Health.WarningPods++
			case "error":
				d.Health.ErrorPods++
				severity := "critical"
				// PodProblemReason returns the kubelet's waiting/terminated
				// reason; ClassifyPodHealth==error implies the pod is in a
				// failing state, so critical is the right default.
				restarts, lastTermReason := k8s.PodRestartContext(pod)
				ageDur := now.Sub(pod.CreationTimestamp.Time)
				allProblems = append(allProblems, mcpProblem{
					Kind:                 "Pod",
					Namespace:            pod.Namespace,
					Name:                 pod.Name,
					Severity:             severity,
					Reason:               k8s.PodProblemReason(pod),
					Age:                  k8s.FormatAge(ageDur),
					RestartCount:         restarts,
					LastTerminatedReason: lastTermReason,
					ageSeconds:           int64(ageDur.Seconds()),
				})
			}
		}
	}

	// DetectProblems emits Pod-level rows (CrashLoopBackOff, not-ready,
	// etc.) as well as workload/HPA/CronJob/Node ones. The dashboard pod
	// loop above is the canonical source for pod problems, so skip Pod
	// here to avoid the same failing pod appearing twice.
	for _, p := range k8s.DetectProblems(cache, namespace) {
		if p.Kind == "Pod" {
			continue
		}
		if p.Kind == "Node" && !includeNodes {
			continue
		}
		allProblems = append(allProblems, mcpProblem{
			Kind:                 p.Kind,
			Namespace:            p.Namespace,
			Name:                 p.Name,
			Group:                p.Group,
			Severity:             p.Severity,
			Reason:               p.Reason,
			Message:              p.Message,
			Age:                  p.Age,
			RestartCount:         p.RestartCount,
			LastTerminatedReason: p.LastTerminatedReason,
			ageSeconds:           p.AgeSeconds,
		})
	}

	// Scheduling problems: unschedulable pods (with the offending node
	// constraint named), admission rejections (quota/PodSecurity/webhook —
	// no Pod exists, so the pod loop above can't see them), and post-bind
	// CNI/volume stalls. The pod loop only emits "error" pods, so these are
	// additive; the seenProblem dedup below keys on reason, letting a pod's
	// scheduling row coexist with a distinct missing-ref row.
	sched := k8s.DetectSchedulingProblems(cache, namespace)
	sched = append(sched, k8s.DetectAdmissionProblems(cache, namespace)...)
	sched = append(sched, k8s.DetectPostBindProblems(cache, namespace)...)
	for _, p := range sched {
		allProblems = append(allProblems, mcpProblem{
			Kind:       p.Kind,
			Namespace:  p.Namespace,
			Name:       p.Name,
			Group:      p.Group,
			Severity:   p.Severity,
			Reason:     p.Reason,
			Message:    p.Message,
			Age:        p.Age,
			ageSeconds: p.AgeSeconds,
		})
	}

	// Missing-ref problems (Pod→missing CM/Secret/PVC/SA, HPA→missing
	// target, Ingress→missing backend, PVC→missing SC, RoleBinding→missing
	// roleRef). Pod rows are intentionally kept here because the pod-error
	// loop above only catches running-but-broken pods (CrashLoopBackOff,
	// not-ready); Pods that fail to schedule on a missing PVC stay Pending
	// without container statuses and would otherwise be invisible.
	//
	// Dedupe exact-duplicate rows, not resource identities. Keying on
	// (kind, ns, name, reason) lets distinct missing-ref reasons on the
	// same Pod survive — a Pod missing both PVC and ConfigMap emits two
	// rows — and prevents a generic kubelet-state row (e.g.
	// CreateContainerConfigError) from suppressing the underlying
	// root-cause row (e.g. Missing Secret) for the same resource.
	seenProblem := make(map[string]bool, len(allProblems))
	for _, p := range allProblems {
		seenProblem[p.Kind+"/"+p.Namespace+"/"+p.Name+"/"+p.Reason] = true
	}
	missingRefs := k8s.DetectMissingRefs(cache, namespace)
	missingRefs = append(missingRefs, k8s.DetectMissingWebhookRefs(cache, k8s.GetDynamicResourceCache(), k8s.GetResourceDiscovery(), namespace)...)
	for _, p := range missingRefs {
		if seenProblem[p.Kind+"/"+p.Namespace+"/"+p.Name+"/"+p.Reason] {
			continue
		}
		allProblems = append(allProblems, mcpProblem{
			Kind:       p.Kind,
			Namespace:  p.Namespace,
			Name:       p.Name,
			Group:      p.Group,
			Severity:   p.Severity,
			Reason:     p.Reason,
			Message:    p.Message,
			Age:        p.Age,
			ageSeconds: p.AgeSeconds,
		})
	}

	// CAPI problems (Cluster API resources). These rows are appended without
	// going through the seenProblem dedup above.
	for _, p := range k8s.DetectCAPIProblems(k8s.GetDynamicResourceCache(), k8s.GetResourceDiscovery(), namespace) {
		allProblems = append(allProblems, mcpProblem{
			Kind:       p.Kind,
			Namespace:  p.Namespace,
			Name:       p.Name,
			Group:      p.Group,
			Severity:   p.Severity,
			Reason:     p.Reason,
			Message:    p.Message,
			Age:        p.Age,
			ageSeconds: p.AgeSeconds,
		})
	}

	// Sort most-severe first, then newest first (lowest ageSeconds), then
	// cap. Without this, the cap drops critical missing-refs in favour of
	// medium-severity workload problems that happened to be appended
	// earlier — the agent gets a non-representative subset.
	// TotalProblems and ProblemsBySeverity are taken before the cap so they
	// describe the full set.
	sortMCPDashboardProblems(allProblems)
	d.TotalProblems = len(allProblems)
	d.ProblemsBySeverity = countBySeverity(allProblems)
	const dashboardCap = 30
	if len(allProblems) > dashboardCap {
		allProblems = allProblems[:dashboardCap]
	}
	d.Problems = allProblems

	// Deployment resource count
	if depLister := cache.Deployments(); depLister != nil {
		if namespace != "" {
			items, _ := depLister.Deployments(namespace).List(labels.Everything())
			d.ResourceCounts["deployments"] = len(items)
		} else {
			items, _ := depLister.List(labels.Everything())
			d.ResourceCounts["deployments"] = len(items)
		}
	}

	// Node health summary (cluster-scoped, not filtered by namespace)
	if includeNodes {
		if nodeLister := cache.Nodes(); nodeLister != nil {
			nodes, _ := nodeLister.List(labels.Everything())
			d.ResourceCounts["nodes"] = len(nodes)
			d.Nodes.Total = len(nodes)

			for _, node := range nodes {
				h := k8s.ClassifyNodeHealth(node)
				if h.Ready {
					// A ready but unschedulable node counts as cordoned,
					// not ready.
					if h.Unschedulable {
						d.Nodes.Cordoned++
					} else {
						d.Nodes.Ready++
					}
				} else {
					d.Nodes.NotReady++
				}
			}

			// Version skew: list the distinct versions, sorted so the
			// output does not depend on map iteration order.
			if skew := k8s.DetectVersionSkew(nodes); skew != nil {
				for v := range skew.Versions {
					d.VersionSkew = append(d.VersionSkew, v)
				}
				sort.Strings(d.VersionSkew)
			}
		}
	}

	// Simple resource counts for other types
	countResources(cache, namespace, &d, includeNamespaces)

	// Warning events — deduplicate first, then sort by count
	if eventLister := cache.Events(); eventLister != nil {
		var events []*corev1.Event
		if namespace != "" {
			events, _ = eventLister.Events(namespace).List(labels.Everything())
		} else {
			events, _ = eventLister.List(labels.Everything())
		}

		var warningValues []corev1.Event
		for _, e := range events {
			if e.Type == "Warning" {
				warningValues = append(warningValues, *e)
			}
		}
		// WarningEvents is the raw count, before deduplication.
		d.WarningEvents = len(warningValues)

		// Deduplicate and sort by count descending to surface systemic issues
		deduplicated := aicontext.DeduplicateEvents(warningValues)
		sort.Slice(deduplicated, func(i, j int) bool {
			return deduplicated[i].Count > deduplicated[j].Count
		})

		limit := min(len(deduplicated), 5)
		for _, e := range deduplicated[:limit] {
			d.TopWarnings = append(d.TopWarnings, mcpWarning{
				Reason:  e.Reason,
				Message: k8s.Truncate(e.Message, 200),
				Count:   e.Count,
			})
		}
	}

	// Helm releases — sort failed-first before slicing
	helmClient := helm.GetClient()
	if helmClient == nil {
		d.HelmReleases.Unavailable = true
		d.HelmReleases.UnavailableReason = "Helm client not initialized."
	} else {
		username, groups := userFromContext(ctx)
		releases, err := helmClient.ListReleasesAsUser(namespace, username, groups)
		if err != nil {
			// Not fatal for the dashboard — a viewer with no helm access
			// still sees everything else. Surface to LLM consumers via
			// Unavailable so they don't confidently report "Total=0
			// releases" when in fact the read failed.
			log.Printf("[mcp] Dashboard helm list failed: %v", err)
			d.HelmReleases.Unavailable = true
			if helm.IsForbiddenError(err) {
				d.HelmReleases.UnavailableReason = "RBAC denied: caller cannot list Helm release secrets in this scope."
			} else {
				d.HelmReleases.UnavailableReason = "Helm read failed: " + err.Error()
			}
		} else {
			// Total is the full count; only the first 5 after sorting are
			// included in Releases.
			d.HelmReleases.Total = len(releases)

			// Sort: failed/pending-install first, then unhealthy/degraded
			sort.SliceStable(releases, func(i, j int) bool {
				return helm.StatusPriority(releases[i].Status, releases[i].ResourceHealth) < helm.StatusPriority(releases[j].Status, releases[j].ResourceHealth)
			})

			limit := min(len(releases), 5)
			for _, r := range releases[:limit] {
				d.HelmReleases.Releases = append(d.HelmReleases.Releases, mcpHelmRelease{
					Name:           r.Name,
					Namespace:      r.Namespace,
					Chart:          r.Chart,
					ChartVersion:   r.ChartVersion,
					Status:         r.Status,
					ResourceHealth: r.ResourceHealth,
				})
			}
		}
	}

	// Metrics (best-effort — silently skip if metrics-server unavailable).
	// Metrics-server forwards the impersonation headers, so a user without
	// metrics.k8s.io/nodes access gets a 403 here and the field is left empty.
	if includeNodes {
		if client := k8s.ClientFromContext(ctx); client != nil {
			data, err := client.CoreV1().RESTClient().Get().
				AbsPath("/apis/metrics.k8s.io/v1beta1/nodes").
				DoRaw(ctx)
			if err == nil {
				// Only the usage fields of each node-metrics item are decoded.
				var nodeMetricsList struct {
					Items []struct {
						Usage struct {
							CPU    string `json:"cpu"`
							Memory string `json:"memory"`
						} `json:"usage"`
					} `json:"items"`
				}
				if err := json.Unmarshal(data, &nodeMetricsList); err != nil {
					log.Printf("[mcp] Failed to parse node metrics: %v", err)
				} else if len(nodeMetricsList.Items) > 0 {
					if nodeLister := cache.Nodes(); nodeLister != nil {
						// Denominator: total capacity across all nodes.
						allNodes, _ := nodeLister.List(labels.Everything())
						var cpuCapMillis, memCapBytes int64
						for _, n := range allNodes {
							cpuCapMillis += n.Status.Capacity.Cpu().MilliValue()
							memCapBytes += n.Status.Capacity.Memory().Value()
						}

						var cpuUsageMillis, memUsageBytes int64
						for _, item := range nodeMetricsList.Items {
							cpuUsageMillis += k8s.ParseCPUToMillis(item.Usage.CPU)
							memUsageBytes += k8s.ParseMemoryToBytes(item.Usage.Memory)
						}

						// Requests are summed over all pods in the cluster
						// (not namespace-filtered), skipping pods in the
						// Succeeded or Failed phase. Only Spec.Containers is
						// counted; init containers are not.
						var cpuReqMillis, memReqBytes int64
						if podLister := cache.Pods(); podLister != nil {
							allPods, _ := podLister.List(labels.Everything())
							for _, pod := range allPods {
								if pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed {
									continue
								}
								for _, c := range pod.Spec.Containers {
									if c.Resources.Requests != nil {
										if cpu, ok := c.Resources.Requests[corev1.ResourceCPU]; ok {
											cpuReqMillis += cpu.MilliValue()
										}
										if mem, ok := c.Resources.Requests[corev1.ResourceMemory]; ok {
											memReqBytes += mem.Value()
										}
									}
								}
							}
						}

						// Guard against division by zero when capacity is
						// unknown; percentages use integer division.
						if cpuCapMillis > 0 && memCapBytes > 0 {
							d.Metrics = &mcpMetrics{
								CPUUsagePercent:   int(cpuUsageMillis * 100 / cpuCapMillis),
								CPURequestPercent: int(cpuReqMillis * 100 / cpuCapMillis),
								MemUsagePercent:   int(memUsageBytes * 100 / memCapBytes),
								MemRequestPercent: int(memReqBytes * 100 / memCapBytes),
							}
						}
					}
				}
			}
		}
	}

	// Topology summary: only the node and edge counts are kept.
	// NOTE(review): the builder reads k8s.GetResourceCache() rather than the
	// cache parameter — confirm these are always the same instance.
	if includeNodes || namespace != "" {
		opts := topology.DefaultBuildOptions()
		if namespace != "" {
			opts.Namespaces = []string{namespace}
		}
		builder := topology.NewBuilder(k8s.NewTopologyResourceProvider(k8s.GetResourceCache())).WithDynamic(k8s.NewTopologyDynamicProvider(k8s.GetDynamicResourceCache(), k8s.GetResourceDiscovery()))
		if topo, err := builder.Build(opts); err == nil {
			d.TopologyNodes = len(topo.Nodes)
			d.TopologyEdges = len(topo.Edges)
		} else {
			log.Printf("[mcp] Failed to build topology for dashboard: %v", err)
		}
	}

	// Correlate recent changes with problems — only show changes for broken resources
	if store := timeline.GetStore(); store != nil && len(d.Problems) > 0 {
		// Keys are built from the capped problem list (d.Problems), so
		// problems dropped by the cap are not correlated.
		problemKeys := make(map[string]bool, len(d.Problems))
		for _, p := range d.Problems {
			problemKeys[fmt.Sprintf("%s/%s/%s", p.Kind, p.Namespace, p.Name)] = true
		}

		// Look back one hour and fetch at most 20 changes.
		queryOpts := timeline.QueryOptions{
			Since:        now.Add(-1 * time.Hour),
			Limit:        20,
			FilterPreset: "workloads",
		}
		if namespace != "" {
			queryOpts.Namespaces = []string{namespace}
		}
		changes, err := store.Query(ctx, queryOpts)
		if err != nil {
			log.Printf("[mcp] Failed to query timeline for dashboard changes: %v", err)
		}
		if err == nil {
			for _, c := range changes {
				key := fmt.Sprintf("%s/%s/%s", c.Kind, c.Namespace, c.Name)
				// Also check owner chain (e.g. Pod problem → Deployment change)
				ownerKey := ""
				if c.Owner != nil {
					// The owner is assumed to share the change's namespace.
					ownerKey = fmt.Sprintf("%s/%s/%s", c.Owner.Kind, c.Namespace, c.Owner.Name)
				}
				if problemKeys[key] || (ownerKey != "" && problemKeys[ownerKey]) {
					// Prefer the diff summary; fall back to the truncated
					// change message.
					summary := ""
					if c.Diff != nil && c.Diff.Summary != "" {
						summary = c.Diff.Summary
					} else if c.Message != "" {
						summary = k8s.Truncate(c.Message, 100)
					}
					d.RecentChanges = append(d.RecentChanges, mcpChange{
						Kind:       c.Kind,
						Namespace:  c.Namespace,
						Name:       c.Name,
						ChangeType: string(c.EventType),
						Summary:    summary,
						Timestamp:  c.Timestamp.Format(time.RFC3339),
					})
					// Stop after 5 correlated changes.
					if len(d.RecentChanges) >= 5 {
						break
					}
				}
			}
		}
	}

	return d
}

// countResources records, in d.ResourceCounts, how many objects each cached
// lister currently holds for services, ingresses, statefulsets, daemonsets,
// jobs and cronjobs. A non-empty namespace restricts every count to that
// namespace; an empty one counts across all namespaces. The "namespaces"
// count is only written when includeNamespaces is true.
//
// A kind whose lister is nil is left out of the map entirely. List errors are
// discarded on purpose (best-effort dashboard numbers): the recorded count is
// simply the length of whatever slice the lister returned.
func countResources(cache *k8s.ResourceCache, namespace string, d *mcpDashboard, includeNamespaces bool) {
	clusterWide := namespace == ""
	everything := labels.Everything()
	record := func(key string, n int) { d.ResourceCounts[key] = n }

	if lister := cache.Services(); lister != nil {
		if clusterWide {
			all, _ := lister.List(everything)
			record("services", len(all))
		} else {
			scoped, _ := lister.Services(namespace).List(everything)
			record("services", len(scoped))
		}
	}

	if lister := cache.Ingresses(); lister != nil {
		if clusterWide {
			all, _ := lister.List(everything)
			record("ingresses", len(all))
		} else {
			scoped, _ := lister.Ingresses(namespace).List(everything)
			record("ingresses", len(scoped))
		}
	}

	if lister := cache.StatefulSets(); lister != nil {
		if clusterWide {
			all, _ := lister.List(everything)
			record("statefulsets", len(all))
		} else {
			scoped, _ := lister.StatefulSets(namespace).List(everything)
			record("statefulsets", len(scoped))
		}
	}

	if lister := cache.DaemonSets(); lister != nil {
		if clusterWide {
			all, _ := lister.List(everything)
			record("daemonsets", len(all))
		} else {
			scoped, _ := lister.DaemonSets(namespace).List(everything)
			record("daemonsets", len(scoped))
		}
	}

	if lister := cache.Jobs(); lister != nil {
		if clusterWide {
			all, _ := lister.List(everything)
			record("jobs", len(all))
		} else {
			scoped, _ := lister.Jobs(namespace).List(everything)
			record("jobs", len(scoped))
		}
	}

	if lister := cache.CronJobs(); lister != nil {
		if clusterWide {
			all, _ := lister.List(everything)
			record("cronjobs", len(all))
		} else {
			scoped, _ := lister.CronJobs(namespace).List(everything)
			record("cronjobs", len(scoped))
		}
	}

	if !includeNamespaces {
		return
	}
	if lister := cache.Namespaces(); lister != nil {
		all, _ := lister.List(everything)
		record("namespaces", len(all))
	}
}

// handleIssuesTool serves the MCP issues tool. It composes the grouped issue
// list for the namespaces the caller may see and returns it as a JSON text
// result built from issues.ListResponse.
//
// input.Namespace, when set, must pass checkNamespaceAccess or the call fails
// with a "forbidden" error; when unset, the scope is whatever
// filterNamespacesForUser returns. input.Severity and input.Kind are
// comma-separated lists, input.Limit is passed through to the issue filters,
// and input.Filter is an optional expression compiled via
// filter.CachedIssueFilter.
//
// Errors are returned when no issue provider is available, the requested
// namespace is denied, a severity is unknown, or the filter fails to compile.
func handleIssuesTool(ctx context.Context, _ *mcp.CallToolRequest, input issuesInput) (*mcp.CallToolResult, any, error) {
	provider := issues.NewCacheProvider()
	if provider == nil {
		return nil, nil, fmt.Errorf("not connected to cluster")
	}
	var allowedNamespaces []string
	if input.Namespace != "" {
		if !checkNamespaceAccess(ctx, input.Namespace) {
			// Explicit denial must NOT read as "[] = nothing broken" — for an
			// agent that's an unauthorized → healthy trust gap. Surface it.
			return nil, nil, fmt.Errorf("forbidden: no access to namespace %q", input.Namespace)
		}
		allowedNamespaces = []string{input.Namespace}
	} else {
		allowedNamespaces = filterNamespacesForUser(ctx, nil)
		// A non-nil but empty list short-circuits to an empty result without
		// composing issues. NOTE(review): nil appears to be the
		// "unrestricted" sentinel here — confirm against
		// filterNamespacesForUser.
		if allowedNamespaces != nil && len(allowedNamespaces) == 0 {
			return toJSONResult(map[string]any{"issues": []issues.Issue{}, "total": 0, "total_matched": 0})
		}
	}
	severities, err := parseSeverityList(input.Severity)
	if err != nil {
		return nil, nil, err
	}
	filters := issues.Filters{
		Severities: severities,
		Kinds:      splitCSVStr(input.Kind),
		Limit:      input.Limit,
		Namespaces: allowedNamespaces,
		// Agents get the grouped issue model — structured triage objects,
		// not per-pod evidence rows they'd have to re-aggregate. Raw object
		// state stays available via get_resource / get_events.
		Grouped: true,
		// Cluster-scoped kinds are gated per kind on the caller's "list"
		// permission.
		CanReadClusterScoped: func(kind, group string) bool {
			return canReadClusterScopedKind(ctx, kind, group, "list")
		},
	}
	if input.Filter != "" {
		f, err := filter.CachedIssueFilter(input.Filter)
		if err != nil {
			return nil, nil, fmt.Errorf("filter: %w", err)
		}
		filters.Filter = f
	}
	out, stats := issues.ComposeWithStats(provider, filters)
	// Shared response shape (issues.ListResponse) — identical to /api/issues so
	// HTTP and MCP can't drift.
	resp := issues.NewListResponse(out, stats)
	// The callback reports whether the caller may list the given resource in
	// the kube-system namespace.
	resp.ClusterContext = provider.ClusterContextForIssues(allowedNamespaces, func(group, resource string) bool {
		return canReadInNamespace(ctx, group, resource, "kube-system", "list")
	})
	// Steering hint when the issue list was capped (MCP-only).
	if stats.TotalMatched > len(out) {
		resp.NarrowHint = fmt.Sprintf(
			"returned %d of %d issues — narrow with namespace=, kind=, severity=critical, add filter= CEL, or raise limit (cap 1000)",
			len(out), stats.TotalMatched,
		)
	}
	// A DNS hint from the cluster context is appended to NarrowHint, separated
	// by "; " when a capping hint is already present.
	if resp.ClusterContext != nil && resp.ClusterContext.DNS != nil && resp.ClusterContext.DNS.Hint != "" {
		if resp.NarrowHint != "" {
			resp.NarrowHint += "; "
		}
		resp.NarrowHint += resp.ClusterContext.DNS.Hint
	}
	// Attach a visibility summary only when a cached permission result exists
	// and a summary can be built for this namespace scope.
	if result := k8s.GetCachedPermissionResult(); result != nil {
		if visibility := k8s.BuildVisibilitySummary(result, k8s.VisibilityNamespace(allowedNamespaces)); visibility != nil {
			resp.Visibility = visibility
		}
	}
	return toJSONResult(resp)
}

// parseSeverityList converts a comma-separated severity string into issue
// severities. Tokens are trimmed and matched case-insensitively against
// "critical" and "warning"; blank tokens are ignored. An empty input yields
// (nil, nil). Any other token aborts parsing with an error that quotes the
// token exactly as it appeared between the commas.
func parseSeverityList(v string) ([]issues.Severity, error) {
	if v == "" {
		return nil, nil
	}
	var parsed []issues.Severity
	for _, raw := range strings.Split(v, ",") {
		name := strings.ToLower(strings.TrimSpace(raw))
		if name == "" {
			continue
		}
		var sev issues.Severity
		if name == "critical" {
			sev = issues.SeverityCritical
		} else if name == "warning" {
			sev = issues.SeverityWarning
		} else {
			return nil, fmt.Errorf("unknown severity %q (want: critical, warning)", raw)
		}
		parsed = append(parsed, sev)
	}
	return parsed, nil
}

// splitCSVStr breaks a comma-separated string into its trimmed, non-empty
// fields, preserving their order. It returns nil when v is empty or contains
// nothing but separators and whitespace.
func splitCSVStr(v string) []string {
	var fields []string
	rest := v
	for rest != "" {
		var piece string
		// Peel off one field per iteration; rest becomes "" after the last one.
		piece, rest, _ = strings.Cut(rest, ",")
		if piece = strings.TrimSpace(piece); piece != "" {
			fields = append(fields, piece)
		}
	}
	return fields
}

// intersectAllowedNamespaces narrows requested to the namespaces that also
// appear in allowed.
//
//   - allowed == nil: requested is returned unchanged.
//   - requested empty: allowed is returned unchanged.
//   - otherwise: a new, non-nil slice holding every element of requested that
//     is present in allowed, in requested's order (duplicates are kept).
func intersectAllowedNamespaces(allowed, requested []string) []string {
	switch {
	case allowed == nil:
		return requested
	case len(requested) == 0:
		return allowed
	}

	permitted := make(map[string]bool, len(allowed))
	for i := range allowed {
		permitted[allowed[i]] = true
	}

	kept := make([]string, 0, len(requested))
	for i := range requested {
		if ns := requested[i]; permitted[ns] {
			kept = append(kept, ns)
		}
	}
	return kept
}

// mcpSensitiveSearchKinds lists the cluster-scoped kinds that search must
// authorize individually, with a per-user "list" check on each entry's
// Group/Resource. It is the MCP counterpart of the REST sensitiveSearchKinds.
// Secrets are absent on purpose: they are namespaced, so mcpSearchSecretsRBAC
// handles them with per-namespace permission instead.
var mcpSensitiveSearchKinds = []struct {
	Kind     string
	Resource string
	Group    string
}{
	{Kind: "Node", Resource: "nodes", Group: ""},
	{Kind: "PersistentVolume", Resource: "persistentvolumes", Group: ""},
	{Kind: "StorageClass", Resource: "storageclasses", Group: "storage.k8s.io"},
	{Kind: "Namespace", Resource: "namespaces", Group: ""},
}

// mcpSearchSkipKinds returns the set of sensitive cluster-scoped kinds (see
// mcpSensitiveSearchKinds) that search must skip for the current caller.
//
// It returns nil when resolveUserPerms yields no user. When no Kubernetes
// client is available every sensitive kind is skipped. Otherwise each kind is
// decided from the caller's cached "list" permission when one is recorded,
// falling back to a live subjectCanI check whose answer is written back to
// the cache. A failed check is logged and treated as a denial.
func mcpSearchSkipKinds(ctx context.Context) map[string]bool {
	user, perms := resolveUserPerms(ctx)
	if user == nil {
		return nil
	}

	skip := make(map[string]bool, len(mcpSensitiveSearchKinds))
	client := k8s.GetClient()
	if client == nil {
		// No way to verify anything: fail closed on every sensitive kind.
		for _, entry := range mcpSensitiveSearchKinds {
			skip[entry.Kind] = true
		}
		return skip
	}

	// denied reports whether the caller lacks cluster-wide "list" on the
	// given group/resource, consulting the cache before the live check.
	denied := func(group, resource string) bool {
		if perms != nil {
			if cached, known := perms.CanI("list", group, resource, ""); known {
				return !cached
			}
		}
		granted, err := subjectCanI(ctx, client, user.Username, user.Groups, "", group, resource, "list")
		if err != nil {
			log.Printf("[mcp] search SAR failed for %s on %s/%s: %v", user.Username, group, resource, err)
			return true
		}
		if perms != nil {
			perms.SetCanI("list", group, resource, "", granted)
		}
		return !granted
	}

	for _, entry := range mcpSensitiveSearchKinds {
		if denied(entry.Group, entry.Resource) {
			skip[entry.Kind] = true
		}
	}
	return skip
}

// mcpSearchSecretsRBAC decides how search may treat Secrets for the current
// caller. It is the MCP counterpart of Server.computeSearchSecretsRBAC; see
// that function for the three-case semantics.
//
// scanNamespaces is the effective scan scope (the caller's RBAC-allowed
// namespaces intersected with any `ns:` modifier in the query); nil means a
// true cluster-wide scan.
//
// The returned decision is one of:
//   - "": no adjustment (no user resolved, or cluster-wide list on secrets is
//     permitted for a cluster-wide scan);
//   - "skip": Secrets must be excluded;
//   - "override": Secrets may be scanned only in scopedNamespaces, the subset
//     of scanNamespaces where the caller can list secrets.
//
// Cached canI hits short-circuit before the live-client path, so a seeded
// test cache authorizes without needing a real K8s client.
func mcpSearchSecretsRBAC(ctx context.Context, scanNamespaces []string) (decision string, scopedNamespaces []string) {
	user, _ := resolveUserPerms(ctx)
	if user == nil {
		return "", nil
	}

	switch {
	case scanNamespaces == nil:
		// Cluster-wide scan: a single cluster-scope check settles it.
		if !canReadInNamespace(ctx, "", "secrets", "", "list") {
			return "skip", nil
		}
		return "", nil
	case len(scanNamespaces) == 0:
		return "skip", nil
	}

	readable := make([]string, 0, len(scanNamespaces))
	for i := range scanNamespaces {
		if ns := scanNamespaces[i]; canReadInNamespace(ctx, "", "secrets", ns, "list") {
			readable = append(readable, ns)
		}
	}
	if len(readable) > 0 {
		return "override", readable
	}
	return "skip", nil
}

// handleSearch serves the MCP search tool. It parses input.Query, restricts
// the scan to the namespaces the caller may see (intersected with any
// namespace filter in the query), applies per-kind RBAC gating, and returns
// the search result as a JSON text result.
//
// input.Include selects the payload mode ("summary", "raw" or "none"; empty
// means "summary"). input.Filter is an optional expression compiled via
// filter.CachedObjectFilter. input.Limit is passed through to the search
// options. Any input.Context value other than "none" enables the summary
// context builder.
//
// Errors are returned when no search provider is available, the query is
// empty, include is unknown, the filter fails to compile, or the search
// itself fails. When the result was capped, the response is wrapped in
// searchResponseMCP with a narrowHint.
func handleSearch(ctx context.Context, req *mcp.CallToolRequest, input searchInput) (*mcp.CallToolResult, any, error) {
	provider := search.NewCacheProvider()
	if provider == nil {
		return nil, nil, fmt.Errorf("not connected to cluster")
	}
	query := input.Query
	if query == "" {
		return nil, nil, fmt.Errorf("query is required")
	}
	parsed := search.Parse(query)
	allowed := filterNamespacesForUser(ctx, nil)
	// A non-nil but empty allow-list short-circuits to an empty hit list.
	if allowed != nil && len(allowed) == 0 {
		return toJSONResult(search.Result{Hits: []search.Hit{}})
	}
	scanNamespaces := intersectAllowedNamespaces(allowed, parsed.NSFilter)
	// Same short-circuit when the query's namespace filter shares nothing
	// with the caller's allow-list.
	if allowed != nil && len(scanNamespaces) == 0 {
		return toJSONResult(search.Result{Hits: []search.Hit{}})
	}
	// Narrow the parsed query to the effective scan scope.
	parsed.NSFilter = scanNamespaces
	var include search.IncludeMode
	switch input.Include {
	case "", "summary":
		include = search.IncludeSummary
	case "raw":
		include = search.IncludeRaw
	case "none":
		include = search.IncludeNone
	default:
		return nil, nil, fmt.Errorf("unknown include=%q (want: summary, raw, none)", input.Include)
	}

	skipKinds := mcpSearchSkipKinds(ctx)
	// Secrets: per-namespace SAR fanout. See REST handleSearch for rationale.
	// scanNamespaces (not allowed) is the SAR-fanout input — a user with
	// AllowedNamespaces==nil (cluster-wide-pods sentinel) who constrains
	// with `ns:team-a` should fanout over team-a, not run a cluster-scope
	// `list secrets` SAR they may not have.
	var namespacesByKind map[string][]string
	switch decision, scoped := mcpSearchSecretsRBAC(ctx, scanNamespaces); decision {
	case "skip":
		// mcpSearchSkipKinds may have returned a nil map; allocate before
		// writing to it.
		if skipKinds == nil {
			skipKinds = make(map[string]bool)
		}
		skipKinds["Secret"] = true
	case "override":
		// scoped ⊆ scanNamespaces ⊆ parsed.NSFilter already (the SAR fanout
		// iterates scanNamespaces, which is the upstream intersection of
		// allowed and parsed.NSFilter). Use the SAR result directly.
		namespacesByKind = map[string][]string{"Secret": scoped}
	}

	opts := search.Options{
		Limit:            input.Limit,
		Include:          include,
		Namespaces:       scanNamespaces,
		SkipKinds:        skipKinds,
		NamespacesByKind: namespacesByKind,
		// The resource argument is not consulted; the check is keyed on kind
		// and group with the "list" verb.
		CanReadClusterScoped: func(kind, group, resource string) bool {
			return canReadClusterScopedKind(ctx, kind, group, "list")
		},
	}
	if input.Filter != "" {
		f, err := filter.CachedObjectFilter(input.Filter)
		if err != nil {
			return nil, nil, fmt.Errorf("filter: %w", err)
		}
		opts.Filter = f
	}
	// Search uses the dual-index variant: hits are mixed-kind (a single
	// query can return both namespaced Pods and cluster-scoped Nodes),
	// so a single namespace-scoped issue index zeroes issueCount on
	// cluster-scoped hits whose problems live at namespace="". The
	// builder routes per-hit by scope; CanReadClusterScoped above
	// already gates which cluster-scoped kinds are reachable.
	if input.Context != "none" {
		if builder := newSearchSummaryContextBuilder(scanNamespaces); builder != nil {
			opts.SummaryBuilder = search.SummaryBuilderFunc(builder)
		}
	}
	result, err := search.Search(ctx, provider, parsed, opts)
	if err != nil {
		return nil, nil, err
	}
	// Steering hint when the result was capped.
	if result.TotalMatched > result.Total {
		return toJSONResult(searchResponseMCP{
			Result: result,
			NarrowHint: fmt.Sprintf(
				"returned %d of %d hits — narrow with modifiers (kind:, ns:, label:k=v, image:), tighten the query, add a filter= CEL expression, or raise limit (cap 500)",
				result.Total, result.TotalMatched,
			),
		})
	}
	return toJSONResult(result)
}

// searchResponseMCP is the MCP search payload used when a steering hint needs
// to accompany the result. It embeds search.Result so the JSON shape stays
// identical to a plain search.Result except for the added narrowHint field.
type searchResponseMCP struct {
	search.Result
	// NarrowHint is free-text guidance on how to narrow the query; it is
	// omitted from the JSON output when empty.
	NarrowHint string `json:"narrowHint,omitempty"`
}

// toJSONResult encodes data as JSON and wraps the encoded text in an MCP
// result carrying a single text content item. The second return value is
// always nil. If encoding fails, the failure is logged and returned as a
// wrapped error with a nil result.
func toJSONResult(data any) (*mcp.CallToolResult, any, error) {
	payload, err := json.Marshal(data)
	if err != nil {
		log.Printf("[mcp] Failed to marshal result: %v", err)
		return nil, nil, fmt.Errorf("failed to marshal result: %w", err)
	}
	content := []mcp.Content{&mcp.TextContent{Text: string(payload)}}
	return &mcp.CallToolResult{Content: content}, nil, nil
}