#!/usr/bin/env python3
"""Check ARIS skill inventory drift across mainline, Codex mirror, and docs."""
from __future__ import annotations
import re
import sys
from pathlib import Path
# --- Repository layout -------------------------------------------------------
# REPO_ROOT is the parent of the directory that contains this script.
REPO_ROOT = Path(__file__).resolve().parents[1]
SKILLS_ROOT = REPO_ROOT.joinpath("skills")
CODEX_ROOT = SKILLS_ROOT.joinpath("skills-codex")

# --- Documents whose skill inventory / counts are checked ---------------------
CATALOG = REPO_ROOT.joinpath("docs", "SKILLS_CATALOG.md")
README = REPO_ROOT.joinpath("README.md")
README_CN = REPO_ROOT.joinpath("README_CN.md")
AGENT_GUIDE = REPO_ROOT.joinpath("AGENT_GUIDE.md")
ARIS_INTRO = REPO_ROOT.joinpath("docs", "ARIS_INTRO.md")
ARIS_INTRO_HTML = REPO_ROOT.joinpath("docs", "ARIS_INTRO.html")
CODEX_README = CODEX_ROOT.joinpath("README.md")
CODEX_README_CN = CODEX_ROOT.joinpath("README_CN.md")

# UTF-8 byte-order mark; Codex SKILL.md files must not start with it.
BOM = b"\xef\xbb\xbf"

# Substrings that must not appear anywhere in a Codex-mirror SKILL.md.
FORBIDDEN_CODEX_REVIEWER_STRINGS = (
    "mcp__codex__codex",
    "codex-reply",
    "reviewer-continuation",
    "threadId",
)
# Phase A (issue #240): cross-language anchor IDs that MUST exist as
# explicit HTML anchors (`<a id="...">`) in both README.md and README_CN.md
# so that cross-language hyperlinks resolve identically. Adding a new
# numbered section means adding it to both READMEs AND extending this list.
# NOTE(review): the entries with a leading "-" are presumably slugs of
# headings that start with an emoji - confirm against the READMEs.
REQUIRED_README_ANCHORS = (
    "contents",
    "more-than-just-a-prompt",
    "whats-new",
    "quick-start",
    "features",
    "score-progression",
    "community-showcase",
    "awesome-community-skills",
    "workflows",
    "skills-catalog",
    "setup",
    "customization",
    "alternative-model-combinations",
    "community",
    "citation",
    "star-history",
    "acknowledgements",
    "license",
    "prerequisites",
    "install-skills",
    "gpu-server-setup",
    "alt-a-glm--gpt",
    "-optional-gpt-54-pro-via-oracle",
    "-research-wiki--persistent-research-memory",
)
def skill_names(root: Path) -> set[str]:
    """Return the names of the direct subdirectories of *root* that hold a SKILL.md.

    Only one directory level is inspected (``root/<name>/SKILL.md``); the
    skill name is the directory name.
    """
    names: set[str] = set()
    for manifest in root.glob("*/SKILL.md"):
        names.add(manifest.parent.name)
    return names
def allowed_tools(text: str) -> list[str]:
    """Return the comma-separated tokens on the first `allowed-tools:` line.

    Args:
        text: Full contents of a SKILL.md file.

    Returns:
        The stripped, non-empty tokens of the first line that starts with
        ``allowed-tools:``; an empty list when there is no such line or the
        line carries no value.
    """
    # Only spaces/tabs may separate the key from its value. Using `\s*` here
    # would also swallow the newline, so an empty `allowed-tools:` line would
    # capture the *next* line as if it were the tool list.
    match = re.search(r"^allowed-tools:[ \t]*(.*)$", text, flags=re.MULTILINE)
    if match is None:
        return []
    candidates = (raw.strip() for raw in match.group(1).split(","))
    return [tok for tok in candidates if tok]
def frontmatter_split(text: str) -> str:
    """Return the body that follows a leading YAML frontmatter block.

    The frontmatter must start on the very first line with a `---` fence and
    ends at the first subsequent line that is exactly `---`. `---` horizontal
    rules later in the body are therefore not mistaken for the boundary.

    Args:
        text: Full file contents.

    Returns:
        The text after the closing fence, or *text* unchanged when it does
        not begin with a complete frontmatter block.
    """
    # - `\r?\n` accepts both LF and CRLF line endings.
    # - The frontmatter content group is lazily optional (`??`) so an empty
    #   block (`---` immediately followed by `---`) closes at once instead of
    #   running on to a later horizontal rule.
    # - `\Z` lets the closing fence be the last line of a file that has no
    #   trailing newline.
    match = re.match(
        r"\A---\r?\n(?:.*?\r?\n)??---(?:\r?\n|\Z)",
        text,
        flags=re.DOTALL,
    )
    if match is None:
        return text
    return text[match.end():]
def readme_anchors(text: str) -> set[str]:
    """Return the ids of the explicit HTML anchors (`<a id="...">`) in *text*.

    Args:
        text: Markdown/HTML contents of a README.

    Returns:
        The set of `id` attribute values of `<a id="...">` tags; empty when
        the text contains none.
    """
    # The pattern must capture the id value. An empty pattern matches the
    # empty string at every position, which would make the result `{""}` for
    # any input and cause every required anchor to be reported as missing.
    return set(re.findall(r'<a\s+id="([^"]+)"', text))
def numbered_h2_count(text: str) -> int:
    """Count the lines of *text* that open a numbered level-2 heading.

    A numbered H2 is a line starting with ``## ``, then one or more digits,
    a dot, and a whitespace character.
    """
    numbered_heading = re.compile(r"^## \d+\.\s", flags=re.MULTILINE)
    return sum(1 for _ in numbered_heading.finditer(text))
def read(path: Path) -> str:
    """Return the contents of *path* decoded as UTF-8 text."""
    with path.open(mode="r", encoding="utf-8") as handle:
        return handle.read()
def catalog_names() -> set[str]:
    """Return the skill names linked from the catalog document.

    A catalog entry is a markdown link whose label is a back-quoted
    ``/name`` and whose target is a ``../skills/.../SKILL.md`` path; the
    captured name excludes the leading slash.
    """
    entry_link = re.compile(r"\[`/([^`]+)`\]\(\.\./skills/[^)]+/SKILL\.md\)")
    return {hit.group(1) for hit in entry_link.finditer(read(CATALOG))}
def require(condition: bool, message: str, failures: list[str]) -> None:
    """Append *message* to *failures* when *condition* does not hold."""
    if condition:
        return
    failures.append(message)
def require_count(path: Path, text: str, pattern: str, expected_count: int, failures: list[str]) -> None:
    """Check that the skill count quoted in a document equals *expected_count*.

    Args:
        path: Document location; reported relative to ``REPO_ROOT``.
        text: Contents of that document.
        pattern: Regex with a named group ``count`` capturing the number.
        expected_count: The count the document should state.
        failures: List that receives a message when the pattern is absent
            or the captured number differs.
    """
    found = re.search(pattern, text)
    rel = path.relative_to(REPO_ROOT)
    if found is not None:
        actual = int(found.group("count"))
        if actual != expected_count:
            failures.append(f"{rel} reports {actual} skills; expected {expected_count}")
        return
    failures.append(f"{rel} is missing live count pattern: {pattern}")
def check_inventory() -> list[str]:
    """Collect every inventory-drift failure found in the repository.

    The checks run in this order:

    1. Skill-name sets: mainline skills vs. the Codex mirror vs. the catalog.
    2. Live skill counts quoted in the documentation files.
    3. Codex SKILL.md hygiene: no UTF-8 BOM, no forbidden reviewer strings.
    4. README.md / README_CN.md parity: required anchors and the number of
       numbered H2 sections.
    5. Agent-grant hygiene: a mainline skill listing `Agent` in allowed-tools
       must cite fan-out-pattern.md in its body.

    Returns:
        Human-readable failure messages; an empty list when everything is
        consistent. Errors raised while reading the files are not caught.
    """
    failures: list[str] = []

    # --- 1. Skill-name sets ---------------------------------------------------
    # Named `mainline` (not `main`) so the module-level main() is not shadowed.
    mainline = skill_names(SKILLS_ROOT)
    codex = skill_names(CODEX_ROOT)
    catalog = catalog_names()
    missing_codex = sorted(mainline - codex)
    extra_codex = sorted(codex - mainline)
    missing_catalog = sorted(mainline - catalog)
    extra_catalog = sorted(catalog - mainline)
    require(not missing_codex, f"missing Codex mirrors: {', '.join(missing_codex)}", failures)
    require(not extra_codex, f"unexpected Codex-only skills: {', '.join(extra_codex)}", failures)
    require(not missing_catalog, f"missing catalog entries: {', '.join(missing_catalog)}", failures)
    require(not extra_catalog, f"catalog entries without mainline skills: {', '.join(extra_catalog)}", failures)

    # --- 2. Live skill counts -------------------------------------------------
    catalog_text = read(CATALOG)
    readme = read(README)
    readme_cn = read(README_CN)
    agent_guide = read(AGENT_GUIDE)
    aris_intro = read(ARIS_INTRO)
    aris_intro_html = read(ARIS_INTRO_HTML)
    codex_readme = read(CODEX_README)
    codex_readme_cn = read(CODEX_README_CN)
    expected_count = len(mainline)
    # Every pattern must define the named group `count`, because
    # require_count() reads match.group("count").
    # NOTE(review): the non-ASCII literals below (the symbol that opens the
    # README patterns and the phrases in the README_CN / ARIS_INTRO patterns)
    # look mis-decoded in this copy. They are reproduced unchanged; verify
    # them against the documents they are supposed to match.
    count_checks = [
        (CATALOG, catalog_text, r"\*\*(?P<count>\d+) skills\*\*"),
        (README, readme, r"π\s+\*\*(?P<count>\d+) composable skills\*\*"),
        (README, readme, r"ARIS ships \*\*(?P<count>\d+)\+ skills\*\*"),
        (README_CN, readme_cn, r"π\s+\*\*(?P<count>\d+) δΈͺε―η»ε skill\*\*"),
        (README_CN, readme_cn, r"ARIS η°ζ \*\*(?P<count>\d+)\+ δΈͺ skill\*\*"),
        (AGENT_GUIDE, agent_guide, r"Full catalog.*?\*\*(?P<count>\d+) skills\*\*"),
        (ARIS_INTRO, aris_intro, r"collection of \*\*(?P<count>\d+) composable Claude Code skills\*\*"),
        (ARIS_INTRO, aris_intro, r"## The (?P<count>\d+) Skills"),
        (ARIS_INTRO, aris_intro, r"δΈη» (?P<count>\d+) δΈͺε―η»εη Claude Code skills"),
        (ARIS_INTRO_HTML, aris_intro_html, r"collection of (?P<count>\d+) composable Claude Code skills"),
        (ARIS_INTRO_HTML, aris_intro_html, r'id="the-(?P<count>\d+)-skills"'),
        (ARIS_INTRO_HTML, aris_intro_html, r"δΈη» (?P<count>\d+) δΈͺε―η»εη Claude Code skills"),
        (CODEX_README, codex_readme, r"all `(?P<count>\d+)` mainline skills"),
        (CODEX_README_CN, codex_readme_cn, r"`(?P<count>\d+)`[^\n]*skill"),
    ]
    for path, text, pattern in count_checks:
        require_count(path, text, pattern, expected_count, failures)

    # --- 3. Codex SKILL.md hygiene --------------------------------------------
    for skill_file in sorted(CODEX_ROOT.glob("*/SKILL.md")):
        rel = skill_file.relative_to(REPO_ROOT)
        # The BOM is checked on raw bytes, before any text decoding.
        if skill_file.read_bytes().startswith(BOM):
            failures.append(f"{rel} starts with UTF-8 BOM before frontmatter")
        text = read(skill_file)
        for forbidden in FORBIDDEN_CODEX_REVIEWER_STRINGS:
            if forbidden in text:
                failures.append(f"{rel} contains forbidden reviewer string: {forbidden}")

    # --- 4. README parity (EN <-> CN) - Phase A invariant from #240 -----------
    en_anchors = readme_anchors(readme)
    cn_anchors = readme_anchors(readme_cn)
    for required in REQUIRED_README_ANCHORS:
        # Name the missing anchor so the failure is actionable.
        if required not in en_anchors:
            failures.append(f'README.md missing required anchor: <a id="{required}">')
        if required not in cn_anchors:
            failures.append(f'README_CN.md missing required anchor: <a id="{required}">')
    expected_h2 = 17  # number of numbered H2 sections both READMEs must have
    en_h2 = numbered_h2_count(readme)
    cn_h2 = numbered_h2_count(readme_cn)
    require(en_h2 == expected_h2, f"README.md has {en_h2} numbered H2 sections; expected {expected_h2} (Phase A)", failures)
    require(cn_h2 == expected_h2, f"README_CN.md has {cn_h2} numbered H2 sections; expected {expected_h2} (Phase A)", failures)

    # --- 5. Agent-grant hygiene (WB2) -----------------------------------------
    # `Agent` in allowed-tools is the Tier-2 fan-out capability gate. Per
    # shared-references/fan-out-pattern.md it is granted ONLY to skills that
    # actually fan out, and such skills MUST cite the convention doc in their
    # body. A grant without that citation is a vestigial/boilerplate grant
    # and fails the drift check.
    for skill_file in sorted(SKILLS_ROOT.glob("*/SKILL.md")):
        text = read(skill_file)
        if "Agent" not in allowed_tools(text):
            continue
        # Only the body counts: a mention inside the frontmatter is ignored.
        if "fan-out-pattern.md" not in frontmatter_split(text):
            rel = skill_file.relative_to(REPO_ROOT)
            failures.append(
                f"{rel} grants `Agent` in allowed-tools but its body does not "
                f"cite fan-out-pattern.md β vestigial grant or undocumented "
                f"fan-out (see shared-references/fan-out-pattern.md)"
            )
    return failures
def main() -> int:
    """Run the inventory check and report the outcome.

    Returns:
        0 after printing a success line to stdout when no drift is found;
        1 after listing every failure on stderr otherwise.
    """
    problems = check_inventory()
    if not problems:
        print("ARIS skill inventory is consistent.")
        return 0
    print("ARIS skill inventory drift detected:", file=sys.stderr)
    for problem in problems:
        print(f"- {problem}", file=sys.stderr)
    return 1


# Script entry point: the return value of main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())