#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.11"
# dependencies = [
#   "pymupdf",
#   "rich",
# ]
# ///


# 2015: https://educate.iowa.gov/media/8214/download?inline=
# 2025: https://educate.iowa.gov/media/10837/download?inline
import pathlib
import pymupdf
import re
import urllib.request
import subprocess
import rich.progress

import difflib


# Local cache locations for the two editions of the standards document.
old_path = pathlib.Path("2015.pdf")
new_path = pathlib.Path("2025.pdf")

# Fetch each PDF only when it is not already on disk, 2015 edition first.
for _target, _url in (
    (old_path, "https://educate.iowa.gov/media/8214/download?inline="),
    (new_path, "https://educate.iowa.gov/media/10837/download?inline"),
):
    if _target.exists():
        continue
    urllib.request.urlretrieve(_url, _target)


# Standard code as printed in the 2015 document; the trailing period is required.
_XPR_OLD = re.compile(r"HS-(PS|ESS|LS|ETS|A&P|AST)-?(\d+)-(\d+)\.")
# Standard code as matched against the first table column of the 2025 document;
# the trailing period is optional.
_XPR_NEW = re.compile(r"HS-(PS|ESS|LS|ETS|A&P|AST)-?(\d+)-(\d+)\.?")
# Runs of two or more whitespace characters, collapsed to a single space.
_MULTISPACE = re.compile(r"\s\s+")

# ANSI color prefix for each ndiff line marker.
_DIFF_COLORS = {
    "+": "\033[32m",  # Green for additions
    "-": "\033[31m",  # Red for deletions
    "?": "\033[33m",  # Yellow for diff indicators
}


def _parse_old_records(doc) -> list[tuple[str, str]]:
    """Extract ``(code, statement)`` pairs from the opened 2015 document.

    Only pages whose text contains ``"HS-"`` are considered, and at most one
    record is taken per page (the first code found on it). Pages that mention
    ``"HS-"`` without a complete standard code are skipped.
    """
    # Extract each page's text once and reuse it for filtering and parsing.
    page_texts = [
        text for text in (page.get_text() for page in doc) if "HS-" in text
    ]

    records = []
    for page_text in rich.progress.track(page_texts, description="Parsing 2015"):
        m = _XPR_OLD.search(page_text)
        if m is None:
            # "HS-" appears on the page, but not as a full standard code.
            continue

        domain, topic, number = m.groups()
        code = f"HS-{domain}{topic}-{number}"

        # The statement runs from just after the code up to the
        # "The performance expectation" boilerplate.
        text = page_text[m.end() :].strip().split("The performance expectation")[0]
        text = text.replace("\n", " ").strip()
        text = text.split("[")[0].strip()  # drop everything from the first "["
        text = text.rstrip("*")
        text = _MULTISPACE.sub(" ", text)
        text = text.replace("’", "'")

        records.append((code, text))
    return records


def _parse_new_records(doc) -> list[tuple[str, str]]:
    """Extract ``(code, statement)`` pairs from the opened 2025 document.

    The 2025 document is read through its tables: a row is a standard when
    its first cell starts with a standard code, and the statement is taken
    from the second cell (third cell for ``HS-LS4-3.``).
    """
    pages = [page for page in doc if "HS-" in page.get_text()]

    records = []
    for page in rich.progress.track(pages, description="Parsing 2025"):
        for table in page.find_tables().tables:
            for row in table.extract():
                if not (row and row[0] and _XPR_NEW.match(row[0])):
                    # Header, blank or continuation row: not a standard.
                    continue

                group, text, *_ = row

                if group == "HS-PS-2-4":
                    # consistency
                    group = "HS-PS2-4"

                if group == "HS-LS4-3.":
                    group = "HS-LS4-3"  # remove the .
                    text = row[2]

                text = text.replace("\n", " ").strip()
                text = text.replace("’", "'")
                text = text.replace("cost- benefit", "cost-benefit")  # bad line break

                # Append only rows that matched; appending for every row would
                # repeat the previous record for each non-standard row.
                records.append((group, text))
    return records


def _colorize_diff(diff):
    """Yield each diff line, wrapped in an ANSI color when it has a marker."""
    for line in diff:
        color = _DIFF_COLORS.get(line[:1])
        yield f"{color}{line}\033[0m" if color else line


def _print_diff(old_text: str, new_text: str) -> None:
    """Pretty-print the difference between two strings with colors."""
    diff = difflib.ndiff(old_text.splitlines(), new_text.splitlines())
    print("\n".join(_colorize_diff(diff)))


def _commit_snapshot(path, records: dict, keys, message: str) -> None:
    """Write ``records[k]`` for each key as markdown sections and git-commit it.

    Raises ``subprocess.CalledProcessError`` if ``git add`` or ``git commit``
    exits with a non-zero status.
    """
    sections = [f"## {k}\n\n{records[k]}" for k in keys]
    # Explicit UTF-8 so the output does not depend on the platform locale.
    path.write_text("\n\n".join(sections), encoding="utf-8")

    subprocess.check_call(["git", "add", path.as_posix()])
    subprocess.check_call(["git", "commit", "-m", message])


def main():
    """Compare the 2015 and 2025 high-school science standards.

    Parses both PDFs, prints a colored diff for every standard present in
    both editions whose text changed, writes the shared standards to
    ``standards/hs-science.md`` as two successive git commits (2015 text,
    then 2025 text), and finally lists the standards that were removed and
    added.
    """
    # Context managers close both documents once parsing is finished.
    with pymupdf.open(old_path) as old, pymupdf.open(new_path) as new:
        a = dict(_parse_old_records(old))
        b = dict(_parse_new_records(new))

    # Sorted so every listing below has a deterministic order.
    matched = sorted(a.keys() & b.keys())
    removed = sorted(a.keys() - b.keys())
    added = sorted(b.keys() - a.keys())

    for k in matched:
        old_text = a[k]
        new_text = b[k]

        if old_text != new_text:
            print("-" * 80)
            print(k)
            _print_diff(old_text, new_text)

    # write out the old ones, for git, then overwrite with the new ones so
    # the second commit carries the 2015 -> 2025 changes
    p = pathlib.Path("standards/hs-science.md")
    p.parent.mkdir(parents=True, exist_ok=True)

    _commit_snapshot(p, a, matched, "Added 2015 standards")
    _commit_snapshot(p, b, matched, "Added 2025 changes")

    print("Removed standards:")
    for k in removed:
        print("-" * 80)
        v = a[k]
        print(f"{k}\n{v}", end="\n\n")

    print("Added standards:")
    for k in added:
        print("-" * 80)
        v = b[k]
        print(f"{k}\n{v}", end="\n\n")


# Call main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()