#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import annotations

import argparse
import math
import random
import sys
import time
from typing import List, Tuple, Set, Dict, Optional

# Try to enable BPSW/ECPP via SymPy if available
try:
    from sympy import isprime as _sympy_isprime  # BPSW + extras under the hood
    _HAVE_SYMPY = True
except Exception:
    _HAVE_SYMPY = False

# ---------------- Utilities & small primes ----------------

def sieve_upto(n: int) -> List[int]:
    if n < 2:
        return []
    sieve = bytearray(b"\x01") * (n + 1)
    sieve[0:2] = b"\x00\x00"
    r = int(n**0.5)
    for p in range(2, r + 1):
        if sieve[p]:
            start = p * p
            step = p
            sieve[start:n + 1:step] = b"\x00" * (((n - start) // step) + 1)
    return [i for i, is_p in enumerate(sieve) if is_p]

def first_k_odd_primes(k: int) -> List[int]:
    if k <= 0:
        return []
    # crude overestimate to ensure enough primes
    if k < 6:
        limit = 100
    else:
        kk = float(k)
        limit = int(max(100, kk * (math.log(kk) + math.log(math.log(kk))) * 8))
    while True:
        primes = sieve_upto(limit)
        odd_primes = [p for p in primes if p % 2 == 1]
        if len(odd_primes) >= k:
            return odd_primes[:k]
        limit *= 2

# ---------------- Miller–Rabin (probable primality) ----------------

def _dec(n: int) -> Tuple[int, int]:
    d, s = n - 1, 0
    while d % 2 == 0:
        d //= 2
        s += 1
    return d, s

def _mr_witness(a: int, d: int, n: int, s: int) -> bool:
    x = pow(a, d, n)
    if x in (1, n - 1):
        return False
    for _ in range(s - 1):
        x = (x * x) % n
        if x == n - 1:
            return False
    return True

def is_probable_prime_mr(n: int, rounds: int = 6, rng: Optional[random.Random] = None) -> bool:
    """Your original MR (random bases). Probabilistic."""
    if n < 2:
        return False
    # quick trial division by small primes up to 97
    for p in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97):
        if n == p:
            return True
        if n % p == 0:
            return False
    d, s = _dec(n)
    if rng is None:
        rng = random.Random(2025)
    # Note: for huge n, using fixed good bases is often faster; we keep random here by design.
    for _ in range(rounds):
        a = rng.randrange(2, n - 1)
        if _mr_witness(a, d, n, s):
            return False
    return True

def is_probable_prime_mr_fixed(n: int) -> bool:
    """
    Miller–Rabin with a fixed small set of bases — a strong screen when SymPy isn't available.
    For large n this is still probabilistic, but very reliable in practice.
    """
    if n < 2:
        return False
    # Small primes
    small_primes = (2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37)
    for p in small_primes:
        if n == p:
            return True
        if n % p == 0:
            return False
    d, s = _dec(n)
    for a in (2, 3, 5, 7, 11, 13, 17, 19):  # a modest fixed set
        if a % n == 0:
            return True
        if _mr_witness(a, d, n, s):
            return False
    return True

# ---------------- Baillie–PSW (via SymPy when available) ----------------

def is_probable_prime_bpsw(n: int) -> bool:
    """
    Baillie–PSW path:
      - If SymPy is present, delegate to sympy.isprime (BPSW and, when needed, ECPP),
        which is extremely reliable and often *proves* primality.
      - If SymPy is not present, fall back to a strong MR screen with fixed bases.
    """
    if n < 2:
        return False
    if _HAVE_SYMPY:
        return bool(_sympy_isprime(n))
    # Fallback (no SymPy): MR with fixed bases as a high-quality probabilistic screen.
    return is_probable_prime_mr_fixed(n)

# ---------------- Sampling ----------------

def random_even_with_digits(rng: random.Random, d: int) -> int:
    if d < 1:
        raise ValueError("digits must be >= 1")
    if d == 1:
        return rng.choice([2, 4, 6, 8])
    first = rng.randint(1, 9)
    middle = ''.join(str(rng.randint(0, 9)) for _ in range(d - 2))
    last = rng.choice("02468")
    return int(str(first) + middle + last)

# ---------------- Goldbach engines ----------------

def goldbach_subtractor_scan(
    n: int,
    subtractors: List[int],
    primality: str,
    mr_rounds: int,
    rng: random.Random,
    cap_per_n: Optional[int] = None,
) -> List[Tuple[int, int]]:
    """
    Return up to cap_per_n unordered decompositions found by scanning fixed subtractor primes.
    Keeps scanning (no early stop) and deduplicates unordered pairs.
    """
    if n % 2:
        n -= 1
    seen: Set[Tuple[int, int]] = set()
    recorded: List[Tuple[int, int]] = []

    # choose primality test
    if primality == "bpsw":
        def isprime(q: int) -> bool:
            return is_probable_prime_bpsw(q)
    elif primality == "mr":
        def isprime(q: int) -> bool:
            return is_probable_prime_mr(q, rounds=mr_rounds, rng=rng)
    else:
        raise ValueError(f"unknown primality mode: {primality}")

    for p in subtractors:
        q = n - p
        if q < 2:
            continue
        # parity: q is odd unless q == 2; skip even q > 2 quickly
        if q != 2 and (q & 1) == 0:
            continue
        if isprime(q):
            a, b = (p, q) if p <= q else (q, p)
            if (a, b) not in seen:
                seen.add((a, b))
                recorded.append((a, b))
                if cap_per_n is not None and len(recorded) >= cap_per_n:
                    break
    return recorded

# ---------------- Heuristics (Hardy–Littlewood style, baseline) ----------------

def hl_expected_reps(n: int) -> float:
    """
    Baseline expected number of Goldbach representations ~ n / (ln n)^2.
    (Singular series omitted for simplicity; sufficient for trend-checking.)
    """
    if n < 6:
        return 0.0
    ln = math.log(n)
    return float(n) / (ln * ln)

def heuristic_success_prob(n: int) -> float:
    """
    Tiny-budget success probability proxy:
    p_hit ≈ 1 - exp(-(1/ln(n/2))^2 * sqrt(n)/ln n).
    This increases with n, reflecting that expected reps grow ~ n/(ln n)^2.
    """
    if n < 6:
        return 0.0
    ln = math.log(n)
    ln_half = math.log(n / 2.0)
    p = 1.0 / (ln_half * ln_half)  # local prime density near n/2 squared
    k = math.sqrt(n / ln)          # crude effective trials in a narrow band
    x = -p * k
    if x < -700:                   # clamp to avoid underflow in exp
        return 1.0
    return max(0.0, min(1.0, 1.0 - math.exp(x)))

# ---------------- Cc-like tracker ----------------

class CC:
    def __init__(self, value: float = 0.0):
        self.value = float(value)
    def bump(self, eps: float) -> None:
        self.value = min(1.0, self.value + eps)

# ---------------- RH test via Robin's criterion (equivalent to RH) ----------------

# Euler–Mascheroni (double precision is fine here)
_EULER_GAMMA = 0.5772156649015328606

def _sigma_from_factorization(factors: Dict[int, int]) -> int:
    """
    σ(n) from prime-power factorization: n = ∏ p^a
    σ(n) = ∏ (p^(a+1)-1)/(p-1)
    """
    res = 1
    for p, a in factors.items():
        # integer-safe formula
        res *= (pow(p, a + 1) - 1) // (p - 1)
    return res

def _robin_rhs(n: int) -> float:
    # RHS = e^γ * n * log log n  (only defined for n >= 3 for log log)
    if n < 3:
        return float('inf')
    return math.exp(_EULER_GAMMA) * n * math.log(math.log(n))

def _primorial_until_digits(target_digits: int) -> List[Tuple[int, Dict[int, int]]]:
    """
    Build a sequence of (n, factors) where n is a primorial, increasing until
    the number of decimal digits of n >= target_digits.
    Returns list of all intermediate primorials (useful for a sweep).
    """
    # We only need primes up to ~ ln(10^D) ~ D * ln 10; be generous and grow if needed.
    limit = max(200, int(target_digits * 6))
    while True:
        ps = sieve_upto(limit)
        n = 1
        factors: Dict[int, int] = {}
        out: List[Tuple[int, Dict[int, int]]] = []
        for p in ps:
            n *= p
            factors = dict(factors)
            factors[p] = factors.get(p, 0) + 1
            out.append((n, factors))
            if len(str(n)) >= target_digits:
                return out
        # Not enough? increase the sieve window
        limit *= 2

def _greedy_smooth_enrichments(base_n: int, base_factors: Dict[int,int],
                               max_digits: int,
                               max_extra_exp: int = 4,
                               primes_to_enrich: int = 6) -> List[Tuple[int, Dict[int,int]]]:
    """
    From a base primorial, greedily raise the first few small primes to increase σ(n)/n
    without exceeding a decimal-digit cap. Returns a small set of enriched candidates.
    Keeps exponents nonincreasing with p (a standard heuristic for superabundant forms).
    """
    frontier = [(base_n, dict(base_factors))]
    results = [(base_n, dict(base_factors))]

    small_primes = sorted(base_factors.keys())
    small_primes = [p for p in small_primes if p <= 17][:primes_to_enrich]

    for p in small_primes:
        new_frontier = []
        for (n, fac) in frontier:
            a0 = fac.get(p, 0)
            for da in range(1, max_extra_exp + 1):
                a = a0 + da
                n2 = n * (p ** da)
                if len(str(n2)) > max_digits:
                    break
                fac2 = dict(fac)
                fac2[p] = a
                results.append((n2, fac2))
                new_frontier.append((n2, fac2))
        ranked = sorted(
            new_frontier,
            key=lambda t: _sigma_from_factorization(t[1]) / t[0],
            reverse=True
        )
        frontier = ranked[:8]
    return results

def run_rh_robin_mode(digits: int,
                      max_extra_exp: int = 4,
                      enrich: bool = True) -> None:
    """
    Check Robin's inequality σ(n) < e^γ n log log n for a stress-test list of n
    near 10^digits: primorials (up to the target) and optionally 'smooth enrichments'.
    Prints a summary and worst slack.
    """
    ladder = _primorial_until_digits(digits)

    checked = 0
    violations = 0
    worst_slack = float('inf')
    worst_rec: Optional[Tuple[int, float, float, float, Dict[int,int]]] = None  # (n, sigma, rhs, slack, factors)

    for (n, f) in ladder:
        sigma_n = _sigma_from_factorization(f)
        rhs = _robin_rhs(n)
        slack = (rhs - sigma_n) / rhs  # relative margin (>0 = safe)
        checked += 1
        if slack < worst_slack:
            worst_slack = slack
            worst_rec = (n, float(sigma_n), float(rhs), float(slack), f)
        if sigma_n >= rhs and n >= 5041:
            violations += 1
            print(f"[VIOLATION] n={n}  σ(n)={sigma_n}  rhs={rhs}  (digits={len(str(n))})")

        if enrich:
            for (n2, f2) in _greedy_smooth_enrichments(n, f, max_digits=digits, max_extra_exp=max_extra_exp):
                sigma_n2 = _sigma_from_factorization(f2)
                rhs2 = _robin_rhs(n2)
                slack2 = (rhs2 - sigma_n2) / rhs2
                checked += 1
                if slack2 < worst_slack:
                    worst_slack = slack2
                    worst_rec = (n2, float(sigma_n2), float(rhs2), float(slack2), f2)
                if sigma_n2 >= rhs2 and n2 >= 5041:
                    violations += 1
                    print(f"[VIOLATION] n={n2}  σ(n)={sigma_n2}  rhs={rhs2}  (digits={len(str(n2))})")

    print("=== RH stress test via Robin's criterion ===")
    print(f"target scale: ~10^{digits}  (primorial ladder + smooth enrichments)")
    print(f"cases checked: {checked}  violations: {violations}")
    if worst_rec is not None:
        n0, sig0, rhs0, sl0, fac0 = worst_rec
        print(f"worst relative slack (closest to boundary): {sl0:.6e}")
        print(f"  n (digits={len(str(n0))}): {n0}")
        print(f"  σ(n) = {sig0}")
        print(f"  rhs  = {rhs0}  (= e^γ n log log n)")
        print(f"  σ(n)/[e^γ n log log n] = {sig0 / rhs0:.9f}")
        fac_items = sorted(fac0.items())
        print("  factors:", " * ".join([f"{p}^{a}" for (p,a) in fac_items]))

# ---------------- CLI & main ----------------

def parse_args() -> argparse.Namespace:
    ap = argparse.ArgumentParser(
        description=(
            "Goldbach PoC with heuristic + verification modes, plus RH stress test via Robin's criterion.\n"
            "Modes:\n"
            "  verify:    subtractor-scan verification only\n"
            "  heuristic: heuristic expectations only (works at huge n)\n"
            "  hybrid:    heuristics + tiny scan; track agreement (Cc)\n"
            "  rh-robin:  RH-equivalent inequality (Robin) on primorials + enrichments\n"
        )
    )
    ap.add_argument("--mode", choices=["verify", "heuristic", "hybrid", "rh-robin"], default="hybrid")
    ap.add_argument("--trials", type=int, default=500, help="number of random tests to run (default: 500)")
    ap.add_argument("--digits", type=int, default=30, help="number of digits in scale (even n for Goldbach; ~10^digits for RH test)")
    ap.add_argument("--subtractors", type=int, default=40, help="how many odd primes to subtract (10..200; default: 40)")
    ap.add_argument("--seed", type=int, default=2025, help="RNG seed (default: 2025)")
    ap.add_argument("--mr-rounds", type=int, default=6, help="Miller–Rabin rounds per candidate (default: 6)")
    ap.add_argument("--cap-per-n", type=int, default=1, help="max decomps to record per n in scan (default: 1)")
    ap.add_argument("--eps", type=float, default=0.002, help="Cc bump per agreement (default: 0.002)")
    ap.add_argument("--print-examples", type=int, default=0, help="show up to K example n with results")

    # NEW: choose primality test
    ap.add_argument(
        "--primality",
        choices=["bpsw", "mr"],
        default="bpsw",
        help="primality test for confirming q: 'bpsw' (SymPy isprime if available; fallback MR) or 'mr' (your MR). Default: bpsw"
    )

    # RH-Robin mode options
    ap.add_argument("--robin-extra-exp", type=int, default=4, help="max extra exponent per small prime in RH Robin enrichments (default: 4)")
    ap.add_argument("--robin-no-enrich", action="store_true", help="disable smooth enrichments; primorials only")

    return ap.parse_args()

def main() -> None:
    args = parse_args()

    # Basic validation for Goldbach modes
    if args.mode in ("verify", "heuristic", "hybrid"):
        if not (10 <= args.subtractors <= 200):
            print(f"error: --subtractors must be between 10 and 200 (got {args.subtractors})", file=sys.stderr)
            sys.exit(2)
        if args.digits < 1:
            print(f"error: --digits must be >= 1 (got {args.digits})", file=sys.stderr)
            sys.exit(2)
        if args.primality == "bpsw" and not _HAVE_SYMPY:
            print("[note] SymPy not found; using a strong Miller–Rabin fallback for BPSW mode.", file=sys.stderr)

    # RH Robin mode runs independently and returns
    if args.mode == "rh-robin":
        if args.digits < 3:
            print(f"error: --digits should be >= 3 for RH Robin (log log n requires n>=3)", file=sys.stderr)
            sys.exit(2)
        run_rh_robin_mode(
            digits=args.digits,
            max_extra_exp=max(1, int(args.robin_extra_exp)),
            enrich=(not args.robin_no_enrich),
        )
        return

    rng = random.Random(args.seed)
    subtractor_primes = first_k_odd_primes(args.subtractors)

    t0 = time.time()

    # Verification stats
    total_hits = 0                # n with ≥1 decomp (verification/hybrid only)
    total_decomps = 0
    per_n_counts: List[int] = []
    example_counter = 0
    example_limit = max(0, int(args.print_examples))

    # Heuristic stats
    h_sum_expected = 0.0
    h_sum_success_prob = 0.0
    h_high_conf = 0

    # Agreement / Cc
    cc = CC(0.0)
    agreements = 0
    trials = args.trials

    examples: List[Tuple[int, Dict[str, object]]] = []

    for _ in range(trials):
        n = random_even_with_digits(rng, args.digits)

        # Heuristics
        H_E = hl_expected_reps(n)
        H_P = heuristic_success_prob(n)

        if args.mode in ("heuristic", "hybrid"):
            h_sum_expected += H_E
            h_sum_success_prob += H_P
            if H_P >= 0.8:
                h_high_conf += 1

        rec: Dict[str, object] = {"n": n, "H_E": H_E, "H_P": H_P}

        # Verification / hybrid
        if args.mode in ("verify", "hybrid"):
            decomps = goldbach_subtractor_scan(
                n,
                subtractor_primes,
                args.primality,
                args.mr_rounds,
                rng,
                cap_per_n=args.cap_per_n
            )
            c = len(decomps)
            per_n_counts.append(c)
            total_decomps += c
            if c > 0:
                total_hits += 1
                rec["decomps"] = decomps
                rec["hit"] = True
            else:
                rec["decomps"] = []
                rec["hit"] = False

            if args.mode == "hybrid":
                # An "agreement": heuristic predicts likely success (H_P >= 0.6) and we hit, or predicts unlikely and we miss
                pred_hit = (H_P >= 0.6)
                if (c > 0) == pred_hit:
                    agreements += 1
                    cc.bump(args.eps)

        if example_counter < example_limit:
            examples.append((n, rec))
            example_counter += 1

    elapsed = time.time() - t0
    per_n_time = elapsed / trials if trials else float("nan")

    # Summaries
    print("=== Goldbach Proof of Concept (heuristic-aware) ===")
    print(f"mode: {args.mode}   trials: {trials}   digits(n): {args.digits}")
    print(f"subtractors: {args.subtractors}   mr_rounds: {args.mr_rounds}   cap_per_n: {args.cap_per_n}")
    print(f"seed: {args.seed}")
    print(f"primality: {args.primality}{' (SymPy isprime)' if args.primality=='bpsw' and _HAVE_SYMPY else ''}")
    print(f"elapsed: {elapsed:.6f} s   per_n: {per_n_time*1000:.3f} ms")

    if args.mode in ("heuristic", "hybrid"):
        avg_E = h_sum_expected / trials if trials else 0.0
        avg_P = h_sum_success_prob / trials if trials else 0.0
        print("\n--- Heuristic summary ---")
        print(f"avg expected reps E[R(n)] ~ n/(ln n)^2 : {avg_E:.3f}")
        print(f"avg success-prob proxy (tiny-budget)    : {avg_P:.3f}")
        print(f"high-confidence cases (H_P >= 0.8)      : {h_high_conf}/{trials}")

    if args.mode in ("verify", "hybrid"):
        hit_rate = total_hits / trials if trials else float("nan")
        max_decomps = max(per_n_counts) if per_n_counts else 0
        min_decomps = min(per_n_counts) if per_n_counts else 0
        mean_decomps_per_hit = (total_decomps / total_hits) if total_hits else 0.0
        print("\n--- Verification (subtractor scan) ---")
        print(f"n with ≥1 decomp: {total_hits}   hit_rate: {hit_rate:.3f}")
        print(f"total decomps recorded: {total_decomps}")
        print(f"per-hit mean decomps: {mean_decomps_per_hit:.3f}   per-n min/max (over all n): {min_decomps}/{max_decomps}")

    if args.mode == "hybrid":
        agree_rate = agreements / trials if trials else 0.0
        print("\n--- Agreement & Cc-like metric ---")
        print(f"agreements (heuristic vs verification): {agreements}/{trials}   agree_rate: {agree_rate:.3f}")
        print(f"final Cc (bumped by eps on agreement): {cc.value:.6f}   eps: {args.eps}")

    # Examples
    if examples:
        print("\n--- examples (first few) ---")
        for n, rec in examples:
            if args.mode == "heuristic":
                print(f"{n}  H_E={rec['H_E']:.3f}  H_P={rec['H_P']:.3f}")
            elif args.mode == "verify":
                ds = rec["decomps"]
                note = "HIT" if rec.get("hit") else "miss"
                shown = ", ".join(f"({p},{q})" for (p, q) in ds) if ds else ""
                print(f"{n}  {note}  decomps: {shown}")
            else:
                ds = rec["decomps"]
                note = "HIT" if rec.get("hit") else "miss"
                tail = f" with {', '.join(f'({p},{q})' for (p,q) in ds)}" if ds else ""
                print(f"{n}  H_E={rec['H_E']:.3f}  H_P={rec['H_P']:.3f}  -> {note}{tail}")

if __name__ == "__main__":
    main()