"""Filesystem scanner."""

from __future__ import annotations

import fnmatch
import os
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Iterable

from pqc_lint.findings import Finding, ScanReport
from pqc_lint.patterns import ALL_MATCHERS, MATCHERS_BY_LANGUAGE, PatternMatcher
from pqc_lint.rules import RULE_BY_ID

# Glob patterns for paths that are ignored unless the caller overrides
# ``Scanner.excludes``. They are matched with ``fnmatch`` against the
# "/"-normalized path (see ``_matches_any``). In ``fnmatch`` a ``*`` matches
# any run of characters, including "/", so ``**`` behaves like ``*`` here and
# the leading ``**/`` simply requires at least one parent path component.
DEFAULT_EXCLUDES = (
    # Version control metadata and third-party dependency trees.
    "**/.git/**",
    "**/node_modules/**",
    # Python bytecode caches and virtual environments.
    "**/__pycache__/**",
    "**/.venv/**",
    "**/venv/**",
    # Build output directories.
    "**/dist/**",
    "**/build/**",
    # Tool caches.
    "**/.pytest_cache/**",
    "**/.ruff_cache/**",
    # Minified JavaScript bundles.
    "**/*.min.js",
)

# Hard size cap so we don't try to scan 500 MB binaries.
# Used as the default for ``Scanner.max_file_size``; ``Scanner.scan_file``
# returns no findings for files larger than this.
MAX_FILE_SIZE_BYTES = 2 * 1024 * 1024  # 2 MB


def _matches_any(path: str, globs: Iterable[str]) -> bool:
    normalized = path.replace(os.sep, "/")
    return any(fnmatch.fnmatch(normalized, g) for g in globs)


@dataclass
class Scanner:
    """Walks a directory and runs pattern matchers against each file.

    Attributes:
        excludes: ``fnmatch`` patterns; files and directories whose
            "/"-normalized path matches any of them are not scanned.
        languages: Language keys used to select matchers from
            ``MATCHERS_BY_LANGUAGE`` when ``matchers`` is not given.
            Empty means "all matchers"; unknown keys are ignored.
        matchers: Explicit matchers to use. When empty, the list is filled
            in from ``languages`` (or ``ALL_MATCHERS``) after construction.
        max_file_size: Files larger than this many bytes yield no findings.
    """
    excludes: tuple[str, ...] = DEFAULT_EXCLUDES
    languages: tuple[str, ...] = ()          # empty = all
    matchers: list[PatternMatcher] = field(default_factory=list)
    max_file_size: int = MAX_FILE_SIZE_BYTES

    def __post_init__(self) -> None:
        """Fill in ``matchers`` from ``languages`` when none were supplied."""
        if self.matchers:
            return
        if self.languages:
            self.matchers = [
                MATCHERS_BY_LANGUAGE[lang]
                for lang in self.languages
                if lang in MATCHERS_BY_LANGUAGE
            ]
        else:
            self.matchers = list(ALL_MATCHERS)

    def _pick_matcher(self, path: str) -> PatternMatcher | None:
        """Return the first configured matcher that accepts *path*, or None."""
        for m in self.matchers:
            if m.matches_file(path):
                return m
        return None

    def _is_excluded_dir(self, dir_path: str) -> bool:
        """Return True if the directory *dir_path* should not be descended into.

        Patterns such as ``**/.git/**`` only match paths that continue past
        the directory name, so the bare path ``root/.git`` never matches
        them. The path is therefore also tested with a trailing "/" so that
        such patterns prune the directory itself.
        """
        return _matches_any(dir_path, self.excludes) or _matches_any(
            dir_path + "/", self.excludes
        )

    def scan_file(self, file_path: str, root: str | None = None) -> list[Finding]:
        """Scan a single file and return its findings.

        Args:
            file_path: Path of the file to read.
            root: When given, findings are reported against *file_path*
                made relative to this directory; otherwise *file_path* is
                used as-is. Either way separators are normalized to "/".

        Returns:
            The findings produced by the matching matcher, or an empty list
            when no matcher accepts the file, the file exceeds
            ``max_file_size``, or it cannot be read.
        """
        matcher = self._pick_matcher(file_path)
        if matcher is None:
            return []
        try:
            if os.path.getsize(file_path) > self.max_file_size:
                return []
            # errors="replace" means undecodable bytes never raise, so only
            # OS-level failures (missing file, permissions, ...) can occur.
            with open(file_path, "r", encoding="utf-8", errors="replace") as f:
                content = f.read()
        except OSError:
            # Best effort: an unreadable file contributes no findings.
            return []

        rel = os.path.relpath(file_path, root) if root else file_path
        rel = rel.replace(os.sep, "/")
        return list(matcher.scan(rel, content, RULE_BY_ID))

    def _scan_single_file(self, path: str, report: ScanReport) -> None:
        """Scan the one file at *path* and record the outcome in *report*."""
        if self._pick_matcher(path) is None:
            # Same accounting as the directory walk: a file no matcher
            # accepts is skipped, not scanned.
            report.files_skipped += 1
            return
        root = os.path.dirname(path) or "."
        report.findings.extend(self.scan_file(path, root=root))
        report.files_scanned += 1

    def _scan_tree(self, root: str, report: ScanReport) -> None:
        """Walk *root* recursively and record every file's outcome in *report*."""
        for dirpath, dirnames, filenames in os.walk(root):
            # Assigning in place tells os.walk not to descend into excluded
            # directories; sorting keeps the traversal order reproducible.
            dirnames[:] = sorted(
                d
                for d in dirnames
                if not self._is_excluded_dir(os.path.join(dirpath, d))
            )

            for fn in sorted(filenames):
                fp = os.path.join(dirpath, fn)
                if _matches_any(fp, self.excludes) or self._pick_matcher(fp) is None:
                    report.files_skipped += 1
                    continue
                report.findings.extend(self.scan_file(fp, root=root))
                report.files_scanned += 1

    def scan_path(self, path: str) -> ScanReport:
        """Scan a file or directory tree and return a populated report.

        Args:
            path: A file to scan on its own, or a directory to walk
                recursively.

        Returns:
            A ``ScanReport`` with the findings, the start timestamp (UTC,
            ISO 8601), the elapsed time in milliseconds, and the number of
            files scanned and skipped. Files matching ``excludes`` or
            accepted by no matcher count as skipped; files inside pruned
            (excluded) directories are never visited and are not counted.
        """
        # perf_counter is monotonic, so the duration cannot go negative if
        # the wall clock is adjusted mid-scan.
        started = time.perf_counter()
        report = ScanReport(
            scan_root=path,
            started_at=datetime.now(timezone.utc).isoformat(),
        )

        if os.path.isfile(path):
            self._scan_single_file(path, report)
        else:
            self._scan_tree(path, report)

        report.duration_ms = int((time.perf_counter() - started) * 1000)
        return report