""" Safe archive auditor and extractor for RAR/ZIP files. - Flags dangerous paths (.. traversal, absolute paths, UNC, weird prefixes) - Optionally extracts only safe entries, preserving directory structure Requires: pip install rarfile (and UnRAR/Unarchiver installed on your system for rarfile to read .rar) """ import os import sys import zipfile from pathlib import Path try: import rarfile # pip install rarfile HAS_RARFILE = True except Exception: HAS_RARFILE = False DANGEROUS_PREFIXES = ( "/", "\\", # absolute on *nix or root-like on Windows "C:\\", "D:\\", # absolute Windows drives (extend as needed) ) UNC_PREFIX = ("\\\\",) # Windows UNC paths def is_dangerous_path(member_name: str) -> bool: # Normalize separators and collapse things like a/b/../c norm = Path(member_name.replace("\\", "/")).as_posix() # Reject absolute/UNC if norm.startswith(DANGEROUS_PREFIXES) or member_name.startswith(UNC_PREFIX): return True # Reject traversal at any position parts = [p for p in norm.split("/") if p not in ("", ".")] if any(p == ".." for p in parts): return True # Extra hardening for Windows reserved names and device paths lowered = norm.lower() if lowered.startswith(("con", "prn", "aux", "nul")): # crude, but catches suspicious names like "con" or "aux.txt" at root return True if lowered.startswith(("\\\\?\\", "\\\\.\\", "?:/")): return True # Disallow weird control chars if any(ord(c) < 32 for c in member_name): return True return False def safe_join(base_dir: Path, member_name: str) -> Path: # Build a final destination path and ensure it stays under base_dir target = base_dir / member_name try: target.resolve().relative_to(base_dir.resolve()) except Exception: raise ValueError(f"path escapes extraction dir: {member_name}") return target def audit_zip(path: Path): issues = [] with zipfile.ZipFile(path) as zf: for info in zf.infolist(): name = info.filename if is_dangerous_path(name): issues.append(("danger", name)) else: issues.append(("ok", name)) return issues def extract_zip_safe(path: Path, dest: Path): dest.mkdir(parents=True, exist_ok=True) with zipfile.ZipFile(path) as zf: for info in zf.infolist(): name = info.filename if is_dangerous_path(name): print(f"[SKIP] dangerous path in ZIP: {name}") continue out_path = safe_join(dest, name) if name.endswith("/"): out_path.mkdir(parents=True, exist_ok=True) else: out_path.parent.mkdir(parents=True, exist_ok=True) with zf.open(info, "r") as src, open(out_path, "wb") as dst: dst.write(src.read()) def audit_rar(path: Path): if not HAS_RARFILE: raise RuntimeError("rarfile module not available. pip install rarfile") issues = [] with rarfile.RarFile(path) as rf: for info in rf.infolist(): name = info.filename if is_dangerous_path(name): issues.append(("danger", name)) else: issues.append(("ok", name)) return issues def extract_rar_safe(path: Path, dest: Path): if not HAS_RARFILE: raise RuntimeError("rarfile module not available. pip install rarfile") dest.mkdir(parents=True, exist_ok=True) with rarfile.RarFile(path) as rf: for info in rf.infolist(): name = info.filename if is_dangerous_path(name): print(f"[SKIP] dangerous path in RAR: {name}") continue out_path = safe_join(dest, name) if info.isdir(): out_path.mkdir(parents=True, exist_ok=True) else: out_path.parent.mkdir(parents=True, exist_ok=True) with rf.open(info, "r") as src, open(out_path, "wb") as dst: dst.write(src.read()) def main(): if len(sys.argv) < 3: print("Usage: python safe_extract.py [dest]") sys.exit(1) cmd = sys.argv[1].lower() archive = Path(sys.argv[2]) if archive.suffix.lower() == ".zip": if cmd == "audit": for status, name in audit_zip(archive): tag = "!!" if status == "danger" else "OK" print(f"[{tag}] {name}") elif cmd == "extract": dest = Path(sys.argv[3]) if len(sys.argv) > 3 else archive.with_suffix("") # default dest extract_zip_safe(archive, dest) print(f"Extracted safe ZIP entries to: {dest}") else: print("Unknown command") elif archive.suffix.lower() == ".rar": if cmd == "audit": for status, name in audit_rar(archive): tag = "!!" if status == "danger" else "OK" print(f"[{tag}] {name}") elif cmd == "extract": dest = Path(sys.argv[3]) if len(sys.argv) > 3 else archive.with_suffix("") extract_rar_safe(archive, dest) print(f"Extracted safe RAR entries to: {dest}") else: print("Unknown command") else: print("Unsupported archive type. Use .zip or .rar") if __name__ == "__main__": main()