#!/usr/bin/env python3
"""
Simple PDF reader to test pypdf behavior for vulnerability CVE-2026-24688.

Usage:
    python simple_read_pdf.py <pdf_file>

This will attempt to:
1. Open the PDF
2. Access basic metadata
3. Access outline/bookmarks (THIS is where circular ref vulnerability triggers)
4. Extract text from first page

⚠️ WARNING: If PDF has circular outline references, this will hang!
"""

import sys
import time
from pathlib import Path

# Add pypdf to path: a pypdf checkout placed next to this script takes
# precedence over any installed copy, so the insert must precede the import.
sys.path.insert(0, str(Path(__file__).parent))

try:
    from pypdf import PdfReader
except ImportError as exc:
    # Keep the module importable so the usage text and the "file not found"
    # path still work; read_pdf() reports the import failure before opening.
    PdfReader = None
    _PYPDF_IMPORT_ERROR = exc
else:
    _PYPDF_IMPORT_ERROR = None

# Horizontal rule used for every banner printed by this script.
_RULE = "=" * 70

# Number of top-level outline entries shown in the preview.
_MAX_BOOKMARKS_SHOWN = 5

# Number of characters of extracted page text shown in the preview.
_TEXT_PREVIEW_CHARS = 200


def _open_reader(path: Path):
    """Step 1: open *path* with pypdf and return the reader, or None on failure."""
    print("Step 1: Opening PDF...")
    start = time.perf_counter()
    try:
        reader = PdfReader(str(path))
    except Exception as e:
        print(f"❌ Failed to open: {e}")
        return None
    elapsed = time.perf_counter() - start
    print(f"✅ Opened successfully ({elapsed:.3f}s)")
    return reader


def _print_metadata(reader) -> None:
    """Step 2: print document metadata, page count and encryption flag.

    Any failure is reported as a warning only; it does not abort the run.
    """
    print("Step 2: Reading metadata...")
    try:
        metadata = reader.metadata
        if metadata:
            print(f" Title: {metadata.get('/Title', 'N/A')}")
            print(f" Author: {metadata.get('/Author', 'N/A')}")
            print(f" Subject: {metadata.get('/Subject', 'N/A')}")
            print(f" Creator: {metadata.get('/Creator', 'N/A')}")
        else:
            print(" No metadata found")
        print(f" Pages: {len(reader.pages)}")
        print(f" Encrypted: {reader.is_encrypted}")
    except Exception as e:
        print(f"⚠️ Warning: {e}")


def _print_outline(reader) -> bool:
    """Step 3: read the outline/bookmarks and print a short preview.

    Returns:
        True when the outline was read (or the document has none); False when
        the read was interrupted with Ctrl+C, hit the recursion limit, or
        raised any other exception.
    """
    print("Step 3: Reading outline/bookmarks...")
    print("⚠️ THIS IS WHERE CIRCULAR REFERENCE VULNERABILITY TRIGGERS!")
    print("⏳ If this hangs, you'll need to Ctrl+C to kill it...")
    print()

    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # wall-clock adjustments while the outline access is running.
    start = time.perf_counter()
    try:
        outline = reader.outline
        elapsed = time.perf_counter() - start

        if outline:
            print(f"✅ Outline read successfully ({elapsed:.3f}s)")
            print(f" Bookmark count: {len(outline)}")

            # Show first few bookmarks
            print("\n First few bookmarks:")
            for number, item in enumerate(outline[:_MAX_BOOKMARKS_SHOWN], start=1):
                if isinstance(item, list):
                    # A list entry holds the children of the preceding item.
                    print(f" [{number}] (nested outline)")
                else:
                    title = item.get('/Title', 'Untitled')
                    print(f" [{number}] {title}")

            if len(outline) > _MAX_BOOKMARKS_SHOWN:
                print(f" ... and {len(outline) - _MAX_BOOKMARKS_SHOWN} more")
        else:
            print(f"✅ No outline/bookmarks ({elapsed:.3f}s)")

    except KeyboardInterrupt:
        print()
        print()
        print(_RULE)
        print("❌ KILLED BY USER (Ctrl+C)")
        print(_RULE)
        print()
        print("🔥 This PDF has CIRCULAR OUTLINE REFERENCES!")
        print(" The code was stuck in an infinite loop.")
        print()
        print("This demonstrates the vulnerability:")
        print(" Location: pypdf/_doc_common.py:858-873")
        print(" Issue: No cycle detection in outline traversal")
        print(" Impact: Denial of Service (infinite loop)")
        print()
        return False

    except RecursionError as e:
        # Must precede the generic handler: RecursionError is an Exception.
        elapsed = time.perf_counter() - start
        print(f"❌ RecursionError after {elapsed:.3f}s: {e}")
        print()
        print("🔥 This PDF has NESTED CIRCULAR REFERENCES!")
        print(" The code exceeded Python's recursion limit.")
        print()
        return False

    except Exception as e:
        elapsed = time.perf_counter() - start
        print(f"⚠️ Error after {elapsed:.3f}s: {e}")
        return False

    return True


def _print_first_page_text(reader) -> None:
    """Step 4: extract text from the first page and print a short preview.

    Any failure is reported as a warning only; it does not abort the run.
    """
    print("Step 4: Extracting text from first page...")
    try:
        if len(reader.pages) > 0:
            first_page = reader.pages[0]
            text = first_page.extract_text()
            if text:
                print(f"✅ Text extracted ({len(text)} characters)")
                print("\n First 200 characters:")
                print(" " + "-" * 66)
                # Indent continuation lines so the preview stays aligned.
                preview = text[:_TEXT_PREVIEW_CHARS].replace('\n', '\n ')
                print(f" {preview}")
                if len(text) > _TEXT_PREVIEW_CHARS:
                    print(" ...")
                print(" " + "-" * 66)
            else:
                print(" No text found on first page")
        else:
            print(" No pages in PDF")
    except Exception as e:
        print(f"⚠️ Warning: {e}")


def read_pdf(pdf_path: str) -> int:
    """
    Read PDF and display information.

    Runs four steps in order: open the file, print metadata, read the
    outline/bookmarks, and extract text from the first page. Metadata and
    text-extraction failures are printed as warnings; failures while opening
    the file or reading the outline abort the run.

    Args:
        pdf_path: Path to PDF file

    Returns:
        0 when all steps ran to the end; 1 when the file does not exist,
        pypdf could not be imported, the PDF could not be opened, or reading
        the outline failed or was interrupted.
    """
    path = Path(pdf_path)

    if not path.exists():
        print(f"❌ Error: File not found: {path}")
        return 1

    if PdfReader is None:
        print(f"❌ Error: pypdf could not be imported: {_PYPDF_IMPORT_ERROR}")
        return 1

    print(_RULE)
    print(f"📄 Reading PDF: {path.name}")
    print(_RULE)
    print()

    # Step 1: Open PDF
    reader = _open_reader(path)
    if reader is None:
        return 1
    print()

    # Step 2: Basic metadata
    _print_metadata(reader)
    print()

    # Step 3: Outline (THIS IS WHERE VULNERABILITY TRIGGERS!)
    if not _print_outline(reader):
        return 1
    print()

    # Step 4: Extract text from first page
    _print_first_page_text(reader)
    print()

    print(_RULE)
    print("✅ PDF read successfully - No vulnerabilities detected")
    print(_RULE)

    return 0


def main() -> int:
    """Command-line entry point.

    Prints usage and returns 1 when no PDF path is given on the command line;
    otherwise returns the exit code produced by read_pdf() for the first
    argument.
    """
    if len(sys.argv) < 2:
        print("Simple PDF Reader (pypdf)")
        print()
        print("Usage:")
        print(" python simple_read_pdf.py <pdf_file>")
        print()
        print("Examples:")
        print(" python simple_read_pdf.py document.pdf")
        print(" python simple_read_pdf.py /path/to/file.pdf")
        print()
        print("To test the circular reference vulnerability:")
        print(" python simple_read_pdf.py malicious_circular_outline.pdf")
        print()
        print("⚠️ WARNING: Malicious PDFs will hang! Use Ctrl+C to kill.")
        print()
        return 1

    return read_pdf(sys.argv[1])


if __name__ == "__main__":
    try:
        sys.exit(main())
    except KeyboardInterrupt:
        # Ctrl+C outside the outline step (which handles it itself).
        print()
        print()
        print(_RULE)
        print("⚠️ INTERRUPTED BY USER")
        print(_RULE)
        sys.exit(1)