#!/usr/bin/env python3

# Reusable MD5 collision for Wasm files
# via pre-computed UniColl prefixes

# Ange Albertini 2023

import argparse
import hashlib
import sys

FILETYPE = 'WebAssembly'
MAGIC = b"\0asm\1\0\0\0"
EXT = 'wasm'

# Section id byte emitted in front of every wrapping section.
CUSTOM_SECTION = b"\0"

# MD5 constant
BLOCK_SIZE = 0x40

# For this prefix pair:
# index of the first unicoll block
UNICOLL_INDEX = 1
# incremented position in the unicoll block
UNICOLL_INCPOS = 0x9
# Landing offset after the increment position
DELTA = 1
# Jump between the 2 unicoll blocks - usually 0x100,
# but 0x80 here because the increment is on a leb128
UNICOLL_GAP = 0x80


def toLEB128(n):
    """Encode the non-negative integer *n* as unsigned LEB128.

    Seven bits are emitted per byte, least-significant group first; every
    byte except the last has its high bit (0x80) set.

    Raises:
        ValueError: if *n* is negative (the shift loop would never end).
    """
    if n < 0:
        raise ValueError("toLEB128 only encodes non-negative integers")
    buf = []
    while True:
        out = n & 0x7f
        n >>= 7
        if n:
            # More groups follow: set the continuation bit.
            buf.append(out | 0x80)
        else:
            buf.append(out)
            break
    return bytes(buf)


# Cheap self-checks of the encoder, run on import.
assert toLEB128(256) == b'\x80\x02'
assert toLEB128(197) == b'\xC5\x01'
assert toLEB128(129) == b'\x81\x01'
assert toLEB128(128) == b'\x80\x01'
assert toLEB128(127) == b'\x7f'


def wrapper(length, name=b""):
    """Return the section header for a payload of *length* bytes.

    The header is the CUSTOM_SECTION id byte followed by the LEB128-encoded
    size. *name* is accepted for interface compatibility but is not
    emitted: the name header is empty, so the size covers the payload only.
    """
    header = b""
    return CUSTOM_SECTION + toLEB128(len(header) + length)


def wrap(parasite, name=b""):
    """Return *parasite* prefixed with the header built by `wrapper`."""
    return wrapper(len(parasite), name) + parasite


def check_magic(contents):
    """Return True when *contents* starts with the MAGIC signature."""
    return contents.startswith(MAGIC)


def load_sections(filename, label):
    """Read *filename* and return its bytes after the 8-byte MAGIC header.

    *label* ("A" or "B") is only used in the error message. If the file
    does not start with MAGIC, an error is printed and the process exits
    with status 1.
    """
    with open(filename, "rb") as f:
        contents = f.read()
    if not check_magic(contents):
        print("Error: File %s (%s) is not a valid %s file."
              % (label, filename, FILETYPE))
        sys.exit(1)
    return contents[len(MAGIC):]


def build_suffix(sections_a, sections_b):
    """Build the byte string appended after either collision prefix.

    Layout, in order:
      * ">" filler,
      * a CUSTOM_SECTION id and LEB128 size,
      * "<" filler,
      * *sections_a* verbatim,
      * *sections_b* wrapped with `wrap`.

    The size written after the id spans the "<" filler, *sections_a* and
    the header of the wrapped *sections_b*.
    """
    wrapped_b = wrap(sections_b)

    jump = UNICOLL_GAP - 1 + len(sections_a)
    jump128 = len(toLEB128(jump))
    # Number of header bytes `wrap` puts in front of sections_b.
    prewrap_b = len(wrapped_b) - len(sections_b)

    # Filler from offset (UNICOLL_INDEX + 2) * BLOCK_SIZE (presumably where
    # the prefix, ending with its two unicoll blocks, stops) up to the
    # landing offset derived from the constants above.
    landing = (UNICOLL_INDEX * BLOCK_SIZE + UNICOLL_INCPOS + DELTA
               + UNICOLL_GAP)
    filler = landing - (UNICOLL_INDEX + 2) * BLOCK_SIZE

    # NOTE(review): the "<" padding is sized with jump128, which assumes the
    # size field below encodes to the same number of LEB128 bytes as
    # `jump` - confirm for inputs near a 7-bit boundary.
    return b"".join([
        b">" * filler,
        CUSTOM_SECTION,
        toLEB128(jump - jump128 + prewrap_b),
        b"<" * (UNICOLL_GAP - 1 - jump128),
        sections_a,
        wrapped_b,
    ])


def main(argv=None):
    """Command-line entry point: write two colliding files.

    Reads the two input modules named on the command line and the
    pre-computed prefixes 'wasm1.bin' / 'wasm2.bin' from the current
    directory, then writes 'coll1-<md5>.wasm' and 'coll2-<md5>.wasm'.
    """
    parser = argparse.ArgumentParser(
        description="Generate %s MD5 collisions." % (FILETYPE))
    parser.add_argument('file1', help="first 'top' input file.")
    parser.add_argument('file2', help="second 'bottom' input file.")
    args = parser.parse_args(argv)

    sections_a = load_sections(args.file1, "A")
    sections_b = load_sections(args.file2, "B")

    with open('wasm1.bin', "rb") as f:
        prefix_s = f.read()
    with open('wasm2.bin', "rb") as f:
        prefix_l = f.read()

    # The two prefixes must differ yet share an MD5.
    assert hashlib.md5(prefix_s).digest() == hashlib.md5(prefix_l).digest()
    assert hashlib.sha1(prefix_s).digest() != hashlib.sha1(prefix_l).digest()

    suffix = build_suffix(sections_a, sections_b)
    coll_s = prefix_s + suffix
    coll_l = prefix_l + suffix

    # Appending the same suffix must keep the MD5 collision.
    assert hashlib.md5(coll_s).digest() == hashlib.md5(coll_l).digest()
    assert hashlib.sha1(coll_s).digest() != hashlib.sha1(coll_l).digest()

    digest_prefix = hashlib.md5(coll_s).hexdigest()[:8]
    cn1 = "coll1-%s.%s" % (digest_prefix, EXT)
    cn2 = "coll2-%s.%s" % (digest_prefix, EXT)

    with open(cn1, "wb") as f:
        f.write(coll_s)
    with open(cn2, "wb") as f:
        f.write(coll_l)

    print("Collision successful: %s / %s" % (cn1, cn2))


if __name__ == "__main__":
    main()