From c9ec5b1a5a5e994e453441f8626a7a6ea779262c Mon Sep 17 00:00:00 2001 From: Oleksandr Natalenko Date: Mon, 8 May 2023 22:21:53 +0200 Subject: [PATCH] mm: expose per-process KSM control via syscalls d7597f59d1d3 added a new API to enable per-process KSM control. It however uses prctl, which doesn't allow controlling KSM from outside of the current process. Hence, expose this API via 3 syscalls: process_ksm_enable, process_ksm_disable and process_ksm_status. Given sufficient privileges, auto-KSM can be enable by another process. Since these syscalls are not in the upstream kernel, also expose their numbers under /sys/kernel/process_ksm so that userspace tooling like uksmd knows how to use them. Signed-off-by: Oleksandr Natalenko --- arch/alpha/kernel/syscalls/syscall.tbl | 3 + arch/arm/tools/syscall.tbl | 3 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 6 + arch/ia64/kernel/syscalls/syscall.tbl | 3 + arch/m68k/kernel/syscalls/syscall.tbl | 3 + arch/microblaze/kernel/syscalls/syscall.tbl | 3 + arch/mips/kernel/syscalls/syscall_n32.tbl | 3 + arch/mips/kernel/syscalls/syscall_n64.tbl | 3 + arch/mips/kernel/syscalls/syscall_o32.tbl | 3 + arch/parisc/kernel/syscalls/syscall.tbl | 3 + arch/powerpc/kernel/syscalls/syscall.tbl | 3 + arch/s390/kernel/syscalls/syscall.tbl | 3 + arch/sh/kernel/syscalls/syscall.tbl | 3 + arch/sparc/kernel/syscalls/syscall.tbl | 3 + arch/x86/entry/syscalls/syscall_32.tbl | 3 + arch/x86/entry/syscalls/syscall_64.tbl | 3 + arch/xtensa/kernel/syscalls/syscall.tbl | 3 + include/linux/syscalls.h | 3 + include/uapi/asm-generic/unistd.h | 11 +- kernel/sys.c | 147 ++++++++++++++++++++ kernel/sys_ni.c | 3 + 22 files changed, 218 insertions(+), 2 deletions(-) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 8ebacf37a..9b8afad2b 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -490,3 +490,6 @@ 558 common process_mrelease sys_process_mrelease 559 common futex_waitv sys_futex_waitv 560 common set_mempolicy_home_node sys_ni_syscall +561 common process_ksm_enable sys_process_ksm_enable +562 common process_ksm_disable sys_process_ksm_disable +563 common process_ksm_status sys_process_ksm_status diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index ac964612d..b8a2f2689 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -464,3 +464,6 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common process_ksm_enable sys_process_ksm_enable +452 common process_ksm_disable sys_process_ksm_disable +453 common process_ksm_status sys_process_ksm_status diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 037feba03..6a28fb91b 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -39,7 +39,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 451 +#define __NR_compat_syscalls 454 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 604a2053d..72e380b95 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -907,6 +907,12 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease) __SYSCALL(__NR_futex_waitv, sys_futex_waitv) #define __NR_set_mempolicy_home_node 450 __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) +#define __NR_process_ksm_enable 451 +__SYSCALL(__NR_process_ksm_enable, sys_process_ksm_enable) +#define __NR_process_ksm_disable 452 +__SYSCALL(__NR_process_ksm_disable, sys_process_ksm_disable) +#define __NR_process_ksm_status 453 +__SYSCALL(__NR_process_ksm_status, sys_process_ksm_status) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 72c929d99..aa698c590 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -371,3 +371,6 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common process_ksm_enable sys_process_ksm_enable +452 common process_ksm_disable sys_process_ksm_disable +453 common process_ksm_status sys_process_ksm_status diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index b1f3940bc..4e9d10dfd 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -450,3 +450,6 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common process_ksm_enable sys_process_ksm_enable +452 common process_ksm_disable sys_process_ksm_disable +453 common process_ksm_status sys_process_ksm_status diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 820145e47..f89d989f9 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -456,3 +456,6 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common process_ksm_enable sys_process_ksm_enable +452 common process_ksm_disable sys_process_ksm_disable +453 common process_ksm_status sys_process_ksm_status diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 253ff994e..fbd19a419 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -389,3 +389,6 @@ 448 n32 process_mrelease sys_process_mrelease 449 n32 futex_waitv sys_futex_waitv 450 n32 set_mempolicy_home_node sys_set_mempolicy_home_node +451 n32 process_ksm_enable sys_process_ksm_enable +452 n32 process_ksm_disable sys_process_ksm_disable +453 n32 process_ksm_status sys_process_ksm_status diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 3f1886ad9..32694bd4a 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -365,3 +365,6 @@ 448 n64 process_mrelease sys_process_mrelease 449 n64 futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 n64 process_ksm_enable sys_process_ksm_enable +452 n64 process_ksm_disable sys_process_ksm_disable +453 n64 process_ksm_status sys_process_ksm_status diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 8f243e35a..6463565ed 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -438,3 +438,6 @@ 448 o32 process_mrelease sys_process_mrelease 449 o32 futex_waitv sys_futex_waitv 450 o32 set_mempolicy_home_node sys_set_mempolicy_home_node +451 o32 process_ksm_enable sys_process_ksm_enable +452 o32 process_ksm_disable sys_process_ksm_disable +453 o32 process_ksm_status sys_process_ksm_status diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 0e42fceb2..7862be4ce 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -448,3 +448,6 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common process_ksm_enable sys_process_ksm_enable +452 common process_ksm_disable sys_process_ksm_disable +453 common process_ksm_status sys_process_ksm_status diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index a0be12747..8d986c713 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -537,3 +537,6 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node +451 common process_ksm_enable sys_process_ksm_enable +452 common process_ksm_disable sys_process_ksm_disable +453 common process_ksm_status sys_process_ksm_status diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index b68f47541..388f20823 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -453,3 +453,6 @@ 448 common process_mrelease sys_process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node +451 common process_ksm_enable sys_process_ksm_enable sys_process_ksm_enable +452 common process_ksm_disable sys_process_ksm_disable sys_process_ksm_disable +453 common process_ksm_status sys_process_ksm_status sys_process_ksm_status diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 2de85c977..df431a526 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -453,3 +453,6 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common process_ksm_enable sys_process_ksm_enable +452 common process_ksm_disable sys_process_ksm_disable +453 common process_ksm_status sys_process_ksm_status diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 4398cc6fb..f757d9623 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -496,3 +496,6 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common process_ksm_enable sys_process_ksm_enable +452 common process_ksm_disable sys_process_ksm_disable +453 common process_ksm_status sys_process_ksm_status diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 320480a8d..f18baa583 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -455,3 +455,6 @@ 448 i386 process_mrelease sys_process_mrelease 449 i386 futex_waitv sys_futex_waitv 450 i386 set_mempolicy_home_node sys_set_mempolicy_home_node +451 i386 process_ksm_enable sys_process_ksm_enable +452 i386 process_ksm_disable sys_process_ksm_disable +453 i386 process_ksm_status sys_process_ksm_status diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index c84d12608..03187452c 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -372,6 +372,9 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common process_ksm_enable sys_process_ksm_enable +452 common process_ksm_disable sys_process_ksm_disable +453 common process_ksm_status sys_process_ksm_status # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 52c94ab5c..4002ab783 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -421,3 +421,6 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common process_ksm_enable sys_process_ksm_enable +452 common process_ksm_disable sys_process_ksm_disable +453 common process_ksm_status sys_process_ksm_status diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 33a0ee3bc..9f5c1a070 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -919,6 +919,9 @@ asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior); asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec, size_t vlen, int behavior, unsigned int flags); asmlinkage long sys_process_mrelease(int pidfd, unsigned int flags); +asmlinkage long sys_process_ksm_enable(int pidfd, unsigned int flags); +asmlinkage long sys_process_ksm_disable(int pidfd, unsigned int flags); +asmlinkage long sys_process_ksm_status(int pidfd, unsigned int flags); asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long flags); diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 45fa180cc..98731e18a 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -886,8 +886,17 @@ __SYSCALL(__NR_futex_waitv, sys_futex_waitv) #define __NR_set_mempolicy_home_node 450 __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) +#define __NR_process_ksm_enable 451 +__SYSCALL(__NR_process_ksm_enable, sys_process_ksm_enable) + +#define __NR_process_ksm_disable 452 +__SYSCALL(__NR_process_ksm_disable, sys_process_ksm_disable) + +#define __NR_process_ksm_status 453 +__SYSCALL(__NR_process_ksm_status, sys_process_ksm_status) + #undef __NR_syscalls -#define __NR_syscalls 451 +#define __NR_syscalls 454 /* * 32 bit systems traditionally used different diff --git a/kernel/sys.c b/kernel/sys.c index 339fee3ef..6af3987e7 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2715,6 +2715,153 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return error; } +#ifdef CONFIG_KSM +enum pkc_action { + PKSM_ENABLE = 0, + PKSM_DISABLE, + PKSM_STATUS, +}; + +static long do_process_ksm_control(int pidfd, enum pkc_action action) +{ + long ret; + struct pid *pid; + struct task_struct *task; + struct mm_struct *mm; + unsigned int f_flags; + + pid = pidfd_get_pid(pidfd, &f_flags); + if (IS_ERR(pid)) { + ret = PTR_ERR(pid); + goto out; + } + + task = get_pid_task(pid, PIDTYPE_PID); + if (!task) { + ret = -ESRCH; + goto put_pid; + } + + /* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */ + mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); + if (IS_ERR_OR_NULL(mm)) { + ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH; + goto release_task; + } + + /* Require CAP_SYS_NICE for influencing process performance. */ + if (!capable(CAP_SYS_NICE)) { + ret = -EPERM; + goto release_mm; + } + + if (mmap_write_lock_killable(mm)) { + ret = -EINTR; + goto release_mm; + } + + switch (action) { + case PKSM_ENABLE: + ret = ksm_enable_merge_any(mm); + break; + case PKSM_DISABLE: + ret = ksm_disable_merge_any(mm); + break; + case PKSM_STATUS: + ret = !!test_bit(MMF_VM_MERGE_ANY, &mm->flags); + break; + } + + mmap_write_unlock(mm); + +release_mm: + mmput(mm); +release_task: + put_task_struct(task); +put_pid: + put_pid(pid); +out: + return ret; +} +#endif /* CONFIG_KSM */ + +SYSCALL_DEFINE2(process_ksm_enable, int, pidfd, unsigned int, flags) +{ +#ifdef CONFIG_KSM + if (flags != 0) + return -EINVAL; + + return do_process_ksm_control(pidfd, PKSM_ENABLE); +#else /* CONFIG_KSM */ + return -ENOSYS; +#endif /* CONFIG_KSM */ +} + +SYSCALL_DEFINE2(process_ksm_disable, int, pidfd, unsigned int, flags) +{ +#ifdef CONFIG_KSM + if (flags != 0) + return -EINVAL; + + return do_process_ksm_control(pidfd, PKSM_DISABLE); +#else /* CONFIG_KSM */ + return -ENOSYS; +#endif /* CONFIG_KSM */ +} + +SYSCALL_DEFINE2(process_ksm_status, int, pidfd, unsigned int, flags) +{ +#ifdef CONFIG_KSM + if (flags != 0) + return -EINVAL; + + return do_process_ksm_control(pidfd, PKSM_STATUS); +#else /* CONFIG_KSM */ + return -ENOSYS; +#endif /* CONFIG_KSM */ +} + +#ifdef CONFIG_KSM +static ssize_t process_ksm_enable_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", __NR_process_ksm_enable); +} +static struct kobj_attribute process_ksm_enable_attr = __ATTR_RO(process_ksm_enable); + +static ssize_t process_ksm_disable_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", __NR_process_ksm_disable); +} +static struct kobj_attribute process_ksm_disable_attr = __ATTR_RO(process_ksm_disable); + +static ssize_t process_ksm_status_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", __NR_process_ksm_status); +} +static struct kobj_attribute process_ksm_status_attr = __ATTR_RO(process_ksm_status); + +static struct attribute *process_ksm_sysfs_attrs[] = { + &process_ksm_enable_attr.attr, + &process_ksm_disable_attr.attr, + &process_ksm_status_attr.attr, + NULL, +}; + +static const struct attribute_group process_ksm_sysfs_attr_group = { + .attrs = process_ksm_sysfs_attrs, + .name = "process_ksm", +}; + +static int __init process_ksm_sysfs_init(void) +{ + return sysfs_create_group(kernel_kobj, &process_ksm_sysfs_attr_group); +} +subsys_initcall(process_ksm_sysfs_init); +#endif /* CONFIG_KSM */ + SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, struct getcpu_cache __user *, unused) { diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 860b2dcf3..96fe36a6d 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -292,6 +292,9 @@ COND_SYSCALL(mincore); COND_SYSCALL(madvise); COND_SYSCALL(process_madvise); COND_SYSCALL(process_mrelease); +COND_SYSCALL(process_ksm_enable); +COND_SYSCALL(process_ksm_disable); +COND_SYSCALL(process_ksm_status); COND_SYSCALL(remap_file_pages); COND_SYSCALL(mbind); COND_SYSCALL(get_mempolicy); -- 2.41.0