From 475e807da05b33db783cc51a104ece35b4bd1274 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Sat, 22 Apr 2023 11:54:42 +0200 Subject: [PATCH 1/2] gcc-LTO support for the kernel this is the first call for comments (and kbuild complaints) for this support of gcc (full) LTO in the kernel. Most of the patches come from Andi. Me and Martin rebased them to new kernels and fixed the to-use known issues. Also I updated most of the commit logs and reordered the patches to groups of patches with similar intent. The very first patch comes from Alexander and is pending on some x86 queue already (I believe). I am attaching it only for completeness. Without that, the kernel does not boot (LTO reorders a lot). In our measurements, the performance differences are negligible. The kernel is bigger with gcc LTO due to more inlining. The next step might be to play with non-static functions as we export everything, so the compiler cannot actually drop anything (esp. inlined and no longer needed functions). Cc: Alexander Potapenko Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Alexey Makhalov Cc: Andrew Morton Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Andrii Nakryiko Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnaldo Carvalho de Melo Cc: Ben Segall Cc: Borislav Petkov Cc: Daniel Borkmann Cc: Daniel Bristot de Oliveira Cc: Dave Hansen Cc: Dietmar Eggemann Cc: Dmitry Vyukov Cc: Don Zickus Cc: Hao Luo Cc: H.J. Lu Cc: "H. Peter Anvin" Cc: Huang Rui Cc: Ingo Molnar Cc: Jan Hubicka Cc: Jason Baron Cc: Jiri Kosina Cc: Jiri Olsa Cc: Joe Lawrence Cc: John Fastabend Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Juri Lelli Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Martin Liska Cc: Masahiro Yamada Cc: Mel Gorman Cc: Miguel Ojeda Cc: Michal Marek Cc: Miroslav Benes Cc: Namhyung Kim Cc: Nick Desaulniers Cc: Oleksandr Tyshchenko Cc: Peter Zijlstra Cc: Petr Mladek Cc: "Rafael J. 
Wysocki" Cc: Richard Biener Cc: Sedat Dilek Cc: Song Liu Cc: Stanislav Fomichev Cc: Stefano Stabellini Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Valentin Schneider Cc: Vincent Guittot Cc: Vincenzo Frascino Cc: Viresh Kumar Cc: VMware PV-Drivers Reviewers Cc: Yonghong Song Alexander Lobakin (1): x86/boot: robustify calling startup_{32,64}() from the decompressor code Andi Kleen (36): Compiler Attributes, lto: introduce __noreorder tracepoint, lto: Mark static call functions as __visible static_call, lto: Mark static keys as __visible static_call, lto: Mark static_call_return0() as __visible static_call, lto: Mark func_a() as __visible_on_lto x86/alternative, lto: Mark int3_*() as global and __visible x86/paravirt, lto: Mark native_steal_clock() as __visible_on_lto x86/preempt, lto: Mark preempt_schedule_*thunk() as __visible x86/xen, lto: Mark xen_vcpu_stolen() as __visible x86, lto: Mark gdt_page and native_sched_clock() as __visible amd, lto: Mark amd pmu and pstate functions as __visible_on_lto entry, lto: Mark raw_irqentry_exit_cond_resched() as __visible export, lto: Mark __kstrtab* in EXPORT_SYMBOL() as global and __visible softirq, lto: Mark irq_enter/exit_rcu() as __visible btf, lto: Make all BTF IDs global on LTO init.h, lto: mark initcalls as __noreorder bpf, lto: mark interpreter jump table as __noreorder sched, lto: mark sched classes as __noreorder linkage, lto: use C version for SYSCALL_ALIAS() / cond_syscall() scripts, lto: re-add gcc-ld scripts, lto: use CONFIG_LTO for many LTO specific actions Kbuild, lto: Add Link Time Optimization support x86/purgatory, lto: Disable gcc LTO for purgatory x86/realmode, lto: Disable gcc LTO for real mode code x86/vdso, lto: Disable gcc LTO for the vdso scripts, lto: disable gcc LTO for some mod sources Kbuild, lto: disable gcc LTO for bounds+asm-offsets lib/string, lto: disable gcc LTO for string.o Compiler attributes, lto: disable __flatten with LTO Kbuild, lto: don't include weak source file symbols in System.map 
x86, lto: Disable relative init pointers with gcc LTO x86/livepatch, lto: Disable live patching with gcc LTO x86/lib, lto: Mark 32bit mem{cpy,move,set} as __used scripts, lto: check C symbols for modversions scripts/bloat-o-meter, lto: handle gcc LTO x86, lto: Finally enable gcc LTO for x86 Jiri Slaby (5): kbuild: pass jobserver to cmd_ld_vmlinux.o compiler.h: introduce __visible_on_lto compiler.h: introduce __global_on_lto btf, lto: pass scope as strings x86/apic, lto: Mark apic_driver*() as __noreorder Martin Liska (4): kbuild: lto: preserve MAKEFLAGS for module linking x86/sev, lto: Mark cpuid_table_copy as __visible_on_lto mm/kasan, lto: Mark kasan mem{cpy,move,set} as __used Signed-off-by: Peter Jung --- Documentation/kbuild/index.rst | 2 + Documentation/kbuild/lto-build.rst | 76 +++++++++++++++++++++++++++++ Kbuild | 3 ++ Makefile | 6 ++- arch/Kconfig | 52 ++++++++++++++++++++ arch/x86/Kconfig | 5 +- arch/x86/entry/vdso/Makefile | 2 + arch/x86/events/amd/core.c | 2 +- arch/x86/include/asm/apic.h | 4 +- arch/x86/include/asm/preempt.h | 4 +- arch/x86/kernel/alternative.c | 5 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/paravirt.c | 2 +- arch/x86/kernel/sev-shared.c | 2 +- arch/x86/kernel/tsc.c | 2 +- arch/x86/lib/memcpy_32.c | 4 +- arch/x86/purgatory/Makefile | 2 + arch/x86/realmode/Makefile | 1 + drivers/cpufreq/amd-pstate.c | 15 +++--- drivers/xen/time.c | 2 +- include/asm-generic/vmlinux.lds.h | 2 +- include/linux/btf_ids.h | 24 ++++----- include/linux/compiler.h | 8 +++ include/linux/compiler_attributes.h | 15 ++++++ include/linux/export.h | 6 ++- include/linux/init.h | 2 +- include/linux/linkage.h | 16 +++--- include/linux/static_call.h | 12 ++--- include/linux/tracepoint.h | 4 +- kernel/bpf/core.c | 2 +- kernel/entry/common.c | 2 +- kernel/kallsyms.c | 2 +- kernel/livepatch/Kconfig | 1 + kernel/sched/sched.h | 1 + kernel/softirq.c | 4 +- kernel/static_call.c | 2 +- kernel/static_call_inline.c | 6 +-- kernel/time/posix-stubs.c | 19 +++++++- 
lib/Makefile | 2 + mm/kasan/generic.c | 2 +- mm/kasan/shadow.c | 6 +-- scripts/Makefile.build | 17 ++++--- scripts/Makefile.lib | 2 +- scripts/Makefile.lto | 43 ++++++++++++++++ scripts/Makefile.modfinal | 2 +- scripts/Makefile.vmlinux | 3 +- scripts/Makefile.vmlinux_o | 6 +-- scripts/bloat-o-meter | 2 +- scripts/gcc-ld | 40 +++++++++++++++ scripts/link-vmlinux.sh | 9 ++-- scripts/mksysmap | 2 + scripts/mod/Makefile | 3 ++ scripts/module.lds.S | 2 +- 53 files changed, 371 insertions(+), 91 deletions(-) create mode 100644 Documentation/kbuild/lto-build.rst create mode 100644 scripts/Makefile.lto create mode 100755 scripts/gcc-ld diff --git a/Documentation/kbuild/index.rst b/Documentation/kbuild/index.rst index cee2f99f734b..1937eee7c437 100644 --- a/Documentation/kbuild/index.rst +++ b/Documentation/kbuild/index.rst @@ -22,6 +22,8 @@ Kernel Build System gcc-plugins llvm + lto-build + .. only:: subproject and html Indices diff --git a/Documentation/kbuild/lto-build.rst b/Documentation/kbuild/lto-build.rst new file mode 100644 index 000000000000..3fb17342e72f --- /dev/null +++ b/Documentation/kbuild/lto-build.rst @@ -0,0 +1,76 @@ +===================================================== +gcc link time optimization (LTO) for the Linux kernel +===================================================== + +Link Time Optimization allows the compiler to optimize the complete program +instead of just each file. + +The compiler can inline functions between files and do various other global +optimizations, like specializing functions for common parameters, +determining when global variables are clobbered, making functions pure/const, +propagating constants globally, removing unneeded data and others. + +It will also drop unused functions which can make the kernel +image smaller in some circumstances, in particular for small kernel +configurations. 
+ +For small monolithic kernels it can throw away unused code very effectively +(especially when modules are disabled) and usually shrinks +the code size. + +Build time and memory consumption at build time will increase, depending +on the size of the largest binary. Modular kernels are less affected. +With LTO incremental builds are less incremental, as always the whole +binary needs to be re-optimized (but not re-parsed) + +Oopses can be somewhat more difficult to read, due to the more aggressive +inlining: it helps to use scripts/faddr2line. + +It is currently incompatible with live patching. + +Normal "reasonable" builds work with less than 4GB of RAM, but very large +configurations like allyesconfig typically need more memory. The actual +memory needed depends on the available memory (gcc sizes its garbage +collector pools based on that or on the ulimit -m limits) and +the compiler version. + +Requirements: +------------- + +- Enough memory: 4GB for a standard build, more for allyesconfig + The peak memory usage happens single threaded (when lto-wpa merges types), + so dialing back -j options will not help much. + +A 32bit hosted compiler is unlikely to work due to the memory requirements. +You can however build a kernel targeted at 32bit on a 64bit host. + +FAQs: +----- + +* I get a section type attribute conflict + + Usually because of someone doing const __initdata (should be + const __initconst) or const __read_mostly (should be just const). Check + both symbols reported by gcc. + +References: +----------- + +* Presentation on Kernel LTO + (note, performance numbers/details totally outdated.) + + http://halobates.de/kernel-lto.pdf + +* Generic gcc LTO: + + * http://www.ucw.cz/~hubicka/slides/labs2013.pdf + * http://www.hipeac.net/system/files/barcelona.pdf + +* Somewhat outdated too (from GCC site): + + * http://gcc.gnu.org/projects/lto/lto.pdf + * http://gcc.gnu.org/projects/lto/whopr.pdf + +Happy Link-Time-Optimizing! 
+ +Andi Kleen diff --git a/Kbuild b/Kbuild index 464b34a08f51..40744d76d416 100644 --- a/Kbuild +++ b/Kbuild @@ -11,6 +11,8 @@ bounds-file := include/generated/bounds.h targets := kernel/bounds.s +kernel/bounds.s: KBUILD_CFLAGS += $(DISABLE_LTO_GCC) + $(bounds-file): kernel/bounds.s FORCE $(call filechk,offsets,__LINUX_BOUNDS_H__) @@ -30,6 +32,7 @@ offsets-file := include/generated/asm-offsets.h targets += arch/$(SRCARCH)/kernel/asm-offsets.s arch/$(SRCARCH)/kernel/asm-offsets.s: $(timeconst-file) $(bounds-file) +arch/$(SRCARCH)/kernel/asm-offsets.s: KBUILD_CFLAGS += $(DISABLE_LTO_GCC) $(offsets-file): arch/$(SRCARCH)/kernel/asm-offsets.s FORCE $(call filechk,offsets,__ASM_OFFSETS_H__) diff --git a/Makefile b/Makefile index 81a25d298289..0213a697ef28 100644 --- a/Makefile +++ b/Makefile @@ -477,6 +477,7 @@ KBUILD_HOSTLDLIBS := $(HOST_LFS_LIBS) $(HOSTLDLIBS) # Make variables (CC, etc...) CPP = $(CC) -E +LDFINAL = $(LD) ifneq ($(LLVM),) CC = $(LLVM_PREFIX)clang$(LLVM_SUFFIX) LD = $(LLVM_PREFIX)ld.lld$(LLVM_SUFFIX) @@ -599,7 +600,7 @@ export RUSTC RUSTDOC RUSTFMT RUSTC_OR_CLIPPY_QUIET RUSTC_OR_CLIPPY BINDGEN CARGO export HOSTRUSTC KBUILD_HOSTRUSTFLAGS export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX -export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD +export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD LDFINAL export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE export KBUILD_USERCFLAGS KBUILD_USERLDFLAGS @@ -996,7 +997,7 @@ endif endif endif -ifdef CONFIG_LTO +ifdef CONFIG_LTO_CLANG KBUILD_CFLAGS += -fno-lto $(CC_FLAGS_LTO) KBUILD_AFLAGS += -fno-lto export CC_FLAGS_LTO @@ -1084,6 +1085,7 @@ include-$(CONFIG_KMSAN) += scripts/Makefile.kmsan include-$(CONFIG_UBSAN) += scripts/Makefile.ubsan include-$(CONFIG_KCOV) += scripts/Makefile.kcov include-$(CONFIG_RANDSTRUCT) += scripts/Makefile.randstruct +include-$(CONFIG_LTO_GCC) += scripts/Makefile.lto 
include-$(CONFIG_GCC_PLUGINS) += scripts/Makefile.gcc-plugins include $(addprefix $(srctree)/, $(include-y)) diff --git a/arch/Kconfig b/arch/Kconfig index e3511afbb7f2..5b63791126c7 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -699,6 +699,21 @@ config HAS_LTO_CLANG The compiler and Kconfig options support building with Clang's LTO. +config ARCH_SUPPORTS_LTO_GCC + bool + +# Some ar versions leak file descriptors when using the LTO +# plugin and cause strange errors when ulimit -n is too low. +# Pick an arbitrary threshold, which should be enough for most +# kernel configs. This was a regression that is only +# in some transient binutils version, so either older or +# new enough is ok. +# This might not be the exact range with this bug. +config BAD_AR + depends on LD_VERSION = 23000 + depends on $(shell,ulimit -n) < 4000 + def_bool y + choice prompt "Link Time Optimization (LTO)" default LTO_NONE @@ -746,8 +761,45 @@ config LTO_CLANG_THIN https://clang.llvm.org/docs/ThinLTO.html If unsure, say Y. + +config LTO_GCC + bool "gcc LTO" + depends on ARCH_SUPPORTS_LTO_GCC && CC_IS_GCC + depends on GCC_VERSION >= 100300 + depends on LD_VERSION >= 22700 + depends on !BAD_AR + select LTO + help + Enable whole program (link time) optimizations (LTO) for the whole + kernel and each module. This usually increases compile time, + especially for incremental builds, but tends to generate better code + as well as some global checks. + + It allows the compiler to inline functions between different files + and do other global optimization, like propagating constants between + functions, determining side effects of functions, avoiding unnecessary + register saving around functions, or optimizing unused function + arguments. It also allows the compiler to drop unused functions. + + With this option the compiler will also do some global checking over + different source files. + + This requires a gcc 10.3 or later compiler and binutils >= 2.27. 
+ + On larger non modular configurations this may need more than 4GB of + RAM for the link phase, as well as a 64bit host compiler. + + For more information see Documentation/kbuild/lto-build.rst endchoice +config LTO_CP_CLONE + bool "Allow aggressive cloning for function specialization" + depends on LTO_GCC + help + Allow the compiler to clone and specialize functions for specific + arguments when it determines these arguments are commonly + used. Experimental. Will increase text size. + config ARCH_SUPPORTS_CFI_CLANG bool help diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index df21fba77db1..aeff2eca6527 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -115,6 +115,7 @@ config X86 select ARCH_USES_CFI_TRAPS if X86_64 && CFI_CLANG select ARCH_SUPPORTS_LTO_CLANG select ARCH_SUPPORTS_LTO_CLANG_THIN + select ARCH_SUPPORTS_LTO_GCC select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS @@ -179,7 +180,9 @@ config X86 select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT - select HAVE_ARCH_PREL32_RELOCATIONS + # LTO can move assembler to different files, so all + # the init functions would need to be global for this to work + select HAVE_ARCH_PREL32_RELOCATIONS if !LTO_GCC select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_STACKLEAK diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 1506a22a4fb6..d9ea002c0703 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -3,6 +3,8 @@ # Building vDSO images for x86. # +KBUILD_CFLAGS += $(DISABLE_LTO_GCC) + # Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before # the inclusion of generic Makefile. 
ARCH_REL_TYPE_ABS := R_X86_64_JUMP_SLOT|R_X86_64_GLOB_DAT|R_X86_64_RELATIVE| diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index bccea57dee81..04442cc08fca 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -643,7 +643,7 @@ static inline void amd_pmu_ack_global_status(u64 status) wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status); } -static bool amd_pmu_test_overflow_topbit(int idx) +__visible_on_lto bool amd_pmu_test_overflow_topbit(int idx) { u64 counter; diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 3216da7074ba..34032041b426 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -362,12 +362,12 @@ extern struct apic *apic; * to enforce the order with in them. */ #define apic_driver(sym) \ - static const struct apic *__apicdrivers_##sym __used \ + static const struct apic *__apicdrivers_##sym __used __noreorder \ __aligned(sizeof(struct apic *)) \ __section(".apicdrivers") = { &sym } #define apic_drivers(sym1, sym2) \ - static struct apic *__apicdrivers_##sym1##sym2[2] __used \ + static struct apic *__apicdrivers_##sym1##sym2[2] __used __noreorder \ __aligned(sizeof(struct apic *)) \ __section(".apicdrivers") = { &sym1, &sym2 } diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 2d13f25b1bd8..23810d573170 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -107,13 +107,13 @@ static __always_inline bool should_resched(int preempt_offset) #ifdef CONFIG_PREEMPTION extern asmlinkage void preempt_schedule(void); -extern asmlinkage void preempt_schedule_thunk(void); +extern __visible asmlinkage void preempt_schedule_thunk(void); #define preempt_schedule_dynamic_enabled preempt_schedule_thunk #define preempt_schedule_dynamic_disabled NULL extern asmlinkage void preempt_schedule_notrace(void); -extern asmlinkage void preempt_schedule_notrace_thunk(void); +extern __visible asmlinkage void 
preempt_schedule_notrace_thunk(void); #define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace_thunk #define preempt_schedule_notrace_dynamic_disabled NULL diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index f615e0cb6d93..ac45c11b27f0 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1266,11 +1266,12 @@ extern struct paravirt_patch_site __start_parainstructions[], * convention such that we can 'call' it from assembly. */ -extern void int3_magic(unsigned int *ptr); /* defined in asm */ +extern __visible void int3_magic(unsigned int *ptr); /* defined in asm */ asm ( " .pushsection .init.text, \"ax\", @progbits\n" " .type int3_magic, @function\n" +" .globl int3_magic\n" "int3_magic:\n" ANNOTATE_NOENDBR " movl $1, (%" _ASM_ARG1 ")\n" @@ -1279,7 +1280,7 @@ asm ( " .popsection\n" ); -extern void int3_selftest_ip(void); /* defined in asm below */ +extern __visible void int3_selftest_ip(void); /* defined in asm below */ static int __init int3_exception_notify(struct notifier_block *self, unsigned long val, void *data) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8cd4126d8253..1d348ab594e0 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -202,7 +202,7 @@ static const struct cpu_dev default_cpu = { static const struct cpu_dev *this_cpu = &default_cpu; -DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { +__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #ifdef CONFIG_X86_64 /* * We need valid kernel segments for data and code in long mode too diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 42e182868873..66562b5c9fab 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -104,7 +104,7 @@ unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, struct static_key paravirt_steal_enabled; struct static_key 
paravirt_steal_rq_enabled; -static u64 native_steal_clock(int cpu) +__visible_on_lto u64 native_steal_clock(int cpu) { return 0; } diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 3a5b0c9c4fcc..554da8aabfc7 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -64,7 +64,7 @@ struct snp_cpuid_table { static u16 ghcb_version __ro_after_init; /* Copy of the SNP firmware's CPUID page. */ -static struct snp_cpuid_table cpuid_table_copy __ro_after_init; +__visible_on_lto struct snp_cpuid_table cpuid_table_copy __ro_after_init; /* * These will be initialized based on CPUID table so that non-present diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 344698852146..f1afb40782cb 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -217,7 +217,7 @@ static void __init cyc2ns_init_secondary_cpus(void) /* * Scheduler clock - returns current time in nanosec units. */ -noinstr u64 native_sched_clock(void) +noinstr __visible u64 native_sched_clock(void) { if (static_branch_likely(&__use_tsc)) { u64 tsc_now = rdtsc(); diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c index a29b64befb93..d8daeb1fc883 100644 --- a/arch/x86/lib/memcpy_32.c +++ b/arch/x86/lib/memcpy_32.c @@ -6,13 +6,13 @@ #undef memset #undef memmove -__visible void *memcpy(void *to, const void *from, size_t n) +__used __visible void *memcpy(void *to, const void *from, size_t n) { return __memcpy(to, from, n); } EXPORT_SYMBOL(memcpy); -__visible void *memset(void *s, int c, size_t count) +__used __visible void *memset(void *s, int c, size_t count) { return __memset(s, c, count); } diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 82fec66d46d2..91f0907585cb 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -60,6 +60,8 @@ ifdef CONFIG_CFI_CLANG PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_CFI) endif +PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_LTO) + CFLAGS_REMOVE_purgatory.o += 
$(PURGATORY_CFLAGS_REMOVE) CFLAGS_purgatory.o += $(PURGATORY_CFLAGS) diff --git a/arch/x86/realmode/Makefile b/arch/x86/realmode/Makefile index a0b491ae2de8..47b8b500cf15 100644 --- a/arch/x86/realmode/Makefile +++ b/arch/x86/realmode/Makefile @@ -10,6 +10,7 @@ # Sanitizer runtimes are unavailable and cannot be linked here. KASAN_SANITIZE := n KCSAN_SANITIZE := n +KBUILD_CFLAGS += $(DISABLE_LTO_GCC) subdir- := rm diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 5a3d4aa0f45a..c0c1f311c231 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -226,7 +226,7 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, return ret; } -static inline int pstate_enable(bool enable) +__visible_on_lto int do_amd_pstate_enable(bool enable) { return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable); } @@ -254,14 +254,14 @@ static int cppc_enable(bool enable) return ret; } -DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable); +DEFINE_STATIC_CALL(amd_pstate_enable, do_amd_pstate_enable); static inline int amd_pstate_enable(bool enable) { return static_call(amd_pstate_enable)(enable); } -static int pstate_init_perf(struct amd_cpudata *cpudata) +__visible_on_lto int do_amd_pstate_init_perf(struct amd_cpudata *cpudata) { u64 cap1; u32 highest_perf; @@ -327,15 +327,16 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) return ret; } -DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf); +DEFINE_STATIC_CALL(amd_pstate_init_perf, do_amd_pstate_init_perf); static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) { return static_call(amd_pstate_init_perf)(cpudata); } -static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, - u32 des_perf, u32 max_perf, bool fast_switch) +__visible_on_lto void do_amd_pstate_update_perf(struct amd_cpudata *cpudata, + u32 min_perf, u32 des_perf, u32 max_perf, + bool fast_switch) { if (fast_switch) wrmsrl(MSR_AMD_CPPC_REQ, 
READ_ONCE(cpudata->cppc_req_cached)); @@ -357,7 +358,7 @@ static void cppc_update_perf(struct amd_cpudata *cpudata, cppc_set_perf(cpudata->cpu, &perf_ctrls); } -DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf); +DEFINE_STATIC_CALL(amd_pstate_update_perf, do_amd_pstate_update_perf); static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, diff --git a/drivers/xen/time.c b/drivers/xen/time.c index 152dd33bb223..006a04592c8f 100644 --- a/drivers/xen/time.c +++ b/drivers/xen/time.c @@ -145,7 +145,7 @@ void xen_get_runstate_snapshot(struct vcpu_runstate_info *res) } /* return true when a vcpu could run but has no real cpu to run on */ -bool xen_vcpu_stolen(int vcpu) +__visible bool xen_vcpu_stolen(int vcpu) { return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable; } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index d1f57e4868ed..81f2e1c88ab4 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -96,7 +96,7 @@ * RODATA_MAIN is not used because existing code already defines .rodata.x * sections to be brought in with rodata. */ -#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO) #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L* #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 3a4f7cd882ca..535e7d599264 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -37,7 +37,7 @@ struct btf_id_set8 { #define ____BTF_ID(symbol, word) \ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ -".local " #symbol " ; \n" \ +"." 
__global_on_lto " " #symbol " ; \n" \ ".type " #symbol ", STT_OBJECT; \n" \ ".size " #symbol ", 4; \n" \ #symbol ": \n" \ @@ -83,16 +83,16 @@ word \ #define __BTF_ID_LIST(name, scope) \ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ -"." #scope " " #name "; \n" \ +"." scope " " #name "; \n" \ #name ":; \n" \ ".popsection; \n"); #define BTF_ID_LIST(name) \ -__BTF_ID_LIST(name, local) \ +__BTF_ID_LIST(name, __global_on_lto) \ extern u32 name[]; #define BTF_ID_LIST_GLOBAL(name, n) \ -__BTF_ID_LIST(name, globl) +__BTF_ID_LIST(name, "globl") /* The BTF_ID_LIST_SINGLE macro defines a BTF_ID_LIST with * a single entry. @@ -142,18 +142,18 @@ asm( \ #define __BTF_SET_START(name, scope) \ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ -"." #scope " __BTF_ID__set__" #name "; \n" \ +"." scope " __BTF_ID__set__" #name "; \n" \ "__BTF_ID__set__" #name ":; \n" \ ".zero 4 \n" \ ".popsection; \n"); #define BTF_SET_START(name) \ -__BTF_ID_LIST(name, local) \ -__BTF_SET_START(name, local) +__BTF_ID_LIST(name, __global_on_lto) \ +__BTF_SET_START(name, __global_on_lto) #define BTF_SET_START_GLOBAL(name) \ -__BTF_ID_LIST(name, globl) \ -__BTF_SET_START(name, globl) +__BTF_ID_LIST(name, "globl") \ +__BTF_SET_START(name, "globl") #define BTF_SET_END(name) \ asm( \ @@ -186,14 +186,14 @@ extern struct btf_id_set name; #define __BTF_SET8_START(name, scope) \ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ -"." #scope " __BTF_ID__set8__" #name "; \n" \ +"." 
scope " __BTF_ID__set8__" #name "; \n" \ "__BTF_ID__set8__" #name ":; \n" \ ".zero 8 \n" \ ".popsection; \n"); #define BTF_SET8_START(name) \ -__BTF_ID_LIST(name, local) \ -__BTF_SET8_START(name, local) +__BTF_ID_LIST(name, __global_on_lto) \ +__BTF_SET8_START(name, __global_on_lto) #define BTF_SET8_END(name) \ asm( \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 947a60b801db..8b5399b121d3 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -133,6 +133,14 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #define __annotate_jump_table #endif /* CONFIG_OBJTOOL */ +#ifdef CONFIG_LTO_GCC +# define __visible_on_lto __visible +# define __global_on_lto "globl" +#else +# define __visible_on_lto static +# define __global_on_lto "local" +#endif + #ifndef unreachable # define unreachable() do { \ annotate_unreachable(); \ diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index e659cb6fded3..ed4388ee01f4 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -214,7 +214,12 @@ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#Common-Function-Attributes * clang: https://clang.llvm.org/docs/AttributeReference.html#flatten */ +#ifndef CONFIG_LTO_GCC # define __flatten __attribute__((flatten)) +#else +/* Causes very large memory use with gcc in LTO mode */ +# define __flatten +#endif /* * Note the missing underscores. 
@@ -366,4 +371,14 @@ */ #define __fix_address noinline __noclone +/* + * https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#Common-Function-Attributes + */ + +#if __has_attribute(__no_reorder__) +#define __noreorder __attribute__((no_reorder)) +#else +#define __noreorder +#endif + #endif /* __LINUX_COMPILER_ATTRIBUTES_H */ diff --git a/include/linux/export.h b/include/linux/export.h index 3f31ced0d977..3cb5f85327da 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -85,11 +85,13 @@ struct kernel_symbol { */ #define ___EXPORT_SYMBOL(sym, sec, ns) \ extern typeof(sym) sym; \ - extern const char __kstrtab_##sym[]; \ - extern const char __kstrtabns_##sym[]; \ + extern const char __visible __kstrtab_##sym[]; \ + extern const char __visible __kstrtabns_##sym[]; \ asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \ + " .globl __kstrtab_" #sym " \n" \ "__kstrtab_" #sym ": \n" \ " .asciz \"" #sym "\" \n" \ + " .globl __kstrtabns_" #sym " \n" \ "__kstrtabns_" #sym ": \n" \ " .asciz \"" ns "\" \n" \ " .previous \n"); \ diff --git a/include/linux/init.h b/include/linux/init.h index c5fe6d26f5b1..b069a0e1ca5c 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -249,7 +249,7 @@ extern bool initcall_debug; static_assert(__same_type(initcall_t, &fn)); #else #define ____define_initcall(fn, __unused, __name, __sec) \ - static initcall_t __name __used \ + static initcall_t __name __used __noreorder \ __attribute__((__section__(__sec))) = fn; #endif diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 5c8865bb59d9..7ef0f43b820d 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -23,17 +23,17 @@ #endif #ifndef cond_syscall -#define cond_syscall(x) asm( \ - ".weak " __stringify(x) "\n\t" \ - ".set " __stringify(x) "," \ - __stringify(sys_ni_syscall)) +#define cond_syscall(x) \ + extern long x(void) __attribute__((alias("sys_ni_syscall"), weak)); #endif #ifndef SYSCALL_ALIAS -#define 
SYSCALL_ALIAS(alias, name) asm( \ - ".globl " __stringify(alias) "\n\t" \ - ".set " __stringify(alias) "," \ - __stringify(name)) +#define SYSCALL_ALIAS(a, name) \ + long a(void) __attribute__((alias(__stringify(name)))) +#define SYSCALL_ALIAS_PROTO(a, name) \ + typeof(a) a __attribute__((alias(__stringify(name)))) +#else +#define SYSCALL_ALIAS_PROTO(a, name) SYSCALL_ALIAS(a, name) #endif #define __page_aligned_data __section(".data..page_aligned") __aligned(PAGE_SIZE) diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 141e6b176a1b..21c84fcbcfc9 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -184,7 +184,7 @@ extern long __static_call_return0(void); #define DEFINE_STATIC_CALL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ - struct static_call_key STATIC_CALL_KEY(name) = { \ + __visible struct static_call_key STATIC_CALL_KEY(name) = { \ .func = _func, \ .type = 1, \ }; \ @@ -192,7 +192,7 @@ extern long __static_call_return0(void); #define DEFINE_STATIC_CALL_NULL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ - struct static_call_key STATIC_CALL_KEY(name) = { \ + __visible struct static_call_key STATIC_CALL_KEY(name) = { \ .func = NULL, \ .type = 1, \ }; \ @@ -200,7 +200,7 @@ extern long __static_call_return0(void); #define DEFINE_STATIC_CALL_RET0(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ - struct static_call_key STATIC_CALL_KEY(name) = { \ + __visible struct static_call_key STATIC_CALL_KEY(name) = { \ .func = __static_call_return0, \ .type = 1, \ }; \ @@ -229,14 +229,14 @@ static inline int static_call_init(void) { return 0; } #define DEFINE_STATIC_CALL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ - struct static_call_key STATIC_CALL_KEY(name) = { \ + __visible struct static_call_key STATIC_CALL_KEY(name) = { \ .func = _func, \ }; \ ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) #define DEFINE_STATIC_CALL_NULL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ - struct static_call_key 
STATIC_CALL_KEY(name) = { \ + __visible struct static_call_key STATIC_CALL_KEY(name) = { \ .func = NULL, \ }; \ ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) @@ -290,7 +290,7 @@ static inline long __static_call_return0(void) #define __DEFINE_STATIC_CALL(name, _func, _func_init) \ DECLARE_STATIC_CALL(name, _func); \ - struct static_call_key STATIC_CALL_KEY(name) = { \ + __visible struct static_call_key STATIC_CALL_KEY(name) = { \ .func = _func_init, \ } diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 6811e43c1b5c..0612de8f70ae 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -249,7 +249,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * tracepoint is enabled. */ #define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ - extern int __traceiter_##name(data_proto); \ + extern __visible int __traceiter_##name(data_proto); \ DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name); \ extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ @@ -314,7 +314,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) .unregfunc = _unreg, \ .funcs = NULL }; \ __TRACEPOINT_ENTRY(_name); \ - int __traceiter_##_name(void *__data, proto) \ + __visible int __traceiter_##_name(void *__data, proto) \ { \ struct tracepoint_func *it_func_ptr; \ void *it_func; \ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index e2d256c82072..6aa50232e0dd 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1644,7 +1644,7 @@ u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) * * Return: whatever value is in %BPF_R0 at program exit */ -static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) +static u64 __noreorder ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) { #define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y #define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z diff --git 
a/kernel/entry/common.c b/kernel/entry/common.c index be61332c66b5..dff8607db66e 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -379,7 +379,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs) return ret; } -void raw_irqentry_exit_cond_resched(void) +__visible void raw_irqentry_exit_cond_resched(void) { if (!preempt_count()) { /* Sanity check RCU and thread stack */ diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 83f499182c9a..4a6e6e2392f4 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -167,7 +167,7 @@ static bool cleanup_symbol_name(char *s) { char *res; - if (!IS_ENABLED(CONFIG_LTO_CLANG)) + if (!IS_ENABLED(CONFIG_LTO)) return false; /* diff --git a/kernel/livepatch/Kconfig b/kernel/livepatch/Kconfig index 53d51ed619a3..22699adc39a6 100644 --- a/kernel/livepatch/Kconfig +++ b/kernel/livepatch/Kconfig @@ -12,6 +12,7 @@ config LIVEPATCH depends on KALLSYMS_ALL depends on HAVE_LIVEPATCH depends on !TRIM_UNUSED_KSYMS + depends on !LTO_GCC # not supported in gcc help Say Y here if you want to support kernel live patching. 
This option has no runtime impact until a kernel "patch" diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 7331d436ebc4..4fe6317a5c73 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2267,6 +2267,7 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next) */ #define DEFINE_SCHED_CLASS(name) \ const struct sched_class name##_sched_class \ + __noreorder \ __aligned(__alignof__(struct sched_class)) \ __section("__" #name "_sched_class") diff --git a/kernel/softirq.c b/kernel/softirq.c index c8a6913c067d..9d62e09c9581 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -604,7 +604,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) /** * irq_enter_rcu - Enter an interrupt context with RCU watching */ -void irq_enter_rcu(void) +__visible void irq_enter_rcu(void) { __irq_enter_raw(); @@ -657,7 +657,7 @@ static inline void __irq_exit_rcu(void) * * Also processes softirqs if needed and possible. */ -void irq_exit_rcu(void) +__visible void irq_exit_rcu(void) { __irq_exit_rcu(); /* must be last! 
*/ diff --git a/kernel/static_call.c b/kernel/static_call.c index e9c3e69f3837..9197fe86d8bd 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include -long __static_call_return0(void) +__visible long __static_call_return0(void) { return 0; } diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c index 639397b5491c..ec0a9fd7a953 100644 --- a/kernel/static_call_inline.c +++ b/kernel/static_call_inline.c @@ -514,7 +514,7 @@ early_initcall(static_call_init); #ifdef CONFIG_STATIC_CALL_SELFTEST -static int func_a(int x) +__visible_on_lto int sc_func_a(int x) { return x+1; } @@ -524,7 +524,7 @@ static int func_b(int x) return x+2; } -DEFINE_STATIC_CALL(sc_selftest, func_a); +DEFINE_STATIC_CALL(sc_selftest, sc_func_a); static struct static_call_data { int (*func)(int); @@ -533,7 +533,7 @@ static struct static_call_data { } static_call_data [] __initdata = { { NULL, 2, 3 }, { func_b, 2, 4 }, - { func_a, 2, 3 } + { sc_func_a, 2, 3 } }; static int __init test_static_call_init(void) diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c index 828aeecbd1e8..df96f8f1aab1 100644 --- a/kernel/time/posix-stubs.c +++ b/kernel/time/posix-stubs.c @@ -31,13 +31,21 @@ asmlinkage long sys_ni_posix_timers(void) } #ifndef SYS_NI -#define SYS_NI(name) SYSCALL_ALIAS(sys_##name, sys_ni_posix_timers) +#define SYS_NI(name) SYSCALL_ALIAS_PROTO(sys_##name, sys_ni_posix_timers) #endif #ifndef COMPAT_SYS_NI -#define COMPAT_SYS_NI(name) SYSCALL_ALIAS(compat_sys_##name, sys_ni_posix_timers) +#define COMPAT_SYS_NI(name) \ + SYSCALL_ALIAS_PROTO(compat_sys_##name, sys_ni_posix_timers) #endif +/* + * This cannot go to SYS_NI() or SYSCALL_ALIAS_PROTO() due to gcc bug fixed in + * gcc >= 13 (cf. PR 97498). How __SYSCALL_DEFINEx() manages to work is unclear.
+ */ +__diag_push(); +__diag_ignore(GCC, 8, "-Wattribute-alias", "Alias to nonimplemented syscall"); + SYS_NI(timer_create); SYS_NI(timer_gettime); SYS_NI(timer_getoverrun); @@ -51,6 +59,8 @@ SYS_NI(clock_adjtime32); SYS_NI(alarm); #endif +__diag_pop(); + /* * We preserve minimal support for CLOCK_REALTIME and CLOCK_MONOTONIC * as it is easy to remain compatible with little code. CLOCK_BOOTTIME @@ -158,6 +168,9 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, which_clock); } +__diag_push(); +__diag_ignore(GCC, 8, "-Wattribute-alias", "Alias to nonimplemented syscall"); + #ifdef CONFIG_COMPAT COMPAT_SYS_NI(timer_create); #endif @@ -171,6 +184,8 @@ COMPAT_SYS_NI(setitimer); SYS_NI(timer_settime32); SYS_NI(timer_gettime32); +__diag_pop(); + SYSCALL_DEFINE2(clock_settime32, const clockid_t, which_clock, struct old_timespec32 __user *, tp) { diff --git a/lib/Makefile b/lib/Makefile index 31a3a257fd49..5fb348f63d6b 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -27,6 +27,8 @@ KASAN_SANITIZE_string.o := n CFLAGS_string.o += -fno-stack-protector endif +CFLAGS_string.o += $(DISABLE_LTO_GCC) + lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o timerqueue.o xarray.o \ maple_tree.o idr.o extable.o irq_regs.o argv_split.o \ diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index e5eef670735e..9494f0b22d8b 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -152,7 +152,7 @@ static __always_inline bool memory_is_poisoned(unsigned long addr, size_t size) case 16: return memory_is_poisoned_16(addr); default: - BUILD_BUG(); + break; } } diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index c8b86f3273b5..1a7f36d9bb56 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -48,7 +48,7 @@ EXPORT_SYMBOL(__kasan_check_write); * these, so that non-instrumented files can safely consider them as builtins. 
*/ #undef memset -void *memset(void *addr, int c, size_t len) +__used void *memset(void *addr, int c, size_t len) { if (!kasan_check_range((unsigned long)addr, len, true, _RET_IP_)) return NULL; @@ -58,7 +58,7 @@ void *memset(void *addr, int c, size_t len) #ifdef __HAVE_ARCH_MEMMOVE #undef memmove -void *memmove(void *dest, const void *src, size_t len) +__used void *memmove(void *dest, const void *src, size_t len) { if (!kasan_check_range((unsigned long)src, len, false, _RET_IP_) || !kasan_check_range((unsigned long)dest, len, true, _RET_IP_)) @@ -69,7 +69,7 @@ void *memmove(void *dest, const void *src, size_t len) #endif #undef memcpy -void *memcpy(void *dest, const void *src, size_t len) +__used void *memcpy(void *dest, const void *src, size_t len) { if (!kasan_check_range((unsigned long)src, len, false, _RET_IP_) || !kasan_check_range((unsigned long)dest, len, true, _RET_IP_)) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 76323201232a..03e16a3afdf0 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -149,8 +149,8 @@ is-single-obj-m = $(and $(part-of-module),$(filter $@, $(obj-m)),y) # When a module consists of a single object, there is no reason to keep LLVM IR. # Make $(LD) covert LLVM IR to ELF here. -ifdef CONFIG_LTO_CLANG -cmd_ld_single_m = $(if $(is-single-obj-m), ; $(LD) $(ld_flags) -r -o $(tmp-target) $@; mv $(tmp-target) $@) +ifdef CONFIG_LTO +cmd_ld_single_m = $(if $(is-single-obj-m), ; $(LDFINAL) $(ld_flags) -r -o $(tmp-target) $@; mv $(tmp-target) $@) endif quiet_cmd_cc_o_c = CC $(quiet_modtag) $@ @@ -169,7 +169,7 @@ ifdef CONFIG_MODVERSIONS # be compiled and linked to the kernel and/or modules. 
gen_symversions = \ - if $(NM) $@ 2>/dev/null | grep -q __ksymtab; then \ + if $(NM) $@ 2>/dev/null | grep -q __kstrtab; then \ $(call cmd_gensymtypes_$(1),$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ >> $(dot-target).cmd; \ fi @@ -249,7 +249,7 @@ endef # Built-in and composite module parts $(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE - $(call if_changed_rule,cc_o_c) + +$(call if_changed_rule,cc_o_c) $(call cmd,force_checksrc) # To make this rule robust against "Argument list too long" error, @@ -267,7 +267,8 @@ $(obj)/%.usyms: $(obj)/%.o FORCE $(call if_changed,undefined_syms) quiet_cmd_cc_lst_c = MKLST $@ - cmd_cc_lst_c = $(CC) $(c_flags) -g -c -o $*.o $< && \ + cmd_cc_lst_c = $(if $(CONFIG_LTO),$(warning Listing in LTO mode does not match final binary)) \ + $(CC) $(c_flags) -g -c -o $*.o $< && \ $(CONFIG_SHELL) $(srctree)/scripts/makelst $*.o \ System.map $(OBJDUMP) > $@ @@ -438,8 +439,8 @@ $(obj)/modules.order: $(obj-m) FORCE $(obj)/lib.a: $(lib-y) FORCE $(call if_changed,ar) -quiet_cmd_ld_multi_m = LD [M] $@ - cmd_ld_multi_m = $(LD) $(ld_flags) -r -o $@ @$(patsubst %.o,%.mod,$@) $(cmd_objtool) +quiet_cmd_ld_multi_m = LDFINAL [M] $@ + cmd_ld_multi_m = $(LDFINAL) $(ld_flags) -r -o $@ @$(patsubst %.o,%.mod,$@) $(cmd_objtool) define rule_ld_multi_m $(call cmd_and_savecmd,ld_multi_m) @@ -449,7 +450,7 @@ endef $(multi-obj-m): objtool-enabled := $(delay-objtool) $(multi-obj-m): part-of-module := y $(multi-obj-m): %.o: %.mod FORCE - $(call if_changed_rule,ld_multi_m) + +$(call if_changed_rule,ld_multi_m) $(call multi_depend, $(multi-obj-m), .o, -objs -y -m) # Add intermediate targets: diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index a3ec7265fb57..0144b946b79a 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -275,7 +275,7 @@ objtool-args = $(objtool-args-y) \ $(if $(delay-objtool), --link) \ $(if $(part-of-module), --module) -delay-objtool := $(or $(CONFIG_LTO_CLANG),$(CONFIG_X86_KERNEL_IBT)) +delay-objtool := $(or 
$(CONFIG_LTO),$(CONFIG_X86_KERNEL_IBT)) cmd_objtool = $(if $(objtool-enabled), ; $(objtool) $(objtool-args) $@) cmd_gen_objtooldep = $(if $(objtool-enabled), { echo ; echo '$@: $$(wildcard $(objtool))' ; } >> $(dot-target).cmd) diff --git a/scripts/Makefile.lto b/scripts/Makefile.lto new file mode 100644 index 000000000000..33ac0da2bb47 --- /dev/null +++ b/scripts/Makefile.lto @@ -0,0 +1,43 @@ +# +# Support for gcc link time optimization +# + +DISABLE_LTO_GCC := +export DISABLE_LTO_GCC + +ifdef CONFIG_LTO_GCC + CC_FLAGS_LTO_GCC := -flto + DISABLE_LTO_GCC := -fno-lto + + KBUILD_CFLAGS += ${CC_FLAGS_LTO_GCC} + + CC_FLAGS_LTO := -flto + export CC_FLAGS_LTO + + lto-flags-y := -flinker-output=nolto-rel -flto=jobserver + lto-flags-y += -fwhole-program + + lto-flags-$(CONFIG_LTO_CP_CLONE) += -fipa-cp-clone + + # allow extra flags from command line + lto-flags-y += ${LTO_EXTRA_CFLAGS} + + # For LTO we need to use gcc to do the linking, not ld + # directly. Use a wrapper to convert the ld command line + # to gcc + LDFINAL := ${CONFIG_SHELL} ${srctree}/scripts/gcc-ld \ + ${lto-flags-y} + + # LTO gcc creates a lot of files in TMPDIR, and with /tmp as tmpfs + # it's easy to drive the machine OOM. Use the object directory + # instead for temporaries. + # This has the drawback that there might be some junk more visible + # after interrupted compilations, but you would have that junk + # there anyways in /tmp. 
+ TMPDIR ?= $(objtree) + export TMPDIR + + # use plugin aware tools + AR = $(CROSS_COMPILE)gcc-ar + NM = $(CROSS_COMPILE)gcc-nm +endif # CONFIG_LTO_GCC diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 4703f652c009..9c9466be17c0 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -32,7 +32,7 @@ ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink) quiet_cmd_ld_ko_o = LD [M] $@ cmd_ld_ko_o += \ - $(LD) -r $(KBUILD_LDFLAGS) \ + $(LDFINAL) -r $(KBUILD_LDFLAGS) \ $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \ -T scripts/module.lds -o $@ $(filter %.o, $^); \ $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true) diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index 10176dec97ea..2a98c371c10c 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -27,7 +27,8 @@ ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink) # Final link of vmlinux with optional arch pass after final link cmd_link_vmlinux = \ - $< "$(LD)" "$(KBUILD_LDFLAGS)" "$(LDFLAGS_vmlinux)"; \ + $< "$(LD)" "$(LDFINAL)" "$(KBUILD_LDFLAGS)" \ + "$(LDFLAGS_vmlinux)"; \ $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true) targets += vmlinux diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index ae52d3b3f063..497a59e4311d 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -44,9 +44,9 @@ objtool-args = $(vmlinux-objtool-args-y) --link # Link of vmlinux.o used for section mismatch analysis # --------------------------------------------------------------------------- -quiet_cmd_ld_vmlinux.o = LD $@ +quiet_cmd_ld_vmlinux.o = LDFINAL $@ cmd_ld_vmlinux.o = \ - $(LD) ${KBUILD_LDFLAGS} -r -o $@ \ + $(LDFINAL) ${KBUILD_LDFLAGS} -r -o $@ \ $(addprefix -T , $(initcalls-lds)) \ --whole-archive vmlinux.a --no-whole-archive \ --start-group $(KBUILD_VMLINUX_LIBS) --end-group \ @@ -58,7 +58,7 @@ define rule_ld_vmlinux.o endef vmlinux.o: $(initcalls-lds) 
vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE - $(call if_changed_rule,ld_vmlinux.o) + +$(call if_changed_rule,ld_vmlinux.o) targets += vmlinux.o diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter index 36303afa9dfc..642706fa666e 100755 --- a/scripts/bloat-o-meter +++ b/scripts/bloat-o-meter @@ -45,7 +45,7 @@ def getsizes(file, format): if name == "linux_banner": continue if name == "vermagic": continue # statics and some other optimizations adds random .NUMBER - name = re_NUMBER.sub('', name) + name = re_NUMBER.sub('', name).replace(".lto_priv", "") sym[name] = sym.get(name, 0) + int(size, 16) return sym diff --git a/scripts/gcc-ld b/scripts/gcc-ld new file mode 100755 index 000000000000..13e85ece8d04 --- /dev/null +++ b/scripts/gcc-ld @@ -0,0 +1,40 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# run gcc with ld options +# used as a wrapper to execute link time optimizations +# yes virginia, this is not pretty + +ARGS="-nostdlib" + +for j in "$@" ; do + if [ "$j" = -v ] ; then + exec `$CC -print-prog-name=ld` -v + fi +done + +while [ "$1" != "" ] ; do + case "$1" in + -save-temps*|-m32|-m64) N="$1" ;; + -r) N="$1" ;; + -flinker-output*) N="$1" ;; + -[Wg]*) N="$1" ;; + -[olv]|-[Ofd]*|-nostdlib) N="$1" ;; + --end-group|--start-group|--whole-archive|--no-whole-archive|\ +--no-undefined|--hash-style*|--build-id*|--eh-frame-hdr|-Bsymbolic) + N="-Wl,$1" ;; + -[RTFGhIezcbyYu]*|\ +--script|--defsym|-init|-Map|--oformat|-rpath|\ +-rpath-link|--sort-section|--section-start|-Tbss|-Tdata|-Ttext|-soname|\ +--version-script|--dynamic-list|--version-exports-symbol|--wrap|-m|-z) + A="$1" ; shift ; N="-Wl,$A,$1" ;; + -[m]*) N="$1" ;; + -*) N="-Wl,$1" ;; + *) N="$1" ;; + esac + ARGS="$ARGS $N" + shift +done + +[ -n "$V" ] && echo >&2 $CC $ARGS + +exec $CC $ARGS diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 32e573943cf0..052545f9da7f 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -29,8 +29,9 @@ set -e LD="$1" -KBUILD_LDFLAGS="$2" 
-LDFLAGS_vmlinux="$3" +LDFINAL="$2" +KBUILD_LDFLAGS="$3" +LDFLAGS_vmlinux="$4" is_enabled() { grep -q "^$1=y" include/config/auto.conf @@ -60,7 +61,7 @@ vmlinux_link() # skip output file argument shift - if is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT; then + if is_enabled CONFIG_LTO || is_enabled CONFIG_X86_KERNEL_IBT; then # Use vmlinux.o instead of performing the slow LTO link again. objs=vmlinux.o libs= @@ -82,7 +83,7 @@ vmlinux_link() ldlibs="-lutil -lrt -lpthread" else wl= - ld="${LD}" + ld="${LDFINAL}" ldflags="${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux}" ldlibs= fi diff --git a/scripts/mksysmap b/scripts/mksysmap index 16a08b8ef2f8..0f19a44ab136 100755 --- a/scripts/mksysmap +++ b/scripts/mksysmap @@ -34,6 +34,7 @@ # U - undefined global symbols # N - debugging symbols # w - local weak symbols +# W - weak symbols whose names contain ".c." # readprofile starts reading symbols when _stext is found, and # continue until it finds a symbol which is not either of 'T', 't', @@ -57,4 +58,5 @@ $NM -n $1 | grep -v \ -e ' __kstrtab_' \ -e ' __kstrtabns_' \ -e ' L0$' \ + -e ' W .*\.c\.' 
\ > $2 diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile index c9e38ad937fd..aa3465d6bc4a 100644 --- a/scripts/mod/Makefile +++ b/scripts/mod/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 OBJECT_FILES_NON_STANDARD := y CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO) +CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO_GCC) hostprogs-always-y += modpost mk_elfconfig always-y += empty.o @@ -9,6 +10,8 @@ modpost-objs := modpost.o file2alias.o sumversion.o devicetable-offsets-file := devicetable-offsets.h +$(obj)/devicetable-offsets.s: KBUILD_CFLAGS += $(DISABLE_LTO_GCC) + $(obj)/$(devicetable-offsets-file): $(obj)/devicetable-offsets.s FORCE $(call filechk,offsets,__DEVICETABLE_OFFSETS_H__) diff --git a/scripts/module.lds.S b/scripts/module.lds.S index bf5bcf2836d8..a6e6a1345fce 100644 --- a/scripts/module.lds.S +++ b/scripts/module.lds.S @@ -33,7 +33,7 @@ SECTIONS { __kcfi_traps : { KEEP(*(.kcfi_traps)) } #endif -#ifdef CONFIG_LTO_CLANG +#ifdef CONFIG_LTO /* * With CONFIG_LTO_CLANG, LLD always enables -fdata-sections and * -ffunction-sections, which increases the size of the final module. -- 2.40.0