From 6862acd7d60136488d29037db89263fc68ea59d8 Mon Sep 17 00:00:00 2001
From: Peter Jung
Date: Mon, 26 Dec 2022 00:32:07 +0100
Subject: [PATCH] gcc-LTO support for the kernel

This is the first call for comments (and kbuild complaints) for this support
of gcc (full) LTO in the kernel. Most of the patches come from Andi. Martin
and I rebased them onto newer kernels and fixed the known issues. I also
updated most of the commit logs and reordered the patches into groups with
similar intent.

The very first patch comes from Alexander and is, I believe, already pending
in an x86 queue; I am attaching it only for completeness. Without it, the
kernel does not boot (LTO reorders a lot).

In our measurements, the performance differences are negligible. The kernel
is bigger with gcc LTO due to more inlining. The next step might be to
experiment with non-static functions: since we export everything, the
compiler currently cannot drop anything (especially inlined and no longer
needed functions).

Cc: Alexander Potapenko
Cc: Alexander Shishkin
Cc: Alexei Starovoitov
Cc: Alexey Makhalov
Cc: Andrew Morton
Cc: Andrey Konovalov
Cc: Andrey Ryabinin
Cc: Andrii Nakryiko
Cc: Andy Lutomirski
Cc: Ard Biesheuvel
Cc: Arnaldo Carvalho de Melo
Cc: Ben Segall
Cc: Borislav Petkov
Cc: Daniel Borkmann
Cc: Daniel Bristot de Oliveira
Cc: Dave Hansen
Cc: Dietmar Eggemann
Cc: Dmitry Vyukov
Cc: Don Zickus
Cc: Hao Luo
Cc: H.J. Lu
Cc: "H. Peter Anvin"
Cc: Huang Rui
Cc: Ingo Molnar
Cc: Jan Hubicka
Cc: Jason Baron
Cc: Jiri Kosina
Cc: Jiri Olsa
Cc: Joe Lawrence
Cc: John Fastabend
Cc: Josh Poimboeuf
Cc: Juergen Gross
Cc: Juri Lelli
Cc: KP Singh
Cc: Mark Rutland
Cc: Martin KaFai Lau
Cc: Martin Liska
Cc: Masahiro Yamada
Cc: Mel Gorman
Cc: Miguel Ojeda
Cc: Michal Marek
Cc: Miroslav Benes
Cc: Namhyung Kim
Cc: Nick Desaulniers
Cc: Oleksandr Tyshchenko
Cc: Peter Zijlstra
Cc: Petr Mladek
Cc: "Rafael J.
Wysocki" Cc: Richard Biener Cc: Sedat Dilek Cc: Song Liu Cc: Stanislav Fomichev Cc: Stefano Stabellini Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Valentin Schneider Cc: Vincent Guittot Cc: Vincenzo Frascino Cc: Viresh Kumar Cc: VMware PV-Drivers Reviewers Cc: Yonghong Song Alexander Lobakin (1): x86/boot: robustify calling startup_{32,64}() from the decompressor code Andi Kleen (36): Compiler Attributes, lto: introduce __noreorder tracepoint, lto: Mark static call functions as __visible static_call, lto: Mark static keys as __visible static_call, lto: Mark static_call_return0() as __visible static_call, lto: Mark func_a() as __visible_on_lto x86/alternative, lto: Mark int3_*() as global and __visible x86/paravirt, lto: Mark native_steal_clock() as __visible_on_lto x86/preempt, lto: Mark preempt_schedule_*thunk() as __visible x86/xen, lto: Mark xen_vcpu_stolen() as __visible x86, lto: Mark gdt_page and native_sched_clock() as __visible amd, lto: Mark amd pmu and pstate functions as __visible_on_lto entry, lto: Mark raw_irqentry_exit_cond_resched() as __visible export, lto: Mark __kstrtab* in EXPORT_SYMBOL() as global and __visible softirq, lto: Mark irq_enter/exit_rcu() as __visible btf, lto: Make all BTF IDs global on LTO init.h, lto: mark initcalls as __noreorder bpf, lto: mark interpreter jump table as __noreorder sched, lto: mark sched classes as __noreorder linkage, lto: use C version for SYSCALL_ALIAS() / cond_syscall() scripts, lto: re-add gcc-ld scripts, lto: use CONFIG_LTO for many LTO specific actions Kbuild, lto: Add Link Time Optimization support x86/purgatory, lto: Disable gcc LTO for purgatory x86/realmode, lto: Disable gcc LTO for real mode code x86/vdso, lto: Disable gcc LTO for the vdso scripts, lto: disable gcc LTO for some mod sources Kbuild, lto: disable gcc LTO for bounds+asm-offsets lib/string, lto: disable gcc LTO for string.o Compiler attributes, lto: disable __flatten with LTO Kbuild, lto: don't include weak source file symbols in System.map x86, lto: Disable relative init pointers with gcc LTO x86/livepatch, lto: Disable live patching with gcc LTO x86/lib, lto: Mark 32bit mem{cpy,move,set} as __used scripts, lto: check C symbols for modversions scripts/bloat-o-meter, lto: handle gcc LTO x86, lto: Finally enable gcc LTO for x86 Jiri Slaby (5): kbuild: pass jobserver to cmd_ld_vmlinux.o compiler.h: introduce __visible_on_lto compiler.h: introduce __global_on_lto btf, lto: pass scope as strings x86/apic, lto: Mark apic_driver*() as __noreorder Martin Liska (4): kbuild: lto: preserve MAKEFLAGS for module linking x86/sev, lto: Mark cpuid_table_copy as __visible_on_lto mm/kasan, lto: Mark kasan mem{cpy,move,set} as __used kasan, lto: remove extra BUILD_BUG() in memory_is_poisoned Signed-off-by: Peter Jung --- Documentation/kbuild/index.rst | 2 + Documentation/kbuild/lto-build.rst | 76 +++++++++++++++++++++++++++++ Kbuild | 3 ++ Makefile | 6 ++- arch/Kconfig | 52 ++++++++++++++++++++ arch/x86/Kconfig | 5 +- arch/x86/boot/compressed/head_32.S | 2 +- arch/x86/boot/compressed/head_64.S | 2 +- arch/x86/boot/compressed/misc.c | 16 +++--- arch/x86/entry/vdso/Makefile | 2 + arch/x86/events/amd/core.c | 2 +- arch/x86/include/asm/apic.h | 4 +- arch/x86/include/asm/preempt.h | 4 +- arch/x86/kernel/alternative.c | 5 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/paravirt.c | 2 +- arch/x86/kernel/sev-shared.c | 2 +- arch/x86/kernel/tsc.c | 2 +- arch/x86/lib/memcpy_32.c | 4 +- arch/x86/purgatory/Makefile | 2 + arch/x86/realmode/Makefile | 1 + drivers/cpufreq/amd-pstate.c | 15 
+++--- drivers/xen/time.c | 2 +- include/asm-generic/vmlinux.lds.h | 2 +- include/linux/btf_ids.h | 24 ++++----- include/linux/compiler.h | 8 +++ include/linux/compiler_attributes.h | 15 ++++++ include/linux/export.h | 6 ++- include/linux/init.h | 2 +- include/linux/linkage.h | 16 +++--- include/linux/static_call.h | 12 ++--- include/linux/tracepoint.h | 4 +- kernel/bpf/core.c | 2 +- kernel/entry/common.c | 2 +- kernel/kallsyms.c | 2 +- kernel/livepatch/Kconfig | 1 + kernel/sched/sched.h | 1 + kernel/softirq.c | 4 +- kernel/static_call.c | 2 +- kernel/static_call_inline.c | 6 +-- kernel/time/posix-stubs.c | 19 +++++++- lib/Makefile | 2 + mm/kasan/generic.c | 2 +- mm/kasan/shadow.c | 6 +-- scripts/Makefile.build | 17 ++++--- scripts/Makefile.lib | 2 +- scripts/Makefile.lto | 43 ++++++++++++++++ scripts/Makefile.modfinal | 2 +- scripts/Makefile.vmlinux | 3 +- scripts/Makefile.vmlinux_o | 6 +-- scripts/bloat-o-meter | 2 +- scripts/gcc-ld | 40 +++++++++++++++ scripts/link-vmlinux.sh | 9 ++-- scripts/mksysmap | 2 + scripts/mod/Makefile | 3 ++ scripts/module.lds.S | 2 +- 56 files changed, 383 insertions(+), 99 deletions(-) create mode 100644 Documentation/kbuild/lto-build.rst create mode 100644 scripts/Makefile.lto create mode 100755 scripts/gcc-ld diff --git a/Documentation/kbuild/index.rst b/Documentation/kbuild/index.rst index cee2f99f734b..1937eee7c437 100644 --- a/Documentation/kbuild/index.rst +++ b/Documentation/kbuild/index.rst @@ -22,6 +22,8 @@ Kernel Build System gcc-plugins llvm + lto-build + .. only:: subproject and html Indices diff --git a/Documentation/kbuild/lto-build.rst b/Documentation/kbuild/lto-build.rst new file mode 100644 index 000000000000..3fb17342e72f --- /dev/null +++ b/Documentation/kbuild/lto-build.rst @@ -0,0 +1,76 @@ +===================================================== +gcc link time optimization (LTO) for the Linux kernel +===================================================== + +Link Time Optimization allows the compiler to optimize the complete program +instead of just each file. + +The compiler can inline functions between files and do various other global +optimizations, like specializing functions for common parameters, +determing when global variables are clobbered, making functions pure/const, +propagating constants globally, removing unneeded data and others. + +It will also drop unused functions which can make the kernel +image smaller in some circumstances, in particular for small kernel +configurations. + +For small monolithic kernels it can throw away unused code very effectively +(especially when modules are disabled) and usually shrinks +the code size. + +Build time and memory consumption at build time will increase, depending +on the size of the largest binary. Modular kernels are less affected. +With LTO incremental builds are less incremental, as always the whole +binary needs to be re-optimized (but not re-parsed) + +Oopses can be somewhat more difficult to read, due to the more aggressive +inlining: it helps to use scripts/faddr2line. + +It is currently incompatible with live patching. + +Normal "reasonable" builds work with less than 4GB of RAM, but very large +configurations like allyesconfig typically need more memory. The actual +memory needed depends on the available memory (gcc sizes its garbage +collector pools based on that or on the ulimit -m limits) and +the compiler version. 
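
As a rough illustration of the cross-file optimization described above (not
part of this patch; the files and functions below are hypothetical, not taken
from the kernel), consider two translation units:

/* scale.c -- hypothetical translation unit */
int scale_factor(void)
{
	return 4;
}

/* user.c -- a second hypothetical translation unit in the same program */
int scale_factor(void);

int scale(int x)
{
	/*
	 * Without LTO this stays an out-of-line call, because only the
	 * declaration of scale_factor() is visible here.  With -flto the
	 * link-time optimizer sees both units, can inline scale_factor()
	 * and fold scale() down to "return x * 4".
	 */
	return x * scale_factor();
}
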
+ +Requirements: +------------- + +- Enough memory: 4GB for a standard build, more for allyesconfig + The peak memory usage happens single threaded (when lto-wpa merges types), + so dialing back -j options will not help much. + +A 32bit hosted compiler is unlikely to work due to the memory requirements. +You can however build a kernel targeted at 32bit on a 64bit host. + +FAQs: +----- + +* I get a section type attribute conflict + + Usually because of someone doing const __initdata (should be + const __initconst) or const __read_mostly (should be just const). Check + both symbols reported by gcc. + +References: +----------- + +* Presentation on Kernel LTO + (note, performance numbers/details totally outdated.) + + http://halobates.de/kernel-lto.pdf + +* Generic gcc LTO: + + * http://www.ucw.cz/~hubicka/slides/labs2013.pdf + * http://www.hipeac.net/system/files/barcelona.pdf + +* Somewhat outdated too (from GCC site): + + * http://gcc.gnu.org/projects/lto/lto.pdf + * http://gcc.gnu.org/projects/lto/whopr.pdf + +Happy Link-Time-Optimizing! + +Andi Kleen diff --git a/Kbuild b/Kbuild index 464b34a08f51..40744d76d416 100644 --- a/Kbuild +++ b/Kbuild @@ -11,6 +11,8 @@ bounds-file := include/generated/bounds.h targets := kernel/bounds.s +kernel/bounds.s: KBUILD_CFLAGS += $(DISABLE_LTO_GCC) + $(bounds-file): kernel/bounds.s FORCE $(call filechk,offsets,__LINUX_BOUNDS_H__) @@ -30,6 +32,7 @@ offsets-file := include/generated/asm-offsets.h targets += arch/$(SRCARCH)/kernel/asm-offsets.s arch/$(SRCARCH)/kernel/asm-offsets.s: $(timeconst-file) $(bounds-file) +arch/$(SRCARCH)/kernel/asm-offsets.s: KBUILD_CFLAGS += $(DISABLE_LTO_GCC) $(offsets-file): arch/$(SRCARCH)/kernel/asm-offsets.s FORCE $(call filechk,offsets,__ASM_OFFSETS_H__) diff --git a/Makefile b/Makefile index 473dca4da560..8665628651a5 100644 --- a/Makefile +++ b/Makefile @@ -493,6 +493,7 @@ KBUILD_HOSTLDLIBS := $(HOST_LFS_LIBS) $(HOSTLDLIBS) # Make variables (CC, etc...) CPP = $(CC) -E +LDFINAL = $(LD) ifneq ($(LLVM),) CC = $(LLVM_PREFIX)clang$(LLVM_SUFFIX) LD = $(LLVM_PREFIX)ld.lld$(LLVM_SUFFIX) @@ -615,7 +616,7 @@ export RUSTC RUSTDOC RUSTFMT RUSTC_OR_CLIPPY_QUIET RUSTC_OR_CLIPPY BINDGEN CARGO export HOSTRUSTC KBUILD_HOSTRUSTFLAGS export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX -export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD +export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD LDFINAL export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE export KBUILD_USERCFLAGS KBUILD_USERLDFLAGS @@ -1011,7 +1012,7 @@ endif endif endif -ifdef CONFIG_LTO +ifdef CONFIG_LTO_CLANG KBUILD_CFLAGS += -fno-lto $(CC_FLAGS_LTO) KBUILD_AFLAGS += -fno-lto export CC_FLAGS_LTO @@ -1099,6 +1100,7 @@ include-$(CONFIG_KMSAN) += scripts/Makefile.kmsan include-$(CONFIG_UBSAN) += scripts/Makefile.ubsan include-$(CONFIG_KCOV) += scripts/Makefile.kcov include-$(CONFIG_RANDSTRUCT) += scripts/Makefile.randstruct +include-$(CONFIG_LTO_GCC) += scripts/Makefile.lto include-$(CONFIG_GCC_PLUGINS) += scripts/Makefile.gcc-plugins include $(addprefix $(srctree)/, $(include-y)) diff --git a/arch/Kconfig b/arch/Kconfig index 12e3ddabac9d..4c6edfdf3d58 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -699,6 +699,21 @@ config HAS_LTO_CLANG The compiler and Kconfig options support building with Clang's LTO. +config ARCH_SUPPORTS_LTO_GCC + bool + +# Some ar versions leak file descriptors when using the LTO +# plugin and cause strange errors when ulimit -n is too low. 
+# Pick an arbitrary threshold, which should be enough for most
+# kernel configs. This was a regression present only in some
+# transient binutils versions, so either older or new enough
+# versions are fine.
+# This might not be the exact affected range.
+config BAD_AR
+	depends on LD_VERSION = 23000
+	depends on $(shell,ulimit -n) < 4000
+	def_bool y
+
 choice
 	prompt "Link Time Optimization (LTO)"
 	default LTO_NONE
@@ -746,8 +761,45 @@ config LTO_CLANG_THIN
 	  https://clang.llvm.org/docs/ThinLTO.html
 
 	  If unsure, say Y.
+
+config LTO_GCC
+	bool "gcc LTO"
+	depends on ARCH_SUPPORTS_LTO_GCC && CC_IS_GCC
+	depends on GCC_VERSION >= 100300
+	depends on LD_VERSION >= 22700
+	depends on !BAD_AR
+	select LTO
+	help
+	  Enable whole-program (link-time) optimization (LTO) for the whole
+	  kernel and each module. This usually increases compile time,
+	  especially for incremental builds, but tends to generate better
+	  code and enables some additional global checks.
+
+	  It allows the compiler to inline functions between different files
+	  and do other global optimizations, such as propagating constants
+	  between functions, determining side effects of functions, avoiding
+	  unnecessary register saving around functions, or optimizing unused
+	  function arguments. It also allows the compiler to drop unused functions.
+
+	  With this option the compiler will also do some global checking over
+	  different source files.
+
+	  This requires a gcc 10.3 or later compiler and binutils >= 2.27.
+
+	  On larger non-modular configurations this may need more than 4GB of
+	  RAM for the link phase, as well as a 64-bit host compiler.
+
+	  For more information see Documentation/kbuild/lto-build.rst
 endchoice
+config LTO_CP_CLONE
+	bool "Allow aggressive cloning for function specialization"
+	depends on LTO_GCC
+	help
+	  Allow the compiler to clone and specialize functions for specific
+	  arguments when it determines those arguments are commonly
+	  passed. Experimental. Will increase text size.
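
As a hedged sketch of the specialization that the LTO_CP_CLONE help above
refers to (the functions below are invented for illustration; only the
-fipa-cp-clone behavior itself comes from this series):

/* Both functions are hypothetical; they only illustrate the idea. */
static int mix(const unsigned char *buf, int len, int seed)
{
	int i, h = seed;

	for (i = 0; i < len; i++)
		h = h * 31 + buf[i];
	return h;
}

int mix_default(const unsigned char *buf, int len)
{
	/*
	 * If every caller passes seed == 0, -fipa-cp-clone allows gcc to
	 * emit a specialized clone (e.g. mix.constprop.0) with the constant
	 * folded in.  That can be faster, but duplicates code, which is why
	 * the option is marked experimental and increases text size.
	 */
	return mix(buf, len, 0);
}
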
+ config ARCH_SUPPORTS_CFI_CLANG bool help diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3604074a878b..55efa83e4806 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -114,6 +114,7 @@ config X86 select ARCH_USES_CFI_TRAPS if X86_64 && CFI_CLANG select ARCH_SUPPORTS_LTO_CLANG select ARCH_SUPPORTS_LTO_CLANG_THIN + select ARCH_SUPPORTS_LTO_GCC select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS @@ -178,7 +179,9 @@ config X86 select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT - select HAVE_ARCH_PREL32_RELOCATIONS + # LTO can move assembler to different files, so all + # the init functions would need to be global for this to work + select HAVE_ARCH_PREL32_RELOCATIONS if !LTO_GCC select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_STACKLEAK diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 6589ddd4cfaf..987ae727cf9f 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -187,7 +187,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) leal boot_heap@GOTOFF(%ebx), %eax pushl %eax /* heap area */ pushl %esi /* real mode pointer */ - call extract_kernel /* returns kernel location in %eax */ + call extract_kernel /* returns kernel entry point in %eax */ addl $24, %esp /* diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index a75712991df3..03c4328a88cb 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -569,7 +569,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) movl input_len(%rip), %ecx /* input_len */ movq %rbp, %r8 /* output target address */ movl output_len(%rip), %r9d /* decompressed length, end of relocs */ - call extract_kernel /* returns kernel location in %rax */ + call extract_kernel /* returns kernel entry point in %rax */ popq %rsi /* diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index cf690d8712f4..2548d7fb243e 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -277,7 +277,7 @@ static inline void handle_relocations(void *output, unsigned long output_len, { } #endif -static void parse_elf(void *output) +static size_t parse_elf(void *output) { #ifdef CONFIG_X86_64 Elf64_Ehdr ehdr; @@ -287,16 +287,15 @@ static void parse_elf(void *output) Elf32_Phdr *phdrs, *phdr; #endif void *dest; + size_t off; int i; memcpy(&ehdr, output, sizeof(ehdr)); if (ehdr.e_ident[EI_MAG0] != ELFMAG0 || ehdr.e_ident[EI_MAG1] != ELFMAG1 || ehdr.e_ident[EI_MAG2] != ELFMAG2 || - ehdr.e_ident[EI_MAG3] != ELFMAG3) { + ehdr.e_ident[EI_MAG3] != ELFMAG3) error("Kernel is not a valid ELF file"); - return; - } debug_putstr("Parsing ELF... "); @@ -305,6 +304,7 @@ static void parse_elf(void *output) error("Failed to allocate space for phdrs"); memcpy(phdrs, output + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum); + off = ehdr.e_entry - phdrs->p_paddr; for (i = 0; i < ehdr.e_phnum; i++) { phdr = &phdrs[i]; @@ -328,6 +328,8 @@ static void parse_elf(void *output) } free(phdrs); + + return off; } /* @@ -356,6 +358,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, const unsigned long kernel_total_size = VO__end - VO__text; unsigned long virt_addr = LOAD_PHYSICAL_ADDR; unsigned long needed_size; + size_t off; /* Retain x86 boot parameters pointer passed from startup_32/64. 
*/ boot_params = rmode; @@ -456,14 +459,15 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, debug_putstr("\nDecompressing Linux... "); __decompress(input_data, input_len, NULL, NULL, output, output_len, NULL, error); - parse_elf(output); + off = parse_elf(output); + debug_putaddr(off); handle_relocations(output, output_len, virt_addr); debug_putstr("done.\nBooting the kernel.\n"); /* Disable exception handling before booting the kernel */ cleanup_exception_handling(); - return output; + return output + off; } void fortify_panic(const char *name) diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 838613ac15b8..250c764cd0f9 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -3,6 +3,8 @@ # Building vDSO images for x86. # +KBUILD_CFLAGS += $(DISABLE_LTO_GCC) + # Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before # the inclusion of generic Makefile. ARCH_REL_TYPE_ABS := R_X86_64_JUMP_SLOT|R_X86_64_GLOB_DAT|R_X86_64_RELATIVE| diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index d6f3703e4119..78007a1be300 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -643,7 +643,7 @@ static inline void amd_pmu_ack_global_status(u64 status) wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status); } -static bool amd_pmu_test_overflow_topbit(int idx) +__visible_on_lto bool amd_pmu_test_overflow_topbit(int idx) { u64 counter; diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 3216da7074ba..34032041b426 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -362,12 +362,12 @@ extern struct apic *apic; * to enforce the order with in them. */ #define apic_driver(sym) \ - static const struct apic *__apicdrivers_##sym __used \ + static const struct apic *__apicdrivers_##sym __used __noreorder \ __aligned(sizeof(struct apic *)) \ __section(".apicdrivers") = { &sym } #define apic_drivers(sym1, sym2) \ - static struct apic *__apicdrivers_##sym1##sym2[2] __used \ + static struct apic *__apicdrivers_##sym1##sym2[2] __used __noreorder \ __aligned(sizeof(struct apic *)) \ __section(".apicdrivers") = { &sym1, &sym2 } diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 2d13f25b1bd8..23810d573170 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -107,13 +107,13 @@ static __always_inline bool should_resched(int preempt_offset) #ifdef CONFIG_PREEMPTION extern asmlinkage void preempt_schedule(void); -extern asmlinkage void preempt_schedule_thunk(void); +extern __visible asmlinkage void preempt_schedule_thunk(void); #define preempt_schedule_dynamic_enabled preempt_schedule_thunk #define preempt_schedule_dynamic_disabled NULL extern asmlinkage void preempt_schedule_notrace(void); -extern asmlinkage void preempt_schedule_notrace_thunk(void); +extern __visible asmlinkage void preempt_schedule_notrace_thunk(void); #define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace_thunk #define preempt_schedule_notrace_dynamic_disabled NULL diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 7d8c3cbde368..7dd332b59b99 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1268,11 +1268,12 @@ extern struct paravirt_patch_site __start_parainstructions[], * convention such that we can 'call' it from assembly. 
*/ -extern void int3_magic(unsigned int *ptr); /* defined in asm */ +extern __visible void int3_magic(unsigned int *ptr); /* defined in asm */ asm ( " .pushsection .init.text, \"ax\", @progbits\n" " .type int3_magic, @function\n" +" .globl int3_magic\n" "int3_magic:\n" ANNOTATE_NOENDBR " movl $1, (%" _ASM_ARG1 ")\n" @@ -1281,7 +1282,7 @@ asm ( " .popsection\n" ); -extern void int3_selftest_ip(void); /* defined in asm below */ +extern __visible void int3_selftest_ip(void); /* defined in asm below */ static int __init int3_exception_notify(struct notifier_block *self, unsigned long val, void *data) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9cfca3d7d0e2..c3c53844fcdf 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -202,7 +202,7 @@ static const struct cpu_dev default_cpu = { static const struct cpu_dev *this_cpu = &default_cpu; -DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { +__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #ifdef CONFIG_X86_64 /* * We need valid kernel segments for data and code in long mode too diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 327757afb027..b10558284600 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -103,7 +103,7 @@ unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; -static u64 native_steal_clock(int cpu) +__visible_on_lto u64 native_steal_clock(int cpu) { return 0; } diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 3a5b0c9c4fcc..554da8aabfc7 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -64,7 +64,7 @@ struct snp_cpuid_table { static u16 ghcb_version __ro_after_init; /* Copy of the SNP firmware's CPUID page. */ -static struct snp_cpuid_table cpuid_table_copy __ro_after_init; +__visible_on_lto struct snp_cpuid_table cpuid_table_copy __ro_after_init; /* * These will be initialized based on CPUID table so that non-present diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index bab8a98080cf..af90d8157210 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -215,7 +215,7 @@ static void __init cyc2ns_init_secondary_cpus(void) /* * Scheduler clock - returns current time in nanosec units. 
*/ -u64 native_sched_clock(void) +__visible u64 native_sched_clock(void) { if (static_branch_likely(&__use_tsc)) { u64 tsc_now = rdtsc(); diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c index a29b64befb93..d8daeb1fc883 100644 --- a/arch/x86/lib/memcpy_32.c +++ b/arch/x86/lib/memcpy_32.c @@ -6,13 +6,13 @@ #undef memset #undef memmove -__visible void *memcpy(void *to, const void *from, size_t n) +__used __visible void *memcpy(void *to, const void *from, size_t n) { return __memcpy(to, from, n); } EXPORT_SYMBOL(memcpy); -__visible void *memset(void *s, int c, size_t count) +__used __visible void *memset(void *s, int c, size_t count) { return __memset(s, c, count); } diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 17f09dc26381..c00dc09d6fe4 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -60,6 +60,8 @@ ifdef CONFIG_CFI_CLANG PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_CFI) endif +PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_LTO) + CFLAGS_REMOVE_purgatory.o += $(PURGATORY_CFLAGS_REMOVE) CFLAGS_purgatory.o += $(PURGATORY_CFLAGS) diff --git a/arch/x86/realmode/Makefile b/arch/x86/realmode/Makefile index a0b491ae2de8..47b8b500cf15 100644 --- a/arch/x86/realmode/Makefile +++ b/arch/x86/realmode/Makefile @@ -10,6 +10,7 @@ # Sanitizer runtimes are unavailable and cannot be linked here. KASAN_SANITIZE := n KCSAN_SANITIZE := n +KBUILD_CFLAGS += $(DISABLE_LTO_GCC) subdir- := rm diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 204e39006dda..0f4c48871896 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -62,7 +62,7 @@ static struct cpufreq_driver amd_pstate_driver; static int cppc_load __initdata; -static inline int pstate_enable(bool enable) +__visible_on_lto int do_amd_pstate_enable(bool enable) { return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable); } @@ -80,14 +80,14 @@ static int cppc_enable(bool enable) return ret; } -DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable); +DEFINE_STATIC_CALL(amd_pstate_enable, do_amd_pstate_enable); static inline int amd_pstate_enable(bool enable) { return static_call(amd_pstate_enable)(enable); } -static int pstate_init_perf(struct amd_cpudata *cpudata) +__visible_on_lto int do_amd_pstate_init_perf(struct amd_cpudata *cpudata) { u64 cap1; u32 highest_perf; @@ -138,15 +138,16 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) return 0; } -DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf); +DEFINE_STATIC_CALL(amd_pstate_init_perf, do_amd_pstate_init_perf); static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) { return static_call(amd_pstate_init_perf)(cpudata); } -static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, - u32 des_perf, u32 max_perf, bool fast_switch) +__visible_on_lto void do_amd_pstate_update_perf(struct amd_cpudata *cpudata, + u32 min_perf, u32 des_perf, u32 max_perf, + bool fast_switch) { if (fast_switch) wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached)); @@ -168,7 +169,7 @@ static void cppc_update_perf(struct amd_cpudata *cpudata, cppc_set_perf(cpudata->cpu, &perf_ctrls); } -DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf); +DEFINE_STATIC_CALL(amd_pstate_update_perf, do_amd_pstate_update_perf); static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, diff --git a/drivers/xen/time.c b/drivers/xen/time.c index 152dd33bb223..006a04592c8f 100644 --- a/drivers/xen/time.c +++ b/drivers/xen/time.c @@ -145,7 +145,7 @@ void 
xen_get_runstate_snapshot(struct vcpu_runstate_info *res) } /* return true when a vcpu could run but has no real cpu to run on */ -bool xen_vcpu_stolen(int vcpu) +__visible bool xen_vcpu_stolen(int vcpu) { return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable; } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index a94219e9916f..a29a4dfd7d36 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -96,7 +96,7 @@ * RODATA_MAIN is not used because existing code already defines .rodata.x * sections to be brought in with rodata. */ -#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO) #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L* #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 3a4f7cd882ca..535e7d599264 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -37,7 +37,7 @@ struct btf_id_set8 { #define ____BTF_ID(symbol, word) \ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ -".local " #symbol " ; \n" \ +"." __global_on_lto " " #symbol " ; \n" \ ".type " #symbol ", STT_OBJECT; \n" \ ".size " #symbol ", 4; \n" \ #symbol ": \n" \ @@ -83,16 +83,16 @@ word \ #define __BTF_ID_LIST(name, scope) \ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ -"." #scope " " #name "; \n" \ +"." scope " " #name "; \n" \ #name ":; \n" \ ".popsection; \n"); #define BTF_ID_LIST(name) \ -__BTF_ID_LIST(name, local) \ +__BTF_ID_LIST(name, __global_on_lto) \ extern u32 name[]; #define BTF_ID_LIST_GLOBAL(name, n) \ -__BTF_ID_LIST(name, globl) +__BTF_ID_LIST(name, "globl") /* The BTF_ID_LIST_SINGLE macro defines a BTF_ID_LIST with * a single entry. @@ -142,18 +142,18 @@ asm( \ #define __BTF_SET_START(name, scope) \ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ -"." #scope " __BTF_ID__set__" #name "; \n" \ +"." scope " __BTF_ID__set__" #name "; \n" \ "__BTF_ID__set__" #name ":; \n" \ ".zero 4 \n" \ ".popsection; \n"); #define BTF_SET_START(name) \ -__BTF_ID_LIST(name, local) \ -__BTF_SET_START(name, local) +__BTF_ID_LIST(name, __global_on_lto) \ +__BTF_SET_START(name, __global_on_lto) #define BTF_SET_START_GLOBAL(name) \ -__BTF_ID_LIST(name, globl) \ -__BTF_SET_START(name, globl) +__BTF_ID_LIST(name, "globl") \ +__BTF_SET_START(name, "globl") #define BTF_SET_END(name) \ asm( \ @@ -186,14 +186,14 @@ extern struct btf_id_set name; #define __BTF_SET8_START(name, scope) \ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ -"." #scope " __BTF_ID__set8__" #name "; \n" \ +"." 
scope " __BTF_ID__set8__" #name "; \n" \ "__BTF_ID__set8__" #name ":; \n" \ ".zero 8 \n" \ ".popsection; \n"); #define BTF_SET8_START(name) \ -__BTF_ID_LIST(name, local) \ -__BTF_SET8_START(name, local) +__BTF_ID_LIST(name, __global_on_lto) \ +__BTF_SET8_START(name, __global_on_lto) #define BTF_SET8_END(name) \ asm( \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 947a60b801db..8b5399b121d3 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -133,6 +133,14 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #define __annotate_jump_table #endif /* CONFIG_OBJTOOL */ +#ifdef CONFIG_LTO_GCC +# define __visible_on_lto __visible +# define __global_on_lto "globl" +#else +# define __visible_on_lto static +# define __global_on_lto "local" +#endif + #ifndef unreachable # define unreachable() do { \ annotate_unreachable(); \ diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 898b3458b24a..09cf8eebcb0d 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -229,7 +229,12 @@ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#Common-Function-Attributes * clang: https://clang.llvm.org/docs/AttributeReference.html#flatten */ +#ifndef CONFIG_LTO_GCC # define __flatten __attribute__((flatten)) +#else +/* Causes very large memory use with gcc in LTO mode */ +# define __flatten +#endif /* * Note the missing underscores. @@ -379,4 +384,14 @@ */ #define __fix_address noinline __noclone +/* + * https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#Common-Function-Attributes + */ + +#if __has_attribute(__no_reorder__) +#define __noreorder __attribute__((no_reorder)) +#else +#define __noreorder +#endif + #endif /* __LINUX_COMPILER_ATTRIBUTES_H */ diff --git a/include/linux/export.h b/include/linux/export.h index 3f31ced0d977..3cb5f85327da 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -85,11 +85,13 @@ struct kernel_symbol { */ #define ___EXPORT_SYMBOL(sym, sec, ns) \ extern typeof(sym) sym; \ - extern const char __kstrtab_##sym[]; \ - extern const char __kstrtabns_##sym[]; \ + extern const char __visible __kstrtab_##sym[]; \ + extern const char __visible __kstrtabns_##sym[]; \ asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \ + " .globl __kstrtab_" #sym " \n" \ "__kstrtab_" #sym ": \n" \ " .asciz \"" #sym "\" \n" \ + " .globl __kstrtabns_" #sym " \n" \ "__kstrtabns_" #sym ": \n" \ " .asciz \"" ns "\" \n" \ " .previous \n"); \ diff --git a/include/linux/init.h b/include/linux/init.h index c5fe6d26f5b1..b069a0e1ca5c 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -249,7 +249,7 @@ extern bool initcall_debug; static_assert(__same_type(initcall_t, &fn)); #else #define ____define_initcall(fn, __unused, __name, __sec) \ - static initcall_t __name __used \ + static initcall_t __name __used __noreorder \ __attribute__((__section__(__sec))) = fn; #endif diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 5c8865bb59d9..7ef0f43b820d 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -23,17 +23,17 @@ #endif #ifndef cond_syscall -#define cond_syscall(x) asm( \ - ".weak " __stringify(x) "\n\t" \ - ".set " __stringify(x) "," \ - __stringify(sys_ni_syscall)) +#define cond_syscall(x) \ + extern long x(void) __attribute__((alias("sys_ni_syscall"), weak)); #endif #ifndef SYSCALL_ALIAS -#define SYSCALL_ALIAS(alias, name) asm( \ - ".globl " __stringify(alias) "\n\t" \ - ".set " 
__stringify(alias) "," \ - __stringify(name)) +#define SYSCALL_ALIAS(a, name) \ + long a(void) __attribute__((alias(__stringify(name)))) +#define SYSCALL_ALIAS_PROTO(a, name) \ + typeof(a) a __attribute__((alias(__stringify(name)))) +#else +#define SYSCALL_ALIAS_PROTO(a, name) SYSCALL_ALIAS(a, name) #endif #define __page_aligned_data __section(".data..page_aligned") __aligned(PAGE_SIZE) diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 141e6b176a1b..21c84fcbcfc9 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -184,7 +184,7 @@ extern long __static_call_return0(void); #define DEFINE_STATIC_CALL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ - struct static_call_key STATIC_CALL_KEY(name) = { \ + __visible struct static_call_key STATIC_CALL_KEY(name) = { \ .func = _func, \ .type = 1, \ }; \ @@ -192,7 +192,7 @@ extern long __static_call_return0(void); #define DEFINE_STATIC_CALL_NULL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ - struct static_call_key STATIC_CALL_KEY(name) = { \ + __visible struct static_call_key STATIC_CALL_KEY(name) = { \ .func = NULL, \ .type = 1, \ }; \ @@ -200,7 +200,7 @@ extern long __static_call_return0(void); #define DEFINE_STATIC_CALL_RET0(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ - struct static_call_key STATIC_CALL_KEY(name) = { \ + __visible struct static_call_key STATIC_CALL_KEY(name) = { \ .func = __static_call_return0, \ .type = 1, \ }; \ @@ -229,14 +229,14 @@ static inline int static_call_init(void) { return 0; } #define DEFINE_STATIC_CALL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ - struct static_call_key STATIC_CALL_KEY(name) = { \ + __visible struct static_call_key STATIC_CALL_KEY(name) = { \ .func = _func, \ }; \ ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) #define DEFINE_STATIC_CALL_NULL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ - struct static_call_key STATIC_CALL_KEY(name) = { \ + __visible struct static_call_key STATIC_CALL_KEY(name) = { \ .func = NULL, \ }; \ ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) @@ -290,7 +290,7 @@ static inline long __static_call_return0(void) #define __DEFINE_STATIC_CALL(name, _func, _func_init) \ DECLARE_STATIC_CALL(name, _func); \ - struct static_call_key STATIC_CALL_KEY(name) = { \ + __visible struct static_call_key STATIC_CALL_KEY(name) = { \ .func = _func_init, \ } diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 4b33b95eb8be..1ce0655f0c9c 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -239,7 +239,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * poking RCU a bit. 
*/ #define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ - extern int __traceiter_##name(data_proto); \ + extern __visible int __traceiter_##name(data_proto); \ DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name); \ extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ @@ -306,7 +306,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) .unregfunc = _unreg, \ .funcs = NULL }; \ __TRACEPOINT_ENTRY(_name); \ - int __traceiter_##_name(void *__data, proto) \ + __visible int __traceiter_##_name(void *__data, proto) \ { \ struct tracepoint_func *it_func_ptr; \ void *it_func; \ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ba3fff17e2f9..1aa0d602c2c8 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1642,7 +1642,7 @@ u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) * * Return: whatever value is in %BPF_R0 at program exit */ -static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) +static u64 __noreorder ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) { #define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y #define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 846add8394c4..13c1a7a0e8ce 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -378,7 +378,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs) return ret; } -void raw_irqentry_exit_cond_resched(void) +__visible void raw_irqentry_exit_cond_resched(void) { if (!preempt_count()) { /* Sanity check RCU and thread stack */ diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 83f499182c9a..4a6e6e2392f4 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -167,7 +167,7 @@ static bool cleanup_symbol_name(char *s) { char *res; - if (!IS_ENABLED(CONFIG_LTO_CLANG)) + if (!IS_ENABLED(CONFIG_LTO)) return false; /* diff --git a/kernel/livepatch/Kconfig b/kernel/livepatch/Kconfig index 53d51ed619a3..22699adc39a6 100644 --- a/kernel/livepatch/Kconfig +++ b/kernel/livepatch/Kconfig @@ -12,6 +12,7 @@ config LIVEPATCH depends on KALLSYMS_ALL depends on HAVE_LIVEPATCH depends on !TRIM_UNUSED_KSYMS + depends on !LTO_GCC # not supported in gcc help Say Y here if you want to support kernel live patching. This option has no runtime impact until a kernel "patch" diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0fb95c98b84e..ad58eba41f6c 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2246,6 +2246,7 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next) */ #define DEFINE_SCHED_CLASS(name) \ const struct sched_class name##_sched_class \ + __noreorder \ __aligned(__alignof__(struct sched_class)) \ __section("__" #name "_sched_class") diff --git a/kernel/softirq.c b/kernel/softirq.c index 5db2afd0be68..99b82c9492ea 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -646,7 +646,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) /** * irq_enter_rcu - Enter an interrupt context with RCU watching */ -void irq_enter_rcu(void) +__visible void irq_enter_rcu(void) { __irq_enter_raw(); @@ -699,7 +699,7 @@ static inline void __irq_exit_rcu(void) * * Also processes softirqs if needed and possible. */ -void irq_exit_rcu(void) +__visible void irq_exit_rcu(void) { __irq_exit_rcu(); /* must be last! 
*/ diff --git a/kernel/static_call.c b/kernel/static_call.c index e9c3e69f3837..9197fe86d8bd 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include -long __static_call_return0(void) +__visible long __static_call_return0(void) { return 0; } diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c index 639397b5491c..ec0a9fd7a953 100644 --- a/kernel/static_call_inline.c +++ b/kernel/static_call_inline.c @@ -514,7 +514,7 @@ early_initcall(static_call_init); #ifdef CONFIG_STATIC_CALL_SELFTEST -static int func_a(int x) +__visible_on_lto int sc_func_a(int x) { return x+1; } @@ -524,7 +524,7 @@ static int func_b(int x) return x+2; } -DEFINE_STATIC_CALL(sc_selftest, func_a); +DEFINE_STATIC_CALL(sc_selftest, sc_func_a); static struct static_call_data { int (*func)(int); @@ -533,7 +533,7 @@ static struct static_call_data { } static_call_data [] __initdata = { { NULL, 2, 3 }, { func_b, 2, 4 }, - { func_a, 2, 3 } + { sc_func_a, 2, 3 } }; static int __init test_static_call_init(void) diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c index 90ea5f373e50..23e1a63adc2b 100644 --- a/kernel/time/posix-stubs.c +++ b/kernel/time/posix-stubs.c @@ -31,13 +31,21 @@ asmlinkage long sys_ni_posix_timers(void) } #ifndef SYS_NI -#define SYS_NI(name) SYSCALL_ALIAS(sys_##name, sys_ni_posix_timers) +#define SYS_NI(name) SYSCALL_ALIAS_PROTO(sys_##name, sys_ni_posix_timers) #endif #ifndef COMPAT_SYS_NI -#define COMPAT_SYS_NI(name) SYSCALL_ALIAS(compat_sys_##name, sys_ni_posix_timers) +#define COMPAT_SYS_NI(name) \ + SYSCALL_ALIAS_PROTO(compat_sys_##name, sys_ni_posix_timers) #endif +/* + * This cannot go to SYS_NI() or SYSCALL_ALIAS_PROTO() due to gcc bug fixed in + * gcc >= 13 (cf. PR 97498). I wonder how is __SYSCALL_DEFINEx() able to work? + */ +__diag_push(); +__diag_ignore(GCC, 8, "-Wattribute-alias", "Alias to nonimplemented syscall"); + SYS_NI(timer_create); SYS_NI(timer_gettime); SYS_NI(timer_getoverrun); @@ -51,6 +59,8 @@ SYS_NI(clock_adjtime32); SYS_NI(alarm); #endif +__diag_pop(); + /* * We preserve minimal support for CLOCK_REALTIME and CLOCK_MONOTONIC * as it is easy to remain compatible with little code. 
CLOCK_BOOTTIME @@ -157,6 +167,9 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, which_clock); } +__diag_push(); +__diag_ignore(GCC, 8, "-Wattribute-alias", "Alias to nonimplemented syscall"); + #ifdef CONFIG_COMPAT COMPAT_SYS_NI(timer_create); #endif @@ -170,6 +183,8 @@ COMPAT_SYS_NI(setitimer); SYS_NI(timer_settime32); SYS_NI(timer_gettime32); +__diag_pop(); + SYSCALL_DEFINE2(clock_settime32, const clockid_t, which_clock, struct old_timespec32 __user *, tp) { diff --git a/lib/Makefile b/lib/Makefile index 4d9461bfea42..3fc8235f94d0 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -27,6 +27,8 @@ KASAN_SANITIZE_string.o := n CFLAGS_string.o += -fno-stack-protector endif +CFLAGS_string.o += $(DISABLE_LTO_GCC) + lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o timerqueue.o xarray.o \ maple_tree.o idr.o extable.o irq_regs.o argv_split.o \ diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index b076f597a378..9f9b24295ac6 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -152,7 +152,7 @@ static __always_inline bool memory_is_poisoned(unsigned long addr, size_t size) case 16: return memory_is_poisoned_16(addr); default: - BUILD_BUG(); + break; } } diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index 2fba1f51f042..482b188c0dd0 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -39,7 +39,7 @@ bool __kasan_check_write(const volatile void *p, unsigned int size) EXPORT_SYMBOL(__kasan_check_write); #undef memset -void *memset(void *addr, int c, size_t len) +__used void *memset(void *addr, int c, size_t len) { if (!kasan_check_range((unsigned long)addr, len, true, _RET_IP_)) return NULL; @@ -49,7 +49,7 @@ void *memset(void *addr, int c, size_t len) #ifdef __HAVE_ARCH_MEMMOVE #undef memmove -void *memmove(void *dest, const void *src, size_t len) +__used void *memmove(void *dest, const void *src, size_t len) { if (!kasan_check_range((unsigned long)src, len, false, _RET_IP_) || !kasan_check_range((unsigned long)dest, len, true, _RET_IP_)) @@ -60,7 +60,7 @@ void *memmove(void *dest, const void *src, size_t len) #endif #undef memcpy -void *memcpy(void *dest, const void *src, size_t len) +__used void *memcpy(void *dest, const void *src, size_t len) { if (!kasan_check_range((unsigned long)src, len, false, _RET_IP_) || !kasan_check_range((unsigned long)dest, len, true, _RET_IP_)) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index a0d5c6cca76d..d68a9170d925 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -149,8 +149,8 @@ is-single-obj-m = $(and $(part-of-module),$(filter $@, $(obj-m)),y) # When a module consists of a single object, there is no reason to keep LLVM IR. # Make $(LD) covert LLVM IR to ELF here. -ifdef CONFIG_LTO_CLANG -cmd_ld_single_m = $(if $(is-single-obj-m), ; $(LD) $(ld_flags) -r -o $(tmp-target) $@; mv $(tmp-target) $@) +ifdef CONFIG_LTO +cmd_ld_single_m = $(if $(is-single-obj-m), ; $(LDFINAL) $(ld_flags) -r -o $(tmp-target) $@; mv $(tmp-target) $@) endif quiet_cmd_cc_o_c = CC $(quiet_modtag) $@ @@ -169,7 +169,7 @@ ifdef CONFIG_MODVERSIONS # be compiled and linked to the kernel and/or modules. 
gen_symversions = \ - if $(NM) $@ 2>/dev/null | grep -q __ksymtab; then \ + if $(NM) $@ 2>/dev/null | grep -q __kstrtab; then \ $(call cmd_gensymtypes_$(1),$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ >> $(dot-target).cmd; \ fi @@ -249,7 +249,7 @@ endef # Built-in and composite module parts $(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE - $(call if_changed_rule,cc_o_c) + +$(call if_changed_rule,cc_o_c) $(call cmd,force_checksrc) # To make this rule robust against "Argument list too long" error, @@ -267,7 +267,8 @@ $(obj)/%.usyms: $(obj)/%.o FORCE $(call if_changed,undefined_syms) quiet_cmd_cc_lst_c = MKLST $@ - cmd_cc_lst_c = $(CC) $(c_flags) -g -c -o $*.o $< && \ + cmd_cc_lst_c = $(if $(CONFIG_LTO),$(warning Listing in LTO mode does not match final binary)) \ + $(CC) $(c_flags) -g -c -o $*.o $< && \ $(CONFIG_SHELL) $(srctree)/scripts/makelst $*.o \ System.map $(OBJDUMP) > $@ @@ -448,8 +449,8 @@ $(obj)/modules.order: $(obj-m) FORCE $(obj)/lib.a: $(lib-y) FORCE $(call if_changed,ar) -quiet_cmd_ld_multi_m = LD [M] $@ - cmd_ld_multi_m = $(LD) $(ld_flags) -r -o $@ @$(patsubst %.o,%.mod,$@) $(cmd_objtool) +quiet_cmd_ld_multi_m = LDFINAL [M] $@ + cmd_ld_multi_m = $(LDFINAL) $(ld_flags) -r -o $@ @$(patsubst %.o,%.mod,$@) $(cmd_objtool) define rule_ld_multi_m $(call cmd_and_savecmd,ld_multi_m) @@ -459,7 +460,7 @@ endef $(multi-obj-m): objtool-enabled := $(delay-objtool) $(multi-obj-m): part-of-module := y $(multi-obj-m): %.o: %.mod FORCE - $(call if_changed_rule,ld_multi_m) + +$(call if_changed_rule,ld_multi_m) $(call multi_depend, $(multi-obj-m), .o, -objs -y -m) # Add intermediate targets: diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index f45507b22ac9..d59cb689660a 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -275,7 +275,7 @@ objtool-args = $(objtool-args-y) \ $(if $(delay-objtool), --link) \ $(if $(part-of-module), --module) -delay-objtool := $(or $(CONFIG_LTO_CLANG),$(CONFIG_X86_KERNEL_IBT)) +delay-objtool := $(or $(CONFIG_LTO),$(CONFIG_X86_KERNEL_IBT)) cmd_objtool = $(if $(objtool-enabled), ; $(objtool) $(objtool-args) $@) cmd_gen_objtooldep = $(if $(objtool-enabled), { echo ; echo '$@: $$(wildcard $(objtool))' ; } >> $(dot-target).cmd) diff --git a/scripts/Makefile.lto b/scripts/Makefile.lto new file mode 100644 index 000000000000..33ac0da2bb47 --- /dev/null +++ b/scripts/Makefile.lto @@ -0,0 +1,43 @@ +# +# Support for gcc link time optimization +# + +DISABLE_LTO_GCC := +export DISABLE_LTO_GCC + +ifdef CONFIG_LTO_GCC + CC_FLAGS_LTO_GCC := -flto + DISABLE_LTO_GCC := -fno-lto + + KBUILD_CFLAGS += ${CC_FLAGS_LTO_GCC} + + CC_FLAGS_LTO := -flto + export CC_FLAGS_LTO + + lto-flags-y := -flinker-output=nolto-rel -flto=jobserver + lto-flags-y += -fwhole-program + + lto-flags-$(CONFIG_LTO_CP_CLONE) += -fipa-cp-clone + + # allow extra flags from command line + lto-flags-y += ${LTO_EXTRA_CFLAGS} + + # For LTO we need to use gcc to do the linking, not ld + # directly. Use a wrapper to convert the ld command line + # to gcc + LDFINAL := ${CONFIG_SHELL} ${srctree}/scripts/gcc-ld \ + ${lto-flags-y} + + # LTO gcc creates a lot of files in TMPDIR, and with /tmp as tmpfs + # it's easy to drive the machine OOM. Use the object directory + # instead for temporaries. + # This has the drawback that there might be some junk more visible + # after interrupted compilations, but you would have that junk + # there anyways in /tmp. 
+ TMPDIR ?= $(objtree) + export TMPDIR + + # use plugin aware tools + AR = $(CROSS_COMPILE)gcc-ar + NM = $(CROSS_COMPILE)gcc-nm +endif # CONFIG_LTO_GCC diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index a30d5b08eee9..d932e26d691d 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -32,7 +32,7 @@ ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink) quiet_cmd_ld_ko_o = LD [M] $@ cmd_ld_ko_o += \ - $(LD) -r $(KBUILD_LDFLAGS) \ + $(LDFINAL) -r $(KBUILD_LDFLAGS) \ $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \ -T scripts/module.lds -o $@ $(filter %.o, $^); \ $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true) diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index 49946cb96844..8871e55f881b 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -26,7 +26,8 @@ ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink) # Final link of vmlinux with optional arch pass after final link cmd_link_vmlinux = \ - $< "$(LD)" "$(KBUILD_LDFLAGS)" "$(LDFLAGS_vmlinux)"; \ + $< "$(LD)" "$(LDFINAL)" "$(KBUILD_LDFLAGS)" \ + "$(LDFLAGS_vmlinux)"; \ $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true) targets += vmlinux diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 0edfdb40364b..1f646b16aa70 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -44,9 +44,9 @@ objtool-args = $(vmlinux-objtool-args-y) --link # Link of vmlinux.o used for section mismatch analysis # --------------------------------------------------------------------------- -quiet_cmd_ld_vmlinux.o = LD $@ +quiet_cmd_ld_vmlinux.o = LDFINAL $@ cmd_ld_vmlinux.o = \ - $(LD) ${KBUILD_LDFLAGS} -r -o $@ \ + $(LDFINAL) ${KBUILD_LDFLAGS} -r -o $@ \ $(addprefix -T , $(initcalls-lds)) \ --whole-archive vmlinux.a --no-whole-archive \ --start-group $(KBUILD_VMLINUX_LIBS) --end-group \ @@ -58,7 +58,7 @@ define rule_ld_vmlinux.o endef vmlinux.o: $(initcalls-lds) vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE - $(call if_changed_rule,ld_vmlinux.o) + +$(call if_changed_rule,ld_vmlinux.o) targets += vmlinux.o diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter index f9553f60a14a..ab994b3bf6e2 100755 --- a/scripts/bloat-o-meter +++ b/scripts/bloat-o-meter @@ -45,7 +45,7 @@ def getsizes(file, format): if name == "linux_banner": continue if name == "vermagic": continue # statics and some other optimizations adds random .NUMBER - name = re_NUMBER.sub('', name) + name = re_NUMBER.sub('', name).replace(".lto_priv", "") sym[name] = sym.get(name, 0) + int(size, 16) return sym diff --git a/scripts/gcc-ld b/scripts/gcc-ld new file mode 100755 index 000000000000..13e85ece8d04 --- /dev/null +++ b/scripts/gcc-ld @@ -0,0 +1,40 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# run gcc with ld options +# used as a wrapper to execute link time optimizations +# yes virginia, this is not pretty + +ARGS="-nostdlib" + +for j in "$@" ; do + if [ "$j" = -v ] ; then + exec `$CC -print-prog-name=ld` -v + fi +done + +while [ "$1" != "" ] ; do + case "$1" in + -save-temps*|-m32|-m64) N="$1" ;; + -r) N="$1" ;; + -flinker-output*) N="$1" ;; + -[Wg]*) N="$1" ;; + -[olv]|-[Ofd]*|-nostdlib) N="$1" ;; + --end-group|--start-group|--whole-archive|--no-whole-archive|\ +--no-undefined|--hash-style*|--build-id*|--eh-frame-hdr|-Bsymbolic) + N="-Wl,$1" ;; + -[RTFGhIezcbyYu]*|\ +--script|--defsym|-init|-Map|--oformat|-rpath|\ +-rpath-link|--sort-section|--section-start|-Tbss|-Tdata|-Ttext|-soname|\ 
+--version-script|--dynamic-list|--version-exports-symbol|--wrap|-m|-z) + A="$1" ; shift ; N="-Wl,$A,$1" ;; + -[m]*) N="$1" ;; + -*) N="-Wl,$1" ;; + *) N="$1" ;; + esac + ARGS="$ARGS $N" + shift +done + +[ -n "$V" ] && echo >&2 $CC $ARGS + +exec $CC $ARGS diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 32e573943cf0..052545f9da7f 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -29,8 +29,9 @@ set -e LD="$1" -KBUILD_LDFLAGS="$2" -LDFLAGS_vmlinux="$3" +LDFINAL="$2" +KBUILD_LDFLAGS="$3" +LDFLAGS_vmlinux="$4" is_enabled() { grep -q "^$1=y" include/config/auto.conf @@ -60,7 +61,7 @@ vmlinux_link() # skip output file argument shift - if is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT; then + if is_enabled CONFIG_LTO || is_enabled CONFIG_X86_KERNEL_IBT; then # Use vmlinux.o instead of performing the slow LTO link again. objs=vmlinux.o libs= @@ -82,7 +83,7 @@ vmlinux_link() ldlibs="-lutil -lrt -lpthread" else wl= - ld="${LD}" + ld="${LDFINAL}" ldflags="${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux}" ldlibs= fi diff --git a/scripts/mksysmap b/scripts/mksysmap index 16a08b8ef2f8..0f19a44ab136 100755 --- a/scripts/mksysmap +++ b/scripts/mksysmap @@ -34,6 +34,7 @@ # U - undefined global symbols # N - debugging symbols # w - local weak symbols +# W - weak symbols if they contain .c. # readprofile starts reading symbols when _stext is found, and # continue until it finds a symbol which is not either of 'T', 't', @@ -57,4 +58,5 @@ $NM -n $1 | grep -v \ -e ' __kstrtab_' \ -e ' __kstrtabns_' \ -e ' L0$' \ + -e ' W .*\.c\.' \ > $2 diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile index c9e38ad937fd..aa3465d6bc4a 100644 --- a/scripts/mod/Makefile +++ b/scripts/mod/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 OBJECT_FILES_NON_STANDARD := y CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO) +CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO_GCC) hostprogs-always-y += modpost mk_elfconfig always-y += empty.o @@ -9,6 +10,8 @@ modpost-objs := modpost.o file2alias.o sumversion.o devicetable-offsets-file := devicetable-offsets.h +$(obj)/devicetable-offsets.s: KBUILD_CFLAGS += $(DISABLE_LTO_GCC) + $(obj)/$(devicetable-offsets-file): $(obj)/devicetable-offsets.s FORCE $(call filechk,offsets,__DEVICETABLE_OFFSETS_H__) diff --git a/scripts/module.lds.S b/scripts/module.lds.S index bf5bcf2836d8..a6e6a1345fce 100644 --- a/scripts/module.lds.S +++ b/scripts/module.lds.S @@ -33,7 +33,7 @@ SECTIONS { __kcfi_traps : { KEEP(*(.kcfi_traps)) } #endif -#ifdef CONFIG_LTO_CLANG +#ifdef CONFIG_LTO /* * With CONFIG_LTO_CLANG, LLD always enables -fdata-sections and * -ffunction-sections, which increases the size of the final module. -- 2.39.0
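
As a usage sketch for the __visible_on_lto helper introduced in the
include/linux/compiler.h hunk above (the function below is invented; it only
mirrors the pattern the series applies to functions such as
native_steal_clock()):

#include <linux/compiler.h>
#include <linux/types.h>

/*
 * Referenced only via a static_call site or inline assembly, so gcc's
 * whole-program optimizer could otherwise drop or localize the symbol.
 * Without CONFIG_LTO_GCC, __visible_on_lto expands to 'static'; with it,
 * the function becomes __visible and survives link-time optimization.
 */
__visible_on_lto u64 example_steal_clock(int cpu)
{
	return 0;
}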