From 718155e6164b4bec45bcba8814c3f82e84f36db0 Mon Sep 17 00:00:00 2001 From: graysky Date: Mon, 16 Sep 2024 14:47:03 -0400 FEATURES This patch adds additional tunings via new x86-64 ISA levels to the Linux kernel. These are selectable under: Processor type and features ---> x86-64 compiler ISA level • x86-64 A value of (1) is the default • x86-64-v2 A value of (2) brings support for vector instructions up to Streaming SIMD Extensions 4.2 (SSE4.2) and Supplemental Streaming SIMD Extensions 3 (SSSE3), the POPCNT instruction, and CMPXCHG16B. • x86-64-v3 A value of (3) adds vector instructions up to AVX2, MOVBE, and additional bit-manipulation instructions. There is also x86-64-v4 but including this makes little sense as the kernel does not use any of the AVX512 instructions anyway. Users of glibc 2.33 and above can see which level is supported by running: /lib/ld-linux-x86-64.so.2 --help | grep supported Or /lib64/ld-linux-x86-64.so.2 --help | grep supported BENEFITS Small but real speed increases are measurable using a make endpoint comparing a generic kernel to one built with one of the respective microarchs. See the following experimental evidence supporting this statement: https://github.com/graysky2/kernel_compiler_patch?tab=readme-ov-file#benchmarks REQUIREMENTS linux version 6.8-rc3+ gcc version >=9.0 or clang version >=9.0 ACKNOWLEDGMENTS This patch builds on the seminal work by Jeroen.[2] REFERENCES 1. https://gitlab.com/x86-psABIs/x86-64-ABI/-/commit/77566eb03bc6a326811cb7e9 2. http://www.linuxforge.net/docs/linux/linux-gcc.php --- arch/x86/Kconfig.cpu | 24 ++++++++++++++++++++++++ arch/x86/Makefile | 13 +++++++++---- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 2a7279d80460..121fd13fc283 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -308,6 +308,30 @@ config X86_GENERIC This is really intended for distributors who need more generic optimizations. +config X86_64_VERSION + int "x86-64 compiler ISA level" + range 1 3 + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + depends on X86_64 && GENERIC_CPU + help + Specify a specific x86-64 compiler ISA level. + + There are three x86-64 ISA levels that work on top of + the x86-64 baseline, namely: x86-64-v2, x86-64-v3, and x86-64-v4. + + x86-64-v2 brings support for vector instructions up to Streaming SIMD + Extensions 4.2 (SSE4.2) and Supplemental Streaming SIMD Extensions 3 + (SSSE3), the POPCNT instruction, and CMPXCHG16B. + + x86-64-v3 adds vector instructions up to AVX2, MOVBE, and additional + bit-manipulation instructions. + + x86-64-v4 is not included since the kernel does not use AVX512 instructions + + You can find the best version for your CPU by running one of the following: + /lib/ld-linux-x86-64.so.2 --help | grep supported + /lib64/ld-linux-x86-64.so.2 --help | grep supported + # # Define implied options from the CPU selection here config X86_INTERNODE_CACHE_SHIFT diff --git a/arch/x86/Makefile b/arch/x86/Makefile index cd75e78a06c1..b60d6194d83e 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -181,15 +181,20 @@ else cflags-$(CONFIG_MK8) += -march=k8 cflags-$(CONFIG_MPSC) += -march=nocona cflags-$(CONFIG_MCORE2) += -march=core2 - cflags-$(CONFIG_MATOM) += -march=atom - cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic + cflags-$(CONFIG_MATOM) += -march=bonnell + ifeq ($(CONFIG_X86_64_VERSION),1) + cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic + rustflags-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=generic + else + cflags-$(CONFIG_GENERIC_CPU) += -march=x86-64-v$(CONFIG_X86_64_VERSION) + rustflags-$(CONFIG_GENERIC_CPU) += -Ctarget-cpu=x86-64-v$(CONFIG_X86_64_VERSION) + endif KBUILD_CFLAGS += $(cflags-y) rustflags-$(CONFIG_MK8) += -Ctarget-cpu=k8 rustflags-$(CONFIG_MPSC) += -Ctarget-cpu=nocona rustflags-$(CONFIG_MCORE2) += -Ctarget-cpu=core2 - rustflags-$(CONFIG_MATOM) += -Ctarget-cpu=atom - rustflags-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=generic + rustflags-$(CONFIG_MATOM) += -Ctarget-cpu=bonnell KBUILD_RUSTFLAGS += $(rustflags-y) KBUILD_CFLAGS += -mno-red-zone -- 2.46.2