From 176a6d03f6157b9065d00af3d224b1e3706f7195 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 23 Jun 2015 01:26:52 -0500 Subject: [PATCH 01/27] i8042: decrease debug message level to info Author: Arjan van de Ven Signed-off-by: Miguel Bernal Marin Signed-off-by: Jose Carlos Venegas Munoz --- drivers/input/serio/i8042.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 0b9f1d0a8..fc5f6ac8d 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -617,7 +617,7 @@ static int i8042_enable_kbd_port(void) if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { i8042_ctr &= ~I8042_CTR_KBDINT; i8042_ctr |= I8042_CTR_KBDDIS; - pr_err("Failed to enable KBD port\n"); + pr_info("Failed to enable KBD port\n"); return -EIO; } @@ -636,7 +636,7 @@ static int i8042_enable_aux_port(void) if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { i8042_ctr &= ~I8042_CTR_AUXINT; i8042_ctr |= I8042_CTR_AUXDIS; - pr_err("Failed to enable AUX port\n"); + pr_info("Failed to enable AUX port\n"); return -EIO; } @@ -728,7 +728,7 @@ static int __init i8042_check_mux(void) i8042_ctr &= ~I8042_CTR_AUXINT; if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { - pr_err("Failed to disable AUX port, can't use MUX\n"); + pr_info("Failed to disable AUX port, can't use MUX\n"); return -EIO; } @@ -951,7 +951,7 @@ static int i8042_controller_selftest(void) do { if (i8042_command(¶m, I8042_CMD_CTL_TEST)) { - pr_err("i8042 controller selftest timeout\n"); + pr_info("i8042 controller selftest timeout\n"); return -ENODEV; } @@ -973,7 +973,7 @@ static int i8042_controller_selftest(void) pr_info("giving up on controller selftest, continuing anyway...\n"); return 0; #else - pr_err("i8042 controller selftest failed\n"); + pr_info("i8042 controller selftest failed\n"); return -EIO; #endif } -- 2.34.1.75.gabe6bb3905 From 5991ad744c9e0f8a0b57b9e6409db99c6d5ea49f Mon Sep 17 00:00:00 2001 From: Arjan van 
de Ven Date: Mon, 11 Jan 2016 10:01:44 -0600 Subject: [PATCH 02/27] increase the ext4 default commit age Both the VM and EXT4 have a "commit to disk after X seconds" time. Currently the EXT4 time is shorter than our VM time, which is a bit suboptimal, it's better for performance to let the VM do the writeouts in bulk rather than something deep in the journalling layer. (DISTRO TWEAK -- NOT FOR UPSTREAM) Signed-off-by: Arjan van de Ven Signed-off-by: Jose Carlos Venegas Munoz --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index fd933c452..e70409a77 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -45,7 +45,7 @@ /* * The default maximum commit age, in seconds. */ -#define JBD2_DEFAULT_MAX_COMMIT_AGE 5 +#define JBD2_DEFAULT_MAX_COMMIT_AGE 30 #ifdef CONFIG_JBD2_DEBUG /* -- 2.34.1.75.gabe6bb3905 From 59315ff251707aeff58a839a1c841162d776072a Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 14 Mar 2016 11:22:09 -0600 Subject: [PATCH 03/27] silence rapl --- drivers/powercap/intel_rapl_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 7c0099e7a..f0d653828 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -1455,7 +1455,7 @@ static int __init rapl_init(void) id = x86_match_cpu(rapl_ids); if (!id) { - pr_err("driver does not support CPU family %d model %d\n", + pr_info("driver does not support CPU family %d model %d\n", boot_cpu_data.x86, boot_cpu_data.x86_model); return -ENODEV; -- 2.34.1.75.gabe6bb3905 From 9943c0b6374540ffd42283cf897eb122f4b0b0e3 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 14 Mar 2016 11:10:58 -0600 Subject: [PATCH 04/27] pci pme wakeups Reduce wakeups for PME checks, which are a workaround for miswired boards (sadly, too many of them) in laptops. 
--- drivers/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index a101faf3e..4d7422b42 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -60,7 +60,7 @@ struct pci_pme_device { struct pci_dev *dev; }; -#define PME_TIMEOUT 1000 /* How long between PME checks */ +#define PME_TIMEOUT 4000 /* How long between PME checks */ static void pci_dev_d3_sleep(struct pci_dev *dev) { -- 2.34.1.75.gabe6bb3905 From e3629b8ac0d717a90201027ca6a7963fc71c1a9f Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 6 May 2019 12:57:09 -0500 Subject: [PATCH 05/27] ksm-wakeups reduce wakeups in ksm by adding rounding (aligning) when the sleep times are 1 second or longer Signed-off-by: Arjan van de Ven --- kernel/watchdog.c | 2 +- mm/ksm.c | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index ad912511a..8ccb40284 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -41,7 +41,7 @@ unsigned long __read_mostly watchdog_enabled; int __read_mostly watchdog_user_enabled = 1; int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT; int __read_mostly soft_watchdog_user_enabled = 1; -int __read_mostly watchdog_thresh = 10; +int __read_mostly watchdog_thresh = 40; static int __read_mostly nmi_watchdog_available; struct cpumask watchdog_cpumask __read_mostly; diff --git a/mm/ksm.c b/mm/ksm.c index a5716fdec..7d5ec0138 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2415,9 +2415,14 @@ static int ksm_scan_thread(void *nothing) if (ksmd_should_run()) { sleep_ms = READ_ONCE(ksm_thread_sleep_millisecs); - wait_event_interruptible_timeout(ksm_iter_wait, - sleep_ms != READ_ONCE(ksm_thread_sleep_millisecs), - msecs_to_jiffies(sleep_ms)); + if (sleep_ms >= 1000) + wait_event_interruptible_timeout(ksm_iter_wait, + sleep_ms != READ_ONCE(ksm_thread_sleep_millisecs), + msecs_to_jiffies(round_jiffies_relative(sleep_ms))); + else + 
wait_event_interruptible_timeout(ksm_iter_wait, + sleep_ms != READ_ONCE(ksm_thread_sleep_millisecs), + msecs_to_jiffies(sleep_ms)); } else { wait_event_freezable(ksm_thread_wait, ksmd_should_run() || kthread_should_stop()); -- 2.34.1.75.gabe6bb3905 From 76b34371342d503a1bd794c1806cfca68e8ef91c Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 19 Mar 2016 21:32:19 -0400 Subject: [PATCH 06/27] intel_idle: tweak cpuidle cstates Increase target_residency in cpuidle cstate Tune intel_idle to be a bit less aggressive; Clear Linux is cleaner in hygiene (wakeups) than the average linux, so we can afford changing these in a way that increases performance while keeping power efficiency --- drivers/idle/intel_idle.c | 44 +++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index e6c543b5e..b73df64b2 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -502,7 +502,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, - .target_residency = 20, + .target_residency = 120, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -510,7 +510,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 33, - .target_residency = 100, + .target_residency = 900, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -518,7 +518,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, - .target_residency = 400, + .target_residency = 1000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -526,7 -526,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x32", .flags = MWAIT2flg(0x32) | 
CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 166, - .target_residency = 500, + .target_residency = 1500, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -534,7 +534,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, - .target_residency = 900, + .target_residency = 2000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -542,7 +542,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 600, - .target_residency = 1800, + .target_residency = 5000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -550,7 +550,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 2600, - .target_residency = 7700, + .target_residency = 9000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -570,7 +570,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, - .target_residency = 20, + .target_residency = 120, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -578,7 +578,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 40, - .target_residency = 100, + .target_residency = 1000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -586,7 +586,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, - .target_residency = 400, + .target_residency = 1000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -594,7 +594,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 
0x32", .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 166, - .target_residency = 500, + .target_residency = 2000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -602,7 +602,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, - .target_residency = 900, + .target_residency = 4000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -610,7 +610,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 600, - .target_residency = 1800, + .target_residency = 7000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -618,7 +618,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 2600, - .target_residency = 7700, + .target_residency = 9000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -639,7 +639,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, - .target_residency = 20, + .target_residency = 120, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -647,7 +647,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 70, - .target_residency = 100, + .target_residency = 1000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -655,7 +655,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 85, - .target_residency = 200, + .target_residency = 600, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -663,7 +663,7 @@ static struct cpuidle_state skl_cstates[] 
__initdata = { .desc = "MWAIT 0x33", .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 124, - .target_residency = 800, + .target_residency = 3000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -671,7 +671,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, - .target_residency = 800, + .target_residency = 3200, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -679,7 +679,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 480, - .target_residency = 5000, + .target_residency = 9000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -687,7 +687,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 890, - .target_residency = 5000, + .target_residency = 9000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -708,7 +708,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, - .target_residency = 20, + .target_residency = 300, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -- 2.34.1.75.gabe6bb3905 From 0a04094675c90a50dbc6f9ef8515e13900f30abd Mon Sep 17 00:00:00 2001 From: Piotr Gorski Date: Mon, 30 Aug 2021 13:08:40 +0200 Subject: [PATCH 07/27] port: print fsync count for bootchart Signed-off-by: Piotr Gorski --- block/blk-core.c | 3 +++ include/linux/sched.h | 1 + kernel/sched/debug.c | 1 + 3 files changed, 5 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index c2d912d0c..14f363d28 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1077,6 +1077,9 @@ blk_qc_t submit_bio(struct bio *bio) } else { task_io_account_read(bio->bi_iter.bi_size); 
count_vm_events(PGPGIN, count); + + if (bio->bi_opf & REQ_PREFLUSH) + current->fsync_count++; } } diff --git a/include/linux/sched.h b/include/linux/sched.h index c1a927dde..1dc9b2fda 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1043,6 +1043,7 @@ struct task_struct { /* Cached requested key. */ struct key *cached_requested_key; #endif + int fsync_count; /* * executable name, excluding path. diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 17a653b67..76e541860 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -960,6 +960,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, PN(se.exec_start); PN(se.vruntime); PN(se.sum_exec_runtime); + P(fsync_count); nr_switches = p->nvcsw + p->nivcsw; -- 2.34.1.75.gabe6bb3905 From 885c57359319961fc1a3161c2f4644a65a78c122 Mon Sep 17 00:00:00 2001 From: Piotr Gorski Date: Mon, 28 Jun 2021 13:52:04 +0200 Subject: [PATCH 08/27] bootstats: add printk's to measure boot time in more detail Signed-off-by: Piotr Gorski --- arch/x86/kernel/alternative.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index e9da3dc71..386747fa3 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -646,7 +646,9 @@ void __init alternative_instructions(void) * Then patch alternatives, such that those paravirt calls that are in * alternatives can be overwritten by their immediate fragments. */ + printk("clr: Applying alternatives\n"); apply_alternatives(__alt_instructions, __alt_instructions_end); + printk("clr: Applying alternatives done\n"); #ifdef CONFIG_SMP /* Patch to UP if other cpus not imminent. */ -- 2.34.1.75.gabe6bb3905 From 83b90790ddc0570be27c81ed45613802ba3e8096 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 11 Feb 2015 17:28:14 -0600 Subject: [PATCH 09/27] smpboot: reuse timer calibration NO point recalibrating for known-constant tsc ... saves 200ms+ of boot time. 
--- arch/x86/kernel/tsc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index a69819637..5f3ee7c31 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1569,6 +1569,9 @@ unsigned long calibrate_delay_is_known(void) if (!constant_tsc || !mask) return 0; + if (cpu != 0) + return cpu_data(0).loops_per_jiffy; + sibling = cpumask_any_but(mask, cpu); if (sibling < nr_cpu_ids) return cpu_data(sibling).loops_per_jiffy; -- 2.34.1.75.gabe6bb3905 From 1a02d62bda6c5bc8e74c4f8be93b3d03ad67f0d6 Mon Sep 17 00:00:00 2001 From: Piotr Gorski Date: Mon, 1 Nov 2021 13:07:50 +0100 Subject: [PATCH 10/27] port: initialize ata before graphics Signed-off-by: Piotr Gorski --- drivers/Makefile | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/Makefile b/drivers/Makefile index be5d40ae1..5cf1c4a93 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -60,15 +60,8 @@ obj-y += char/ # iommu/ comes before gpu as gpu are using iommu controllers obj-y += iommu/ -# gpu/ comes after char for AGP vs DRM startup and after iommu -obj-y += gpu/ - obj-$(CONFIG_CONNECTOR) += connector/ -# i810fb and intelfb depend on char/agp/ -obj-$(CONFIG_FB_I810) += video/fbdev/i810/ -obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ - obj-$(CONFIG_PARPORT) += parport/ obj-y += base/ block/ misc/ mfd/ nfc/ obj-$(CONFIG_LIBNVDIMM) += nvdimm/ @@ -80,6 +73,14 @@ obj-y += macintosh/ obj-y += scsi/ obj-y += nvme/ obj-$(CONFIG_ATA) += ata/ + +# gpu/ comes after char for AGP vs DRM startup and after iommu +obj-y += gpu/ + +# i810fb and intelfb depend on char/agp/ +obj-$(CONFIG_FB_I810) += video/fbdev/i810/ +obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ + obj-$(CONFIG_TARGET_CORE) += target/ obj-$(CONFIG_MTD) += mtd/ obj-$(CONFIG_SPI) += spi/ -- 2.34.1.75.gabe6bb3905 From 6479841047e209c7db1d8d12954c1b45ee4e70d7 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 29 Jul 2016 19:10:52 +0000 Subject: [PATCH 11/27] 
give rdrand some credit try to credit rdrand/rdseed with some entropy In VMs but even modern hardware, we're super starved for entropy, and while we can and do wear a tin foil hat, it's very hard to argue that rdrand and rdtsc add zero entropy. --- drivers/char/random.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/char/random.c b/drivers/char/random.c index 605969ed0..1cab72339 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1757,6 +1757,8 @@ static void __init init_std_data(struct entropy_store *r) if (!arch_get_random_seed_long(&rv) && !arch_get_random_long(&rv)) rv = random_get_entropy(); + else + credit_entropy_bits(r, 1); mix_pool_bytes(r, &rv, sizeof(rv)); } mix_pool_bytes(r, utsname(), sizeof(*(utsname()))); -- 2.34.1.75.gabe6bb3905 From c372b820dc372d738536a3f0ee7ac9eb06761763 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 6 Jan 2017 15:34:09 +0000 Subject: [PATCH 12/27] ipv4/tcp: allow the memory tuning for tcp to go a little bigger than default --- net/ipv4/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 844c6e5a8..35184b99f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4588,8 +4588,8 @@ void __init tcp_init(void) tcp_init_mem(); /* Set per-socket limits to no more than 1/128 the pressure threshold */ limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); - max_wshare = min(4UL*1024*1024, limit); - max_rshare = min(6UL*1024*1024, limit); + max_wshare = min(16UL*1024*1024, limit); + max_rshare = min(16UL*1024*1024, limit); init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024; -- 2.34.1.75.gabe6bb3905 From de720c2bd1f3ee3135bafe7284e6cd981f6b1429 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 17 May 2017 01:52:11 +0000 Subject: [PATCH 13/27] init: wait for partition and retry scan As Clear Linux boots fast the device is not ready when the mounting code is reached, so a retry device scan will 
be performed every 0.5 sec for at least 40 sec and synchronize the async task. Signed-off-by: Miguel Bernal Marin --- init/do_mounts.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/init/do_mounts.c b/init/do_mounts.c index 762b53497..107b96927 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -284,8 +284,18 @@ dev_t name_to_dev_t(const char *name) if (strcmp(name, "/dev/ram") == 0) return Root_RAM0; #ifdef CONFIG_BLOCK - if (strncmp(name, "PARTUUID=", 9) == 0) - return devt_from_partuuid(name + 9); + if (strncmp(name, "PARTUUID=", 9) == 0) { + dev_t res; + int needtowait = 40<<1; + res = devt_from_partuuid(name + 9); + while (!res && needtowait) { + /* waiting 0.5 sec */ + msleep(500); + res = devt_from_partuuid(name + 9); + needtowait--; + } + return res; + } if (strncmp(name, "PARTLABEL=", 10) == 0) return devt_from_partlabel(name + 10); if (strncmp(name, "/dev/", 5) == 0) @@ -613,7 +623,9 @@ void __init prepare_namespace(void) * For example, it is not atypical to wait 5 seconds here * for the touchpad of a laptop to initialize. 
*/ + async_synchronize_full(); wait_for_device_probe(); + async_synchronize_full(); md_run_setup(); -- 2.34.1.75.gabe6bb3905 From 514799622712589cc49ce138db401296a41fbfaf Mon Sep 17 00:00:00 2001 From: Piotr Gorski Date: Mon, 28 Jun 2021 13:53:57 +0200 Subject: [PATCH 14/27] add boot option to allow unsigned modules Signed-off-by: Piotr Gorski --- kernel/module.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/kernel/module.c b/kernel/module.c index 5c26a76e8..8948aef61 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include "module-internal.h" @@ -270,6 +271,10 @@ static void module_assert_mutex_or_preempt(void) #ifdef CONFIG_MODULE_SIG static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE); module_param(sig_enforce, bool_enable_only, 0644); +/* Allow disabling module signature requirement by adding boot param */ +static bool sig_unenforce = false; +module_param(sig_unenforce, bool_enable_only, 0644); + void set_module_sig_enforced(void) { @@ -416,6 +421,8 @@ extern const struct kernel_symbol __stop___ksymtab_gpl[]; extern const s32 __start___kcrctab[]; extern const s32 __start___kcrctab_gpl[]; +extern struct boot_params boot_params; + #ifndef CONFIG_MODVERSIONS #define symversion(base, idx) NULL #else @@ -4624,6 +4631,19 @@ static const struct proc_ops modules_proc_ops = { static int __init proc_modules_init(void) { proc_create("modules", 0, NULL, &modules_proc_ops); + +#ifdef CONFIG_MODULE_SIG_FORCE + switch (boot_params.secure_boot) { + case efi_secureboot_mode_unset: + case efi_secureboot_mode_unknown: + case efi_secureboot_mode_disabled: + /* + * sig_unenforce is only applied if SecureBoot is not + * enabled. 
+ */ + sig_enforce = !sig_unenforce; + } +#endif return 0; } module_init(proc_modules_init); -- 2.34.1.75.gabe6bb3905 From 5e0bc283fa3e6e712271a2b48cf2f1fdc932abf0 Mon Sep 17 00:00:00 2001 From: William Douglas Date: Wed, 20 Jun 2018 17:23:21 +0000 Subject: [PATCH 15/27] enable stateless firmware loading Prefer the order of specific version before generic and /etc before /lib to enable the user to give specific overrides for generic firmware and distribution firmware. --- drivers/base/firmware_loader/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index ef904b8b1..7ae710715 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -467,6 +467,8 @@ static int fw_decompress_xz(struct device *dev, struct fw_priv *fw_priv, static char fw_path_para[256]; static const char * const fw_path[] = { fw_path_para, + "/etc/firmware/" UTS_RELEASE, + "/etc/firmware", "/lib/firmware/updates/" UTS_RELEASE, "/lib/firmware/updates", "/lib/firmware/" UTS_RELEASE, -- 2.34.1.75.gabe6bb3905 From 0e21c02991654fe5a206d25fd9d8d9366da8261a Mon Sep 17 00:00:00 2001 From: Auke Kok Date: Thu, 2 Aug 2018 12:03:22 -0700 Subject: [PATCH 16/27] migrate some systemd defaults to the kernel defaults. These settings are needed to prevent networking issues when the networking modules come up by default without explicit settings, which breaks some cases. We don't want the modprobe settings to be read at boot time if we're not going to do anything else ever. 
--- drivers/net/dummy.c | 2 +- include/uapi/linux/if_bonding.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index f82ad7419..5e8faa70a 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -43,7 +43,7 @@ #define DRV_NAME "dummy" -static int numdummies = 1; +static int numdummies = 0; /* fake multicast ability */ static void set_multicast_list(struct net_device *dev) diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h index d174914a8..bf8e2af10 100644 --- a/include/uapi/linux/if_bonding.h +++ b/include/uapi/linux/if_bonding.h @@ -82,7 +82,7 @@ #define BOND_STATE_ACTIVE 0 /* link is active */ #define BOND_STATE_BACKUP 1 /* link is backup */ -#define BOND_DEFAULT_MAX_BONDS 1 /* Default maximum number of devices to support */ +#define BOND_DEFAULT_MAX_BONDS 0 /* Default maximum number of devices to support */ #define BOND_DEFAULT_TX_QUEUES 16 /* Default number of tx queues per device */ -- 2.34.1.75.gabe6bb3905 From 9a7ccfae54fcd3174e2509b1830c0b521c03ea45 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 10 Mar 2016 15:11:28 +0000 Subject: [PATCH 17/27] xattr: allow setting user.* attributes on symlinks by owner Kvmtool and clear containers support using user attributes to label host files with the virtual uid/gid of the file in the container. This allows an end user to manage their files and a complete uid space without all the ugly namespace stuff. The one gap in the support is symlinks because an end user can change the ownership of a symbolic link. We support attributes on these files as you can already (as root) set security attributes on them. The current rules seem slightly over-paranoid and as we have a use case this patch enables updating the attributes on a symbolic link IFF you are the owner of the symlink (as permissions are not usually meaningful on the link itself). 
Signed-off-by: Alan Cox --- fs/xattr.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/xattr.c b/fs/xattr.c index 5c8c5175b..0f98df2ea 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -120,16 +120,17 @@ xattr_permission(struct user_namespace *mnt_userns, struct inode *inode, } /* - * In the user.* namespace, only regular files and directories can have - * extended attributes. For sticky directories, only the owner and - * privileged users can write attributes. + * In the user.* namespace, only regular files, symbolic links, and + * directories can have extended attributes. For symbolic links and + * sticky directories, only the owner and privileged users can write + * attributes. */ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) { - if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) + if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) && !S_ISLNK(inode->i_mode)) return (mask & MAY_WRITE) ? -EPERM : -ENODATA; - if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && - (mask & MAY_WRITE) && - !inode_owner_or_capable(mnt_userns, inode)) + if (((S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX)) + || S_ISLNK(inode->i_mode)) && (mask & MAY_WRITE) + && !inode_owner_or_capable(mnt_userns, inode)) return -EPERM; } -- 2.34.1.75.gabe6bb3905 From 6e83513d5e8446d53e52d89248c149c76f4766c9 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 8 Dec 2018 18:21:32 +0000 Subject: [PATCH 18/27] use lfence instead of rep and nop --- arch/x86/include/asm/vdso/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/vdso/processor.h b/arch/x86/include/asm/vdso/processor.h index 57b1a7034..e2c45674f 100644 --- a/arch/x86/include/asm/vdso/processor.h +++ b/arch/x86/include/asm/vdso/processor.h @@ -10,7 +10,7 @@ /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. 
*/ static __always_inline void rep_nop(void) { - asm volatile("rep; nop" ::: "memory"); + asm volatile("lfence" ::: "memory"); } static __always_inline void cpu_relax(void) -- 2.34.1.75.gabe6bb3905 From 2aa8550e7bcaff7497ceb98e22c96e6374dd209f Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 13 Dec 2018 01:00:49 +0000 Subject: [PATCH 19/27] do accept() in LIFO order for cache efficiency --- include/linux/wait.h | 2 ++ kernel/sched/wait.c | 24 ++++++++++++++++++++++++ net/ipv4/inet_connection_sock.c | 2 +- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/include/linux/wait.h b/include/linux/wait.h index 93dab0e95..04d89a54c 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -165,6 +165,7 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head) extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); +extern void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); @@ -1137,6 +1138,7 @@ do { \ */ void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); +void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout); diff --git a/kernel/sched/wait.c 
b/kernel/sched/wait.c index 76577d164..c33f48ed2 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -48,6 +48,17 @@ void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_ } EXPORT_SYMBOL_GPL(add_wait_queue_priority); +void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) +{ + unsigned long flags; + + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; + spin_lock_irqsave(&wq_head->lock, flags); + __add_wait_queue(wq_head, wq_entry); + spin_unlock_irqrestore(&wq_head->lock, flags); +} +EXPORT_SYMBOL(add_wait_queue_exclusive_lifo); + void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { unsigned long flags; @@ -283,6 +294,19 @@ prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_ent } EXPORT_SYMBOL(prepare_to_wait_exclusive); +void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state) +{ + unsigned long flags; + + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; + spin_lock_irqsave(&wq_head->lock, flags); + if (list_empty(&wq_entry->entry)) + __add_wait_queue(wq_head, wq_entry); + set_current_state(state); + spin_unlock_irqrestore(&wq_head->lock, flags); +} +EXPORT_SYMBOL(prepare_to_wait_exclusive_lifo); + void init_wait_entry(struct wait_queue_entry *wq_entry, int flags) { wq_entry->flags = flags; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f7fea3a7c..cae47561e 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -441,7 +441,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) * having to remove and re-insert us on the wait queue. 
*/ for (;;) { - prepare_to_wait_exclusive(sk_sleep(sk), &wait, + prepare_to_wait_exclusive_lifo(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); release_sock(sk); if (reqsk_queue_empty(&icsk->icsk_accept_queue)) -- 2.34.1.75.gabe6bb3905 From 8b44e64f5195a1d948acff9adc9e04a9c6232c51 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 18 Feb 2018 23:35:41 +0000 Subject: [PATCH 20/27] locking: rwsem: spin faster tweak rwsem owner spinning a bit --- kernel/locking/rwsem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index e63f740c2..1f9dfa8dd 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -715,6 +715,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) struct task_struct *new, *owner; unsigned long flags, new_flags; enum owner_state state; + int i = 0; owner = rwsem_owner_flags(sem, &flags); state = rwsem_owner_state(owner, flags); @@ -748,7 +749,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) break; } - cpu_relax(); + if (i++ > 1000) + cpu_relax(); } rcu_read_unlock(); -- 2.34.1.75.gabe6bb3905 From a7d89f56df11196c7036e15625245ffce55133f1 Mon Sep 17 00:00:00 2001 From: Joe Konno Date: Tue, 25 Jun 2019 10:35:54 -0700 Subject: [PATCH 21/27] ata: libahci: ignore staggered spin-up Change libahci to ignore firmware's staggered spin-up flag. 
End-users who wish to honor firmware's SSS flag can add the following kernel parameter to a new file at /etc/kernel/cmdline.d/ignore_sss.conf: libahci.ignore_sss=0 And then run sudo clr-boot-manager update Signed-off-by: Joe Konno --- drivers/ata/libahci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 395772fa3..f2d841597 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -33,14 +33,14 @@ #include "libata.h" static int ahci_skip_host_reset; -int ahci_ignore_sss; +int ahci_ignore_sss=1; EXPORT_SYMBOL_GPL(ahci_ignore_sss); module_param_named(skip_host_reset, ahci_skip_host_reset, int, 0444); MODULE_PARM_DESC(skip_host_reset, "skip global host reset (0=don't skip, 1=skip)"); module_param_named(ignore_sss, ahci_ignore_sss, int, 0444); -MODULE_PARM_DESC(ignore_sss, "Ignore staggered spinup flag (0=don't ignore, 1=ignore)"); +MODULE_PARM_DESC(ignore_sss, "Ignore staggered spinup flag (0=don't ignore, 1=ignore [default])"); static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, unsigned hints); -- 2.34.1.75.gabe6bb3905 From fccb98a51b999113d104c2c66960b7be0a429767 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 10 Aug 2019 03:19:04 +0000 Subject: [PATCH 22/27] print CPU that faults print cpu number when we print a crash --- arch/x86/mm/fault.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 4bfed53e2..484aac28a 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -776,9 +776,9 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, if (!printk_ratelimit()) return; - printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx", + printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx cpu %i", loglvl, tsk->comm, task_pid_nr(tsk), address, - (void *)regs->ip, (void *)regs->sp, error_code); + (void *)regs->ip, (void *)regs->sp, error_code, raw_smp_processor_id()); 
print_vma_addr(KERN_CONT " in ", regs->ip); -- 2.34.1.75.gabe6bb3905 From 985d98a756ff90536d93ea8d780d7f5e17a0216f Mon Sep 17 00:00:00 2001 From: Jim Kukunas Date: Sat, 2 Nov 2019 00:59:52 +0000 Subject: [PATCH 23/27] fix bug in ucode force reload revision check If force_ucode_load==true, reload ucode even if revision # is identical. --- arch/x86/kernel/cpu/microcode/intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 7e8e07bdd..1ef03ab31 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -772,7 +772,7 @@ static enum ucode_state apply_microcode_intel(int cpu) * already. */ rev = intel_get_microcode_revision(); - if (rev >= mc->hdr.rev) { + if (rev > mc->hdr.rev || (rev == mc->hdr.rev && !force_ucode_load)) { ret = UCODE_OK; goto out; } -- 2.34.1.75.gabe6bb3905 From fb6ba777d0d7f62b0f21dbf0c89a04162005b24d Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 11 Nov 2019 23:12:11 +0000 Subject: [PATCH 24/27] nvme workaround --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f8dd664b2..a11ce1420 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -47,7 +47,7 @@ static u8 nvme_max_retries = 5; module_param_named(max_retries, nvme_max_retries, byte, 0644); MODULE_PARM_DESC(max_retries, "max number of retries a command may have"); -static unsigned long default_ps_max_latency_us = 100000; +static unsigned long default_ps_max_latency_us = 200; module_param(default_ps_max_latency_us, ulong, 0644); MODULE_PARM_DESC(default_ps_max_latency_us, "max power saving latency for new devices; use PM QOS to change per device"); -- 2.34.1.75.gabe6bb3905 From 0dcaeb6f5769511a5e1a756a45f40fd2791e0b3d Mon Sep 17 00:00:00 2001 From: Alexander Koskovich Date: Wed, 12 Feb 2020 22:47:12 +0000 Subject: [PATCH 25/27] 
don't report an error if PowerClamp run on other CPU --- drivers/thermal/intel/intel_powerclamp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index a5b58ea89..430f2b93e 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -647,6 +647,11 @@ static struct thermal_cooling_device_ops powerclamp_cooling_ops = { .set_cur_state = powerclamp_set_cur_state, }; +static const struct x86_cpu_id amd_cpu[] = { + { X86_VENDOR_AMD }, + {}, +}; + static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = { X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_MWAIT, NULL), {} @@ -656,6 +661,11 @@ MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids); static int __init powerclamp_probe(void) { + if (x86_match_cpu(amd_cpu)){ + pr_info("Intel PowerClamp does not support AMD CPUs\n"); + return -ENODEV; + } + if (!x86_match_cpu(intel_powerclamp_ids)) { pr_err("CPU does not support MWAIT\n"); return -ENODEV; -- 2.34.1.75.gabe6bb3905 From b635f403c03c35780a1ebacacf83a0632d6b9e7d Mon Sep 17 00:00:00 2001 From: Piotr Gorski Date: Thu, 11 Jun 2020 22:30:56 +0200 Subject: [PATCH 26/27] Port microcode patches Signed-off-by: Piotr Gorski --- arch/x86/kernel/cpu/microcode/core.c | 9 ++++- arch/x86/kernel/cpu/microcode/intel.c | 57 +++++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index efb69be41..10bf3d746 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -604,6 +604,7 @@ static int microcode_reload_late(void) atomic_set(&late_cpus_in, 0); atomic_set(&late_cpus_out, 0); + printk ("Going to do stop_machine\n"); ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask); if (ret == 0) microcode_check(); @@ -617,18 +618,23 @@ static ssize_t reload_store(struct device *dev, struct 
device_attribute *attr, const char *buf, size_t size) { + extern bool force_ucode_load; enum ucode_state tmp_ret = UCODE_OK; int bsp = boot_cpu_data.cpu_index; unsigned long val; + bool orig_cmd_line = force_ucode_load; ssize_t ret = 0; ret = kstrtoul(buf, 0, &val); if (ret) return ret; - if (val != 1) + if (!val || val > 2) return size; + if (val == 2) + force_ucode_load = true; + cpus_read_lock(); ret = check_online_cpus(); @@ -644,6 +650,7 @@ static ssize_t reload_store(struct device *dev, mutex_unlock(µcode_mutex); put: + force_ucode_load = orig_cmd_line; cpus_read_unlock(); if (ret == 0) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 1ef03ab31..92af2b9f1 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -38,6 +39,7 @@ #include static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin"; +bool force_ucode_load = false; /* Current microcode patch used in early patching on the APs. */ static struct microcode_intel *intel_ucode_patch; @@ -94,8 +96,18 @@ static int has_newer_microcode(void *mc, unsigned int csig, int cpf, int new_rev { struct microcode_header_intel *mc_hdr = mc; - if (mc_hdr->rev <= new_rev) + //if (mc_hdr->rev <= new_rev) + if (mc_hdr->rev < new_rev) { + printk ("Returning NO_NEW old = 0x%x new = 0x%x\n", + mc_hdr->rev, new_rev); return 0; + } + if ((mc_hdr->rev == new_rev) && !force_ucode_load) { + printk ("SAME REV: no_force Returning NO_NEW old = 0x%x new = 0x%x\n", + mc_hdr->rev, new_rev); + return 0; + } + printk ("ucode: force loading same rev\n"); return find_matching_signature(mc, csig, cpf); } @@ -550,11 +562,20 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) * already. 
*/ rev = intel_get_microcode_revision(); - if (rev >= mc->hdr.rev) { + if (rev > mc->hdr.rev) { uci->cpu_sig.rev = rev; return UCODE_OK; } + if (rev == mc->hdr.rev) { + if (!force_ucode_load) { + printk ("Matching ucode rev, no update\n"); + return UCODE_OK; + } else { + printk ("Matching ucode rev.. force updating\n"); + } + } + /* * Writeback and invalidate caches before updating microcode to avoid * internal issues depending on what the microcode is updating. @@ -606,6 +627,29 @@ int __init save_microcode_in_initrd_intel(void) return 0; } +static bool check_force_ucode_bsp(void) +{ + static const char *__force_ucode_str = "force_ucode_load"; + +#ifdef CONFIG_X86_32 + const char *cmdline = (const char *)__pa_nodebug(boot_command_line); + const char *option = (const char *)__pa_nodebug(__force_ucode_str); + bool *res = (bool *)__pa_nodebug(&force_ucode_load); + +#else /* CONFIG_X86_64 */ + const char *cmdline = boot_command_line; + const char *option = __force_ucode_str; + bool *res = &force_ucode_load; +#endif + + if (cmdline_find_option_bool(cmdline, option)) { + printk("cmdline forcing ucode update for same rev\n"); + *res = true; + } + + return *res; +} + /* * @res_patch, output: a pointer to the patch we found. 
*/ @@ -639,6 +683,9 @@ void __init load_ucode_intel_bsp(void) { struct microcode_intel *patch; struct ucode_cpu_info uci; + bool force_bsp; + + force_bsp = check_force_ucode_bsp(); patch = __load_ucode_intel(&uci); if (!patch) @@ -687,8 +734,12 @@ static struct microcode_intel *find_patch(struct ucode_cpu_info *uci) phdr = (struct microcode_header_intel *)iter->data; - if (phdr->rev <= uci->cpu_sig.rev) + if (phdr->rev < uci->cpu_sig.rev) continue; + if (phdr->rev == uci->cpu_sig.rev && !force_ucode_load) + continue; + else + printk ("same rev forcing ucode\n"); if (!find_matching_signature(phdr, uci->cpu_sig.sig, -- 2.34.1.75.gabe6bb3905 From e047d8c53709523a8c39dc323ad13de2da1815b4 Mon Sep 17 00:00:00 2001 From: Piotr Gorski Date: Thu, 9 Dec 2021 12:35:13 +0100 Subject: [PATCH 27/27] clearlinux-5.15: backport patches from clearlinux repo Signed-off-by: Piotr Gorski --- arch/x86/include/asm/topology.h | 1 + arch/x86/kernel/cpu/intel_epb.c | 4 ++++ arch/x86/kernel/itmt.c | 29 ++++++++++++++++++++++++++++- drivers/cpufreq/intel_pstate.c | 7 +++++++ lib/raid6/algos.c | 8 ++++++-- mm/compaction.c | 2 +- mm/page_alloc.c | 5 +++-- 7 files changed, 50 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 9239399e5..4f046a0ee 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -171,6 +171,7 @@ extern unsigned int __read_mostly sysctl_sched_itmt_enabled; /* Interface to set priority of a cpu */ void sched_set_itmt_core_prio(int prio, int core_cpu); +void sched_set_itmt_power_ratio(int power_ratio, int core_cpu); /* Interface to notify scheduler that system supports ITMT */ int sched_set_itmt_support(void); diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c index f4dd73396..12b82a8d7 100644 --- a/arch/x86/kernel/cpu/intel_epb.c +++ b/arch/x86/kernel/cpu/intel_epb.c @@ -156,6 +156,10 @@ static ssize_t energy_perf_bias_store(struct device *dev, if (ret < 
0) return ret; + /* update the ITMT scheduler logic to use the power policy data */ + /* scale the val up by 2 so the range is 224 - 256 */ + sched_set_itmt_power_ratio(256 - val * 2, cpu); + return count; } diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index 1afbdd1dd..b99f7f59d 100644 --- a/arch/x86/kernel/itmt.c +++ b/arch/x86/kernel/itmt.c @@ -25,6 +25,7 @@ static DEFINE_MUTEX(itmt_update_mutex); DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority); +DEFINE_PER_CPU_READ_MOSTLY(int, sched_power_ratio); /* Boolean to track if system has ITMT capabilities */ static bool __read_mostly sched_itmt_capable; @@ -169,7 +170,12 @@ void sched_clear_itmt_support(void) int arch_asym_cpu_priority(int cpu) { - return per_cpu(sched_core_priority, cpu); + int power_ratio = per_cpu(sched_power_ratio, cpu); + + /* a power ratio of 0 (uninitialized) is assumed to be maximum */ + if (power_ratio == 0) + power_ratio = 256 - 2 * 6; + return per_cpu(sched_core_priority, cpu) * power_ratio / 256; } /** @@ -203,3 +209,24 @@ void sched_set_itmt_core_prio(int prio, int core_cpu) i++; } } + +/** + * sched_set_itmt_power_ratio() - Set CPU priority based on ITMT + * @power_ratio: The power scaling ratio [1..256] for the core + * @core_cpu: The cpu number associated with the core + * + * Set a scaling to the cpu performance based on long term power + * settings (like EPB). + * + * Note this is for the policy not for the actual dynamic frequency; + * the frequency will increase itself as workloads run on a core. 
+ */ + +void sched_set_itmt_power_ratio(int power_ratio, int core_cpu) +{ + int cpu; + + for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) { + per_cpu(sched_power_ratio, cpu) = power_ratio; + } +} diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e15c3bc17..7d5c243e9 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -351,6 +351,13 @@ static void intel_pstate_set_itmt_prio(int cpu) * update them at any time after it has been called. */ sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu); + /* + * On some systems with overclocking enabled, CPPC.highest_perf is hardcoded to 0xff. + * In this case we can't use CPPC.highest_perf to enable ITMT. + * In this case we can look at MSR_HWP_CAPABILITIES bits [8:0] to decide. + */ + if (cppc_perf.highest_perf == 0xff) + cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached)); if (max_highest_perf <= min_highest_perf) { if (cppc_perf.highest_perf > max_highest_perf) diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 6d5e5000f..56164c20f 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -128,8 +128,10 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) for (best = NULL, algo = raid6_recov_algos; *algo; algo++) if (!best || (*algo)->priority > best->priority) - if (!(*algo)->valid || (*algo)->valid()) + if (!(*algo)->valid || (*algo)->valid()) { best = *algo; + break; + } if (best) { raid6_2data_recov = best->data2; @@ -198,8 +200,10 @@ static inline const struct raid6_calls *raid6_choose_gen( } preempt_enable(); - if (best == *algo) + if (best == *algo) { bestxorperf = perf; + break; + } pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name, (perf * HZ * (disks-2)) >> diff --git a/mm/compaction.c b/mm/compaction.c index bfc93da1c..4f43496b1 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -53,7 +53,7 @@ static inline void count_compact_events(enum vm_event_item item, long delta) /* * 
Fragmentation score check interval for proactive compaction purposes. */ -static const unsigned int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 500; +static const unsigned int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 5000; /* * Page order with-respect-to which proactive compaction diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 23d3339ac..020fecebb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6766,11 +6766,11 @@ static int zone_batchsize(struct zone *zone) /* * The number of pages to batch allocate is either ~0.1% - * of the zone or 1MB, whichever is smaller. The batch + * of the zone or 4MB, whichever is smaller. The batch * size is striking a balance between allocation latency * and zone lock contention. */ - batch = min(zone_managed_pages(zone) >> 10, (1024 * 1024) / PAGE_SIZE); + batch = min(zone_managed_pages(zone) >> 10, 4 * (1024 * 1024) / PAGE_SIZE); batch /= 4; /* We effectively *= 4 below */ if (batch < 1) batch = 1; @@ -6848,6 +6848,7 @@ static int zone_highsize(struct zone *zone, int batch, int cpu_online) * historical relationship between high and batch. */ high = max(high, batch << 2); + high = max(high, 1024); return high; #else -- 2.34.1.75.gabe6bb3905