diff --git a/init/Kconfig b/init/Kconfig
index 041f3a022122..3213e61dd824 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -53,6 +53,15 @@ config BROKEN_ON_SMP
 	depends on BROKEN || !SMP
 	default y
 
+config BLD
+	bool "An alternate CPU load distribution technique for the task scheduler"
+	depends on SMP && !NUMA_BALANCING && !SCHED_SMT
+	default y
+	help
+	  This is an alternate CPU load distribution technique for the task
+	  scheduler, based on the Barbershop Load Distribution algorithm. It is
+	  not suitable for NUMA, but should work well on SMP.
+
 config INIT_ENV_ARG_LIMIT
 	int
 	default 32 if !UML
diff --git a/kernel/sched/bld.h b/kernel/sched/bld.h
new file mode 100644
index 000000000000..e2a487db1f9f
--- /dev/null
+++ b/kernel/sched/bld.h
@@ -0,0 +1,215 @@
+#ifdef CONFIG_BLD
+
+static DEFINE_RWLOCK(rt_list_lock);
+static LIST_HEAD(rt_rq_head);
+static LIST_HEAD(cfs_rq_head);
+static DEFINE_RWLOCK(cfs_list_lock);
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static inline struct rq *rq_of_cfs(struct cfs_rq *cfs_rq)
+{
+	return cfs_rq->rq;
+}
+#else
+static inline struct rq *rq_of_cfs(struct cfs_rq *cfs_rq)
+{
+	return container_of(cfs_rq, struct rq, cfs);
+}
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+static inline struct rq *rq_of_rt(struct rt_rq *rt_rq)
+{
+	return rt_rq->rq;
+}
+#else
+static inline struct rq *rq_of_rt(struct rt_rq *rt_rq)
+{
+	return container_of(rt_rq, struct rq, rt);
+}
+#endif
+
+static int select_cpu_for_wakeup(int task_type, struct cpumask *mask)
+{
+	int cpu = smp_processor_id(), i;
+	unsigned long load, varload;
+	struct rq *rq;
+
+	if (task_type) {
+		varload = ULONG_MAX;
+		for_each_cpu(i, mask) {
+			rq = cpu_rq(i);
+			load = rq->cfs.load.weight;
+			if (load < varload) {
+				varload = load;
+				cpu = i;
+			}
+		}
+	} else {
+		/* Here's an attempt to get a CPU within the mask where
+		 * we can preempt easily. To achieve this we try to
+		 * maintain a lowbit, which indicates the lowest bit set in
+		 * the array bitmap. Since all CPUs contain high-priority
+		 * kernel threads, we eliminate 0, so it might not
+		 * be right every time, but it's just an indicator.
+ */ + varload = 1; + + for_each_cpu(i, mask) { + rq = cpu_rq(i); + load = rq->rt.lowbit; + if (load >= varload) { + varload = load; + cpu = i; + } + } + } + + return cpu; +} + +static int bld_pick_cpu_cfs(struct task_struct *p, int sd_flags, int wake_flags) +{ + struct cfs_rq *cfs; + unsigned long flags; + unsigned int cpu = smp_processor_id(); + + read_lock_irqsave(&cfs_list_lock, flags); + list_for_each_entry(cfs, &cfs_rq_head, bld_cfs_list) { + cpu = cpu_of(rq_of_cfs(cfs)); + if (cpu_online(cpu)) + break; + } + read_unlock_irqrestore(&cfs_list_lock, flags); + return cpu; +} + +static int bld_pick_cpu_rt(struct task_struct *p, int sd_flags, int wake_flags) +{ + struct rt_rq *rt; + unsigned long flags; + unsigned int cpu = smp_processor_id(); + + read_lock_irqsave(&rt_list_lock, flags); + list_for_each_entry(rt, &rt_rq_head, bld_rt_list) { + cpu = cpu_of(rq_of_rt(rt)); + if (cpu_online(cpu)) + break; + } + read_unlock_irqrestore(&rt_list_lock, flags); + return cpu; +} + +static int bld_pick_cpu_domain(struct task_struct *p, int sd_flags, int wake_flags) +{ + unsigned int cpu = smp_processor_id(), want_affine = 0; + struct cpumask *tmpmask; + + if (p->nr_cpus_allowed == 1) + return task_cpu(p); + + if (sd_flags & SD_BALANCE_WAKE) { + if (cpumask_test_cpu(cpu, &p->cpus_allowed)) { + want_affine = 1; + } + } + + if (want_affine) + tmpmask = &p->cpus_allowed; + else + tmpmask = sched_domain_span(cpu_rq(task_cpu(p))->sd); + + if (rt_task(p)) + cpu = select_cpu_for_wakeup(0, tmpmask); + else + cpu = select_cpu_for_wakeup(1, tmpmask); + + return cpu; +} + +static void track_load_rt(struct rq *rq, struct task_struct *p) +{ + unsigned long flag; + int firstbit; + struct rt_rq *first; + struct rt_prio_array *array = &rq->rt.active; + + first = list_entry(rt_rq_head.next, struct rt_rq, bld_rt_list); + firstbit = sched_find_first_bit(array->bitmap); + + /* Maintaining rt.lowbit */ + if (firstbit > 0 && firstbit <= rq->rt.lowbit) + rq->rt.lowbit = firstbit; + + if (rq->rt.lowbit < first->lowbit) { + write_lock_irqsave(&rt_list_lock, flag); + list_del(&rq->rt.bld_rt_list); + list_add_tail(&rq->rt.bld_rt_list, &rt_rq_head); + write_unlock_irqrestore(&rt_list_lock, flag); + } +} + +static int bld_get_cpu(struct task_struct *p, int sd_flags, int wake_flags) +{ + unsigned int cpu; + + if (sd_flags == SD_BALANCE_WAKE || (sd_flags == SD_BALANCE_EXEC && (get_nr_threads(p) > 1))) + cpu = bld_pick_cpu_domain(p, sd_flags, wake_flags); + else { + if (rt_task(p)) + cpu = bld_pick_cpu_rt(p, sd_flags, wake_flags); + else + cpu = bld_pick_cpu_cfs(p, sd_flags, wake_flags); + } + + return cpu; +} + +static void bld_track_load_activate(struct rq *rq, struct task_struct *p) +{ + unsigned long flag; + if (rt_task(p)) { + track_load_rt(rq, p); + } else { + if (rq->cfs.pos != 2) { + struct cfs_rq *last; + last = list_entry(cfs_rq_head.prev, struct cfs_rq, bld_cfs_list); + if (rq->cfs.load.weight >= last->load.weight) { + write_lock_irqsave(&cfs_list_lock, flag); + list_del(&rq->cfs.bld_cfs_list); + list_add_tail(&rq->cfs.bld_cfs_list, &cfs_rq_head); + rq->cfs.pos = 2; last->pos = 1; + write_unlock_irqrestore(&cfs_list_lock, flag); + } + } + } +} + +static void bld_track_load_deactivate(struct rq *rq, struct task_struct *p) +{ + unsigned long flag; + if (rt_task(p)) { + track_load_rt(rq, p); + } else { + if (rq->cfs.pos != 0) { + struct cfs_rq *first; + first = list_entry(cfs_rq_head.next, struct cfs_rq, bld_cfs_list); + if (rq->cfs.load.weight <= first->load.weight) { + write_lock_irqsave(&cfs_list_lock, flag); + 
list_del(&rq->cfs.bld_cfs_list); + list_add(&rq->cfs.bld_cfs_list, &cfs_rq_head); + rq->cfs.pos = 0; first->pos = 1; + write_unlock_irqrestore(&cfs_list_lock, flag); + } + } + } +} +#else +static inline void bld_track_load_activate(struct rq *rq, struct task_struct *p) +{ +} + +static inline void bld_track_load_deactivate(struct rq *rq, struct task_struct *p) +{ +} +#endif /* CONFIG_BLD */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fe365c9a08e9..aa564616ef15 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -16,6 +16,7 @@ #include "../workqueue_internal.h" #include "../smpboot.h" +#include "bld.h" #define CREATE_TRACE_POINTS #include @@ -748,6 +749,8 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags) sched_info_queued(rq, p); p->sched_class->enqueue_task(rq, p, flags); + if (!dl_task(p)) + bld_track_load_activate(rq, p); } static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags) @@ -759,6 +762,8 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags) sched_info_dequeued(rq, p); p->sched_class->dequeue_task(rq, p, flags); + if (!dl_task(p)) + bld_track_load_deactivate(rq, p); } void activate_task(struct rq *rq, struct task_struct *p, int flags) @@ -1547,11 +1552,17 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) { lockdep_assert_held(&p->pi_lock); +#ifndef CONFIG_BLD if (p->nr_cpus_allowed > 1) cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); else cpu = cpumask_any(&p->cpus_allowed); - +#else + if (dl_task(p)) + cpu = dl_sched_class.select_task_rq(p, cpu, sd_flags, wake_flags); + else + cpu = bld_get_cpu(p, sd_flags, wake_flags); +#endif /* * In order not to call set_task_cpu() on a blocking task we need * to rely on ttwu() to place the task on a valid ->cpus_allowed @@ -1771,7 +1782,11 @@ void scheduler_ipi(void) */ preempt_fold_need_resched(); +#ifndef CONFIG_BLD if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) +#else + if (llist_empty(&this_rq()->wake_list)) +#endif return; /* @@ -1793,13 +1808,16 @@ void scheduler_ipi(void) /* * Check if someone kicked us for doing the nohz idle load balance. 
*/ +#ifndef CONFIG_BLD if (unlikely(got_nohz_idle_kick())) { this_rq()->idle_balance = 1; raise_softirq_irqoff(SCHED_SOFTIRQ); } +#endif irq_exit(); } +#ifndef CONFIG_BLD static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) { struct rq *rq = cpu_rq(cpu); @@ -1813,6 +1831,7 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) trace_sched_wake_idle_without_ipi(cpu); } } +#endif /*CONFIG_BLD */ void wake_up_if_idle(int cpu) { @@ -1849,7 +1868,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) struct rq *rq = cpu_rq(cpu); struct rq_flags rf; -#if defined(CONFIG_SMP) +#if defined(CONFIG_SMP) && !defined(CONFIG_BLD) if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) { sched_clock_cpu(cpu); /* Sync clocks across CPUs */ ttwu_queue_remote(p, cpu, wake_flags); @@ -2975,7 +2994,14 @@ void sched_exec(void) int dest_cpu; raw_spin_lock_irqsave(&p->pi_lock, flags); +#ifndef CONFIG_BLD dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0); +#else + if (dl_task(p)) + dest_cpu = task_cpu(p); + else + dest_cpu = bld_get_cpu(p, SD_BALANCE_EXEC, 0); +#endif if (dest_cpu == smp_processor_id()) goto unlock; @@ -3085,8 +3111,10 @@ void scheduler_tick(void) #ifdef CONFIG_SMP rq->idle_balance = idle_cpu(cpu); +#ifndef CONFIG_BLD trigger_load_balance(rq); #endif +#endif } #ifdef CONFIG_NO_HZ_FULL @@ -5865,7 +5893,9 @@ int sched_cpu_dying(unsigned int cpu) calc_load_migrate(rq); update_max_interval(); +#ifndef CONFIG_BLD nohz_balance_exit_idle(rq); +#endif hrtick_clear(rq); return 0; } @@ -6084,6 +6114,15 @@ void __init sched_init(void) #endif /* CONFIG_SMP */ hrtick_rq_init(rq); atomic_set(&rq->nr_iowait, 0); +#ifdef CONFIG_BLD + INIT_LIST_HEAD(&rq->cfs.bld_cfs_list); + list_add_tail(&rq->cfs.bld_cfs_list, &cfs_rq_head); + rq->cfs.pos = 0; + + INIT_LIST_HEAD(&rq->rt.bld_rt_list); + list_add_tail(&rq->rt.bld_rt_list, &rt_rq_head); + rq->rt.lowbit = INT_MAX; +#endif } set_load_weight(&init_task, false); @@ -6113,6 +6152,9 @@ void __init sched_init(void) init_schedstats(); scheduler_running = 1; +#ifdef CONFIG_BLD + printk(KERN_INFO "BLD: An Alternate CPU load distributor activated.\n"); +#endif } #ifdef CONFIG_DEBUG_ATOMIC_SLEEP diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 2f0a0be4d344..27255340267f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -136,6 +136,11 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w) lw->inv_weight = 0; } +static unsigned long capacity_of(int cpu) +{ + return cpu_rq(cpu)->cpu_capacity; +} + /* * Increase the granularity value when there are more CPUs, * because with more CPUs the 'effective latency' as visible @@ -695,7 +700,9 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) #include "sched-pelt.h" +#ifndef CONFIG_BLD static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu); +#endif static unsigned long task_h_load(struct task_struct *p); /* Give new sched_entity start runnable values to heavy its load in infant time */ @@ -1448,7 +1455,6 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page, static unsigned long weighted_cpuload(struct rq *rq); static unsigned long source_load(int cpu, int type); static unsigned long target_load(int cpu, int type); -static unsigned long capacity_of(int cpu); /* Cached statistics for all CPUs within a node */ struct numa_stats { @@ -5568,6 +5574,26 @@ decay_load_missed(unsigned long load, unsigned long 
missed_updates, int idx) } return load; } +#endif /* CONFIG_NO_HZ_COMMON */ + +#ifndef CONFIG_BLD +static void set_cpu_sd_state_busy(int cpu) +{ + struct sched_domain *sd; + + rcu_read_lock(); + sd = rcu_dereference(per_cpu(sd_llc, cpu)); + + if (!sd || !sd->nohz_idle) + goto unlock; + sd->nohz_idle = 0; + + atomic_inc(&sd->shared->nr_busy_cpus); +unlock: + rcu_read_unlock(); +} + +static DEFINE_SPINLOCK(balancing); static struct { cpumask_var_t idle_cpus_mask; @@ -5577,7 +5603,20 @@ static struct { unsigned long next_blocked; /* Next update of blocked load in jiffies */ } nohz ____cacheline_aligned; -#endif /* CONFIG_NO_HZ_COMMON */ +void nohz_balance_exit_idle(struct rq *rq) +{ + SCHED_WARN_ON(rq != this_rq()); + + if (likely(!rq->nohz_tick_stopped)) + return; + + rq->nohz_tick_stopped = 0; + cpumask_clear_cpu(rq->cpu, nohz.idle_cpus_mask); + atomic_dec(&nohz.nr_cpus); + + set_cpu_sd_state_busy(rq->cpu); +} +#endif /* CONFIG_BLD */ /** * __cpu_load_update - update the rq->cpu_load[] statistics @@ -5776,6 +5815,7 @@ void cpu_load_update_active(struct rq *this_rq) cpu_load_update_periodic(this_rq, load); } +#ifndef CONFIG_BLD /* * Return a low guess at the load of a migration-source CPU weighted * according to the scheduling class and "nice" value. @@ -5809,11 +5849,6 @@ static unsigned long target_load(int cpu, int type) return max(rq->cpu_load[type-1], total); } -static unsigned long capacity_of(int cpu) -{ - return cpu_rq(cpu)->cpu_capacity; -} - static unsigned long capacity_orig_of(int cpu) { return cpu_rq(cpu)->cpu_capacity_orig; @@ -6663,6 +6698,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f return new_cpu; } +#endif /* CONFIG_BLD */ static void detach_entity_cfs_rq(struct sched_entity *se); @@ -7014,6 +7050,7 @@ done: __maybe_unused; return p; idle: +#ifndef CONFIG_BLD new_tasks = idle_balance(rq, rf); /* @@ -7026,7 +7063,7 @@ done: __maybe_unused; if (new_tasks > 0) goto again; - +#endif /* CONFIG_BLD */ return NULL; } @@ -7758,101 +7795,6 @@ static unsigned long task_h_load(struct task_struct *p) } #endif -/********** Helpers for find_busiest_group ************************/ - -enum group_type { - group_other = 0, - group_imbalanced, - group_overloaded, -}; - -/* - * sg_lb_stats - stats of a sched_group required for load_balancing - */ -struct sg_lb_stats { - unsigned long avg_load; /*Avg load across the CPUs of the group */ - unsigned long group_load; /* Total load over the CPUs of the group */ - unsigned long sum_weighted_load; /* Weighted load of group's tasks */ - unsigned long load_per_task; - unsigned long group_capacity; - unsigned long group_util; /* Total utilization of the group */ - unsigned int sum_nr_running; /* Nr tasks running in the group */ - unsigned int idle_cpus; - unsigned int group_weight; - enum group_type group_type; - int group_no_capacity; -#ifdef CONFIG_NUMA_BALANCING - unsigned int nr_numa_running; - unsigned int nr_preferred_running; -#endif -}; - -/* - * sd_lb_stats - Structure to store the statistics of a sched_domain - * during load balancing. 
- */ -struct sd_lb_stats { - struct sched_group *busiest; /* Busiest group in this sd */ - struct sched_group *local; /* Local group in this sd */ - unsigned long total_running; - unsigned long total_load; /* Total load of all groups in sd */ - unsigned long total_capacity; /* Total capacity of all groups in sd */ - unsigned long avg_load; /* Average load across all groups in sd */ - - struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */ - struct sg_lb_stats local_stat; /* Statistics of the local group */ -}; - -static inline void init_sd_lb_stats(struct sd_lb_stats *sds) -{ - /* - * Skimp on the clearing to avoid duplicate work. We can avoid clearing - * local_stat because update_sg_lb_stats() does a full clear/assignment. - * We must however clear busiest_stat::avg_load because - * update_sd_pick_busiest() reads this before assignment. - */ - *sds = (struct sd_lb_stats){ - .busiest = NULL, - .local = NULL, - .total_running = 0UL, - .total_load = 0UL, - .total_capacity = 0UL, - .busiest_stat = { - .avg_load = 0UL, - .sum_nr_running = 0, - .group_type = group_other, - }, - }; -} - -/** - * get_sd_load_idx - Obtain the load index for a given sched domain. - * @sd: The sched_domain whose load_idx is to be obtained. - * @idle: The idle status of the CPU for whose sd load_idx is obtained. - * - * Return: The load index. - */ -static inline int get_sd_load_idx(struct sched_domain *sd, - enum cpu_idle_type idle) -{ - int load_idx; - - switch (idle) { - case CPU_NOT_IDLE: - load_idx = sd->busy_idx; - break; - - case CPU_NEWLY_IDLE: - load_idx = sd->newidle_idx; - break; - default: - load_idx = sd->idle_idx; - break; - } - - return load_idx; -} - static unsigned long scale_rt_capacity(int cpu) { struct rq *rq = cpu_rq(cpu); @@ -7967,6 +7909,101 @@ void update_group_capacity(struct sched_domain *sd, int cpu) sdg->sgc->min_capacity = min_capacity; } +#ifndef CONFIG_BLD +/********** Helpers for find_busiest_group ************************/ +enum group_type { + group_other = 0, + group_imbalanced, + group_overloaded, +}; + +/* + * sg_lb_stats - stats of a sched_group required for load_balancing + */ +struct sg_lb_stats { + unsigned long avg_load; /*Avg load across the CPUs of the group */ + unsigned long group_load; /* Total load over the CPUs of the group */ + unsigned long sum_weighted_load; /* Weighted load of group's tasks */ + unsigned long load_per_task; + unsigned long group_capacity; + unsigned long group_util; /* Total utilization of the group */ + unsigned int sum_nr_running; /* Nr tasks running in the group */ + unsigned int idle_cpus; + unsigned int group_weight; + enum group_type group_type; + int group_no_capacity; +#ifdef CONFIG_NUMA_BALANCING + unsigned int nr_numa_running; + unsigned int nr_preferred_running; +#endif +}; + +/* + * sd_lb_stats - Structure to store the statistics of a sched_domain + * during load balancing. + */ +struct sd_lb_stats { + struct sched_group *busiest; /* Busiest group in this sd */ + struct sched_group *local; /* Local group in this sd */ + unsigned long total_running; + unsigned long total_load; /* Total load of all groups in sd */ + unsigned long total_capacity; /* Total capacity of all groups in sd */ + unsigned long avg_load; /* Average load across all groups in sd */ + + struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */ + struct sg_lb_stats local_stat; /* Statistics of the local group */ +}; + +static inline void init_sd_lb_stats(struct sd_lb_stats *sds) +{ + /* + * Skimp on the clearing to avoid duplicate work. 
We can avoid clearing + * local_stat because update_sg_lb_stats() does a full clear/assignment. + * We must however clear busiest_stat::avg_load because + * update_sd_pick_busiest() reads this before assignment. + */ + *sds = (struct sd_lb_stats){ + .busiest = NULL, + .local = NULL, + .total_running = 0UL, + .total_load = 0UL, + .total_capacity = 0UL, + .busiest_stat = { + .avg_load = 0UL, + .sum_nr_running = 0, + .group_type = group_other, + }, + }; +} + +/** + * get_sd_load_idx - Obtain the load index for a given sched domain. + * @sd: The sched_domain whose load_idx is to be obtained. + * @idle: The idle status of the CPU for whose sd load_idx is obtained. + * + * Return: The load index. + */ +static inline int get_sd_load_idx(struct sched_domain *sd, + enum cpu_idle_type idle) +{ + int load_idx; + + switch (idle) { + case CPU_NOT_IDLE: + load_idx = sd->busy_idx; + break; + + case CPU_NEWLY_IDLE: + load_idx = sd->newidle_idx; + break; + default: + load_idx = sd->idle_idx; + break; + } + + return load_idx; +} + /* * Check whether the capacity of the rq has been noticeably reduced by side * activity. The imbalance_pct is used for the threshold. @@ -9192,17 +9229,6 @@ static int active_load_balance_cpu_stop(void *data) return 0; } -static DEFINE_SPINLOCK(balancing); - -/* - * Scale the max load_balance interval with the number of CPUs in the system. - * This trades load-balance latency on larger machines for less cross talk. - */ -void update_max_interval(void) -{ - max_load_balance_interval = HZ*num_online_cpus()/10; -} - /* * It checks each scheduling domain to see if it is due to be balanced, * and initiates a balancing operation if so. @@ -9309,12 +9335,23 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) #endif } } +#endif /* CONFIG_BLD */ + +/* + * Scale the max load_balance interval with the number of CPUs in the system. + * This trades load-balance latency on larger machines for less cross talk. + */ +void update_max_interval(void) +{ + max_load_balance_interval = HZ*num_online_cpus()/10; +} static inline int on_null_domain(struct rq *rq) { return unlikely(!rcu_dereference_sched(rq->sd)); } +#ifndef CONFIG_BLD #ifdef CONFIG_NO_HZ_COMMON /* * idle load balancing details @@ -9453,36 +9490,6 @@ static void nohz_balancer_kick(struct rq *rq) kick_ilb(flags); } -static void set_cpu_sd_state_busy(int cpu) -{ - struct sched_domain *sd; - - rcu_read_lock(); - sd = rcu_dereference(per_cpu(sd_llc, cpu)); - - if (!sd || !sd->nohz_idle) - goto unlock; - sd->nohz_idle = 0; - - atomic_inc(&sd->shared->nr_busy_cpus); -unlock: - rcu_read_unlock(); -} - -void nohz_balance_exit_idle(struct rq *rq) -{ - SCHED_WARN_ON(rq != this_rq()); - - if (likely(!rq->nohz_tick_stopped)) - return; - - rq->nohz_tick_stopped = 0; - cpumask_clear_cpu(rq->cpu, nohz.idle_cpus_mask); - atomic_dec(&nohz.nr_cpus); - - set_cpu_sd_state_busy(rq->cpu); -} - static void set_cpu_sd_state_idle(int cpu) { struct sched_domain *sd; @@ -9693,7 +9700,6 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) return false; _nohz_idle_balance(this_rq, flags, idle); - return true; } @@ -9856,6 +9862,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf) return pulled_task; } +#endif /* CONFIG_BLD */ /* * run_rebalance_domains is triggered when needed from the scheduler tick. 
@@ -9863,6 +9870,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf) */ static __latent_entropy void run_rebalance_domains(struct softirq_action *h) { +#ifndef CONFIG_BLD struct rq *this_rq = this_rq(); enum cpu_idle_type idle = this_rq->idle_balance ? CPU_IDLE : CPU_NOT_IDLE; @@ -9881,6 +9889,7 @@ static __latent_entropy void run_rebalance_domains(struct softirq_action *h) /* normal load balance */ update_blocked_averages(this_rq->cpu); rebalance_domains(this_rq, idle); +#endif } /* @@ -9888,6 +9897,7 @@ static __latent_entropy void run_rebalance_domains(struct softirq_action *h) */ void trigger_load_balance(struct rq *rq) { +#ifndef CONFIG_BLD /* Don't need to rebalance while attached to NULL domain */ if (unlikely(on_null_domain(rq))) return; @@ -9896,6 +9906,7 @@ void trigger_load_balance(struct rq *rq) raise_softirq(SCHED_SOFTIRQ); nohz_balancer_kick(rq); +#endif } static void rq_online_fair(struct rq *rq) @@ -10408,7 +10419,9 @@ const struct sched_class fair_sched_class = { .put_prev_task = put_prev_task_fair, #ifdef CONFIG_SMP +#ifndef CONFIG_BLD .select_task_rq = select_task_rq_fair, +#endif .migrate_task_rq = migrate_task_rq_fair, .rq_online = rq_online_fair, @@ -10469,6 +10482,7 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m) __init void init_sched_fair_class(void) { +#ifndef CONFIG_BLD #ifdef CONFIG_SMP open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); @@ -10478,5 +10492,5 @@ __init void init_sched_fair_class(void) zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT); #endif #endif /* SMP */ - +#endif /* BLD */ } diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index eaaec8364f96..a94e6e1a50d1 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1385,6 +1385,7 @@ static void yield_task_rt(struct rq *rq) #ifdef CONFIG_SMP static int find_lowest_rq(struct task_struct *task); +#ifndef CONFIG_BLD static int select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) { @@ -1440,6 +1441,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) out: return cpu; } +#endif /* CONFIG_BLD */ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) { @@ -2375,7 +2377,9 @@ const struct sched_class rt_sched_class = { .put_prev_task = put_prev_task_rt, #ifdef CONFIG_SMP +#ifndef CONFIG_BLD .select_task_rq = select_task_rq_rt, +#endif .set_cpus_allowed = set_cpus_allowed_common, .rq_online = rq_online_rt, diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c7742dcc136c..d42fd4f578b2 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -535,9 +535,8 @@ struct cfs_rq { #endif /* CONFIG_FAIR_GROUP_SCHED */ #endif /* CONFIG_SMP */ -#ifdef CONFIG_FAIR_GROUP_SCHED struct rq *rq; /* CPU runqueue to which this cfs_rq is attached */ - +#ifdef CONFIG_FAIR_GROUP_SCHED /* * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in * a hierarchy). 
Non-leaf lrqs hold other higher schedulable entities @@ -564,6 +563,11 @@ struct cfs_rq { struct list_head throttled_list; #endif /* CONFIG_CFS_BANDWIDTH */ #endif /* CONFIG_FAIR_GROUP_SCHED */ + +#ifdef CONFIG_BLD + struct list_head bld_cfs_list; + char pos; +#endif }; static inline int rt_bandwidth_enabled(void) @@ -603,12 +607,15 @@ struct rt_rq { /* Nests inside the rq lock: */ raw_spinlock_t rt_runtime_lock; + struct rq *rq; #ifdef CONFIG_RT_GROUP_SCHED unsigned long rt_nr_boosted; - - struct rq *rq; struct task_group *tg; #endif +#ifdef CONFIG_BLD + struct list_head bld_rt_list; + int lowbit; +#endif }; static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq) @@ -902,7 +909,7 @@ static inline int cpu_of(struct rq *rq) } -#ifdef CONFIG_SCHED_SMT +#if (defined CONFIG_SCHED_SMT) && (!defined CONFIG_BLD) extern struct static_key_false sched_smt_present; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 5b33e2f5c0ed..b45fa32df41b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -943,7 +943,9 @@ static void __tick_nohz_idle_stop_tick(struct tick_sched *ts) if (!was_stopped && ts->tick_stopped) { ts->idle_jiffies = ts->last_jiffies; +#ifndef CONFIG_BLD nohz_balance_enter_idle(cpu); +#endif } } else { tick_nohz_retain_tick(ts);
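
How the bld.h hooks above fit together, shown as a standalone userspace
sketch (a toy model, not part of the patch: the toy_rq structure, the
toy_* names and the fully sorted list are illustrative assumptions).
BLD keeps every per-CPU runqueue on a global list that is nudged towards
load order each time a task is enqueued or dequeued, so placing a waking
task becomes an O(1) "take the head of the list" instead of a
sched-domain walk backed by periodic load balancing; the code below
roughly models bld_track_load_activate()/bld_track_load_deactivate()
and bld_pick_cpu_cfs().

#include <stdio.h>

struct toy_rq {
	int cpu;
	unsigned long load;
	struct toy_rq *prev, *next;	/* doubly linked, least loaded first */
};

static struct toy_rq *head, *tail;

static void list_remove(struct toy_rq *rq)
{
	if (rq->prev)
		rq->prev->next = rq->next;
	else
		head = rq->next;
	if (rq->next)
		rq->next->prev = rq->prev;
	else
		tail = rq->prev;
	rq->prev = rq->next = NULL;
}

/* Activation: load went up, push the rq towards the "busy" tail. */
static void toy_activate(struct toy_rq *rq, unsigned long weight)
{
	rq->load += weight;
	if (tail != rq && rq->load >= tail->load) {
		list_remove(rq);
		rq->prev = tail;
		tail->next = rq;
		tail = rq;
	}
}

/* Deactivation: load went down, push the rq towards the "idle" head. */
static void toy_deactivate(struct toy_rq *rq, unsigned long weight)
{
	rq->load -= weight;
	if (head != rq && rq->load <= head->load) {
		list_remove(rq);
		rq->next = head;
		head->prev = rq;
		head = rq;
	}
}

/* Wakeup placement: O(1), the head is (roughly) the least loaded CPU. */
static int toy_pick_cpu(void)
{
	return head->cpu;
}

int main(void)
{
	struct toy_rq rq[3] = { { .cpu = 0 }, { .cpu = 1 }, { .cpu = 2 } };

	/* wire up cpu0 <-> cpu1 <-> cpu2 */
	head = &rq[0]; tail = &rq[2];
	rq[0].next = &rq[1]; rq[1].prev = &rq[0];
	rq[1].next = &rq[2]; rq[2].prev = &rq[1];

	toy_activate(&rq[0], 1024);		/* cpu0 gets a task, moves back */
	printf("wake task on cpu%d\n", toy_pick_cpu());	/* -> cpu1 */
	toy_deactivate(&rq[0], 1024);		/* cpu0 idles, returns to head */
	printf("wake task on cpu%d\n", toy_pick_cpu());	/* -> cpu0 */
	return 0;
}

In the actual patch the CFS list only records whether a runqueue sits at
the front, the back, or in between (cfs.pos), the RT list is kept
roughly ordered by rt.lowbit (the lowest set bit of the priority
bitmap), and both lists are protected by the cfs_list_lock and
rt_list_lock rwlocks; the sketch drops those details to show the
ordering idea alone.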