BLD changes for Linux-4.14.

diff --git a/init/Kconfig b/init/Kconfig
index 3c1faaa..0bd8518 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -46,6 +46,15 @@ config BROKEN_ON_SMP
 	depends on BROKEN || !SMP
 	default y
 
+config BLD
+	bool "An alternate CPU load distribution technique for task scheduler"
+	depends on SMP && !NUMA_BALANCING
+	default y
+	help
+	  This is an alternate CPU load distribution technique for the task
+	  scheduler, based on the Barbershop Load Distribution algorithm. Not
+	  suitable for NUMA, should work well on SMP.
+
 config INIT_ENV_ARG_LIMIT
 	int
 	default 32 if !UML
diff --git a/kernel/sched/bld.h b/kernel/sched/bld.h
new file mode 100644
index 0000000..e2a487d
--- /dev/null
+++ b/kernel/sched/bld.h
@@ -0,0 +1,215 @@
+#ifdef CONFIG_BLD
+
+static DEFINE_RWLOCK(rt_list_lock);
+static LIST_HEAD(rt_rq_head);
+static LIST_HEAD(cfs_rq_head);
+static DEFINE_RWLOCK(cfs_list_lock);
+
+/*
+ * rq_of_cfs - map a CFS runqueue back to the per-CPU runqueue it belongs to.
+ */
+static inline struct rq *rq_of_cfs(struct cfs_rq *cfs_rq)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	return cfs_rq->rq;	/* explicit back-pointer */
+#else
+	return container_of(cfs_rq, struct rq, cfs);	/* embedded in rq */
+#endif
+}
+
+/*
+ * rq_of_rt - map an RT runqueue back to the per-CPU runqueue it belongs to.
+ */
+static inline struct rq *rq_of_rt(struct rt_rq *rt_rq)
+{
+#ifdef CONFIG_RT_GROUP_SCHED
+	return rt_rq->rq;	/* explicit back-pointer */
+#else
+	return container_of(rt_rq, struct rq, rt);	/* embedded in rq */
+#endif
+}
+
+/*
+ * Scan @mask for a wakeup CPU: a non-zero @task_type ranks by CFS load weight,
+ * zero ranks by rt.lowbit. Falls back to the current CPU if nothing qualifies.
+ */
+static int select_cpu_for_wakeup(int task_type, struct cpumask *mask)
+{
+	int best = smp_processor_id(), c;
+	unsigned long cur, mark;
+
+	if (!task_type) {
+		/*
+		 * RT: favour a CPU that is easy to preempt, judged by
+		 * rt.lowbit (a hint of the lowest bit set in the priority
+		 * bitmap). All CPUs run high priority kernel threads, so 0
+		 * is excluded; the hint may be stale. Last CPU wins ties.
+		 */
+		mark = 1;
+		for_each_cpu(c, mask) {
+			cur = cpu_rq(c)->rt.lowbit;
+			if (cur < mark)
+				continue;
+			mark = cur;
+			best = c;
+		}
+		return best;
+	}
+
+	/* CFS: the smallest load weight wins, the first CPU seen on ties. */
+	mark = ULONG_MAX;
+	for_each_cpu(c, mask) {
+		cur = cpu_rq(c)->cfs.load.weight;
+		if (cur >= mark)
+			continue;
+		mark = cur;
+		best = c;
+	}
+	return best;
+}
+
+static int bld_pick_cpu_cfs(struct task_struct *p, int sd_flags, int wake_flags)
+{
+	unsigned int picked = smp_processor_id();	/* kept if the list is empty */
+	unsigned long irqflags;
+	struct cfs_rq *pos;
+
+	read_lock_irqsave(&cfs_list_lock, irqflags);
+	list_for_each_entry(pos, &cfs_rq_head, bld_cfs_list) {
+		picked = cpu_of(rq_of_cfs(pos));
+		if (cpu_online(picked))	/* first online CPU from the head wins */
+			break;
+	}
+	read_unlock_irqrestore(&cfs_list_lock, irqflags);
+	return picked;
+}
+
+static int bld_pick_cpu_rt(struct task_struct *p, int sd_flags, int wake_flags)
+{
+	unsigned int picked = smp_processor_id();	/* kept if the list is empty */
+	unsigned long irqflags;
+	struct rt_rq *pos;
+
+	read_lock_irqsave(&rt_list_lock, irqflags);
+	list_for_each_entry(pos, &rt_rq_head, bld_rt_list) {
+		picked = cpu_of(rq_of_rt(pos));
+		if (cpu_online(picked))	/* first online CPU from the head wins */
+			break;
+	}
+	read_unlock_irqrestore(&rt_list_lock, irqflags);
+	return picked;
+}
+
+/*
+ * Choose a CPU for @p by scanning a mask: cpus_allowed for an SD_BALANCE_WAKE
+ * whose waking CPU is allowed, otherwise the domain span of task_cpu(@p).
+ */
+static int bld_pick_cpu_domain(struct task_struct *p, int sd_flags, int wake_flags)
+{
+	unsigned int cpu = smp_processor_id();
+	struct cpumask *tmpmask = &p->cpus_allowed;
+	struct sched_domain *sd;
+
+	if (p->nr_cpus_allowed == 1)
+		return task_cpu(p);
+
+	/* rq->sd is RCU-managed and NULL on a null domain: guard both. */
+	rcu_read_lock();
+	if (!(sd_flags & SD_BALANCE_WAKE) || !cpumask_test_cpu(cpu, tmpmask)) {
+		sd = rcu_dereference(cpu_rq(task_cpu(p))->sd);
+		if (sd)
+			tmpmask = sched_domain_span(sd);
+	}
+	/* type 0 ranks CPUs by rt.lowbit, type 1 by CFS load weight */
+	cpu = select_cpu_for_wakeup(rt_task(p) ? 0 : 1, tmpmask);
+	rcu_read_unlock();
+
+	return cpu;
+}
+
+/* Refresh rq->rt.lowbit and send the RT queue to the list tail when due. */
+static void track_load_rt(struct rq *rq, struct task_struct *p)
+{
+	struct rt_rq *rt = &rq->rt, *head;
+	unsigned long irqflags;
+	int bit = sched_find_first_bit(rt->active.bitmap);
+
+	head = list_entry(rt_rq_head.next, struct rt_rq, bld_rt_list);
+
+	/* lowbit only ever moves down, and a first bit of 0 is ignored */
+	if (bit > 0 && bit <= rt->lowbit)
+		rt->lowbit = bit;
+
+	if (rt->lowbit >= head->lowbit)
+		return;
+
+	write_lock_irqsave(&rt_list_lock, irqflags);
+	list_del(&rt->bld_rt_list);
+	list_add_tail(&rt->bld_rt_list, &rt_rq_head);
+	write_unlock_irqrestore(&rt_list_lock, irqflags);
+}
+
+/* Route CPU selection: mask scan for wakeups and multi-threaded exec. */
+static int bld_get_cpu(struct task_struct *p, int sd_flags, int wake_flags)
+{
+	int scan = sd_flags == SD_BALANCE_WAKE;
+
+	if (!scan && sd_flags == SD_BALANCE_EXEC)
+		scan = get_nr_threads(p) > 1;
+
+	if (scan)
+		return bld_pick_cpu_domain(p, sd_flags, wake_flags);
+	/* otherwise use the first online CPU on the per-class list */
+	if (rt_task(p))
+		return bld_pick_cpu_rt(p, sd_flags, wake_flags);
+	return bld_pick_cpu_cfs(p, sd_flags, wake_flags);
+}
+
+/* Enqueue hook: keep the most loaded CFS queue at the tail of cfs_rq_head. */
+static void bld_track_load_activate(struct rq *rq, struct task_struct *p)
+{
+	struct cfs_rq *last = list_entry(cfs_rq_head.prev, struct cfs_rq, bld_cfs_list);
+	unsigned long flag;
+
+	if (rt_task(p)) {
+		track_load_rt(rq, p);
+	} else if (rq->cfs.pos != 2 && rq->cfs.load.weight >= last->load.weight) {
+		write_lock_irqsave(&cfs_list_lock, flag);
+		list_del(&rq->cfs.bld_cfs_list);
+		list_add_tail(&rq->cfs.bld_cfs_list, &cfs_rq_head);
+		/* pos: 0 head, 1 middle, 2 tail; demote the old tail first */
+		last->pos = 1;
+		/* ...because 'last' may be this very queue, which stays tail */
+		rq->cfs.pos = 2;
+		write_unlock_irqrestore(&cfs_list_lock, flag);
+	}
+}
+
+/* Dequeue hook: keep the least loaded CFS queue at the head of cfs_rq_head. */
+static void bld_track_load_deactivate(struct rq *rq, struct task_struct *p)
+{
+	struct cfs_rq *first = list_entry(cfs_rq_head.next, struct cfs_rq, bld_cfs_list);
+	unsigned long flag;
+
+	if (rt_task(p)) {
+		track_load_rt(rq, p);
+	} else if (rq->cfs.pos != 0 && rq->cfs.load.weight <= first->load.weight) {
+		write_lock_irqsave(&cfs_list_lock, flag);
+		list_del(&rq->cfs.bld_cfs_list);
+		list_add(&rq->cfs.bld_cfs_list, &cfs_rq_head);
+		/* pos: 0 head, 1 middle, 2 tail; demote the old head first */
+		first->pos = 1;
+		/* ...because 'first' may be this very queue, which stays head */
+		rq->cfs.pos = 0;
+		write_unlock_irqrestore(&cfs_list_lock, flag);
+	}
+}
+#else
+/* !CONFIG_BLD: the enqueue-side hook is an empty stub. */
+static inline void
+bld_track_load_activate(struct rq *rq, struct task_struct *p) { }
+
+/* !CONFIG_BLD: the dequeue-side hook is an empty stub. */
+static inline void
+bld_track_load_deactivate(struct rq *rq, struct task_struct *p) { }
+#endif /* CONFIG_BLD */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d17c5da..6b68caf 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -36,6 +36,7 @@
 #include "sched.h"
 #include "../workqueue_internal.h"
 #include "../smpboot.h"
+#include "bld.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
@@ -760,6 +761,8 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
 		sched_info_queued(rq, p);
 
 	p->sched_class->enqueue_task(rq, p, flags);
+	if (!dl_task(p))
+		bld_track_load_activate(rq, p);
 }
 
 static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
@@ -771,6 +774,8 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 		sched_info_dequeued(rq, p);
 
 	p->sched_class->dequeue_task(rq, p, flags);
+	if (!dl_task(p))
+		bld_track_load_deactivate(rq, p);
 }
 
 void activate_task(struct rq *rq, struct task_struct *p, int flags)
@@ -1540,11 +1545,17 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
 {
 	lockdep_assert_held(&p->pi_lock);
 
+#ifndef	CONFIG_BLD
 	if (p->nr_cpus_allowed > 1)
 		cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
 	else
 		cpu = cpumask_any(&p->cpus_allowed);
-
+#else
+	if (dl_task(p))
+		cpu = dl_sched_class.select_task_rq(p, cpu, sd_flags, wake_flags);
+	else
+		cpu = bld_get_cpu(p, sd_flags, wake_flags);
+#endif
 	/*
 	 * In order not to call set_task_cpu() on a blocking task we need
 	 * to rely on ttwu() to place the task on a valid ->cpus_allowed
@@ -1765,7 +1776,11 @@ void scheduler_ipi(void)
 	 */
 	preempt_fold_need_resched();
 
+#ifndef	CONFIG_BLD
 	if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())
+#else
+	if (llist_empty(&this_rq()->wake_list))
+#endif
 		return;
 
 	/*
@@ -1787,13 +1802,16 @@ void scheduler_ipi(void)
 	/*
 	 * Check if someone kicked us for doing the nohz idle load balance.
 	 */
+#ifndef	CONFIG_BLD
 	if (unlikely(got_nohz_idle_kick())) {
 		this_rq()->idle_balance = 1;
 		raise_softirq_irqoff(SCHED_SOFTIRQ);
 	}
+#endif
 	irq_exit();
 }
 
+#ifndef	CONFIG_BLD
 static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags)
 {
 	struct rq *rq = cpu_rq(cpu);
@@ -1807,6 +1825,7 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags)
 			trace_sched_wake_idle_without_ipi(cpu);
 	}
 }
+#endif	/*CONFIG_BLD */
 
 void wake_up_if_idle(int cpu)
 {
@@ -1843,7 +1862,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
 	struct rq *rq = cpu_rq(cpu);
 	struct rq_flags rf;
 
-#if defined(CONFIG_SMP)
+#if defined(CONFIG_SMP) && !defined(CONFIG_BLD)
 	if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) {
 		sched_clock_cpu(cpu); /* Sync clocks across CPUs */
 		ttwu_queue_remote(p, cpu, wake_flags);
@@ -2912,7 +2931,14 @@ void sched_exec(void)
 	int dest_cpu;
 
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
+#ifndef	CONFIG_BLD
 	dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0);
+#else
+	if (dl_task(p))
+		dest_cpu = task_cpu(p);
+	else
+		dest_cpu = bld_get_cpu(p, SD_BALANCE_EXEC, 0);
+#endif
 	if (dest_cpu == smp_processor_id())
 		goto unlock;
 
@@ -3022,8 +3048,10 @@ void scheduler_tick(void)
 
 #ifdef CONFIG_SMP
 	rq->idle_balance = idle_cpu(cpu);
+#ifndef	CONFIG_BLD
 	trigger_load_balance(rq);
 #endif
+#endif
 	rq_last_tick_reset(rq);
 }
 
@@ -5702,7 +5730,9 @@ int sched_cpu_dying(unsigned int cpu)
 
 	calc_load_migrate(rq);
 	update_max_interval();
+#ifndef	CONFIG_BLD
 	nohz_balance_exit_idle(cpu);
+#endif
 	hrtick_clear(rq);
 	return 0;
 }
@@ -5931,6 +5961,15 @@ void __init sched_init(void)
 #endif /* CONFIG_SMP */
 		init_rq_hrtick(rq);
 		atomic_set(&rq->nr_iowait, 0);
+#ifdef CONFIG_BLD
+		INIT_LIST_HEAD(&rq->cfs.bld_cfs_list);
+		list_add_tail(&rq->cfs.bld_cfs_list, &cfs_rq_head);
+		rq->cfs.pos = 0;
+
+		INIT_LIST_HEAD(&rq->rt.bld_rt_list);
+		list_add_tail(&rq->rt.bld_rt_list, &rt_rq_head);
+		rq->rt.lowbit = INT_MAX;
+#endif 
 	}
 
 	set_load_weight(&init_task);
@@ -5963,6 +6002,9 @@ void __init sched_init(void)
 	init_schedstats();
 
 	scheduler_running = 1;
+#ifdef	CONFIG_BLD
+	printk(KERN_INFO "BLD: An Alternate CPU load distributor activated.\n");
+#endif 
 }
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5c09ddf..371320d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -150,6 +150,11 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w)
 	lw->inv_weight = 0;
 }
 
+static unsigned long capacity_of(int cpu)
+{
+	return cpu_rq(cpu)->cpu_capacity;	/* cpu_capacity field of that CPU's rq */
+}
+
 /*
  * Increase the granularity value when there are more CPUs,
  * because with more CPUs the 'effective latency' as visible
@@ -709,7 +714,9 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 #include "sched-pelt.h"
 
+#ifndef CONFIG_BLD
 static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
+#endif
 static unsigned long task_h_load(struct task_struct *p);
 
 /* Give new sched_entity start runnable values to heavy its load in infant time */
@@ -1430,7 +1437,6 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
 static unsigned long weighted_cpuload(struct rq *rq);
 static unsigned long source_load(int cpu, int type);
 static unsigned long target_load(int cpu, int type);
-static unsigned long capacity_of(int cpu);
 
 /* Cached statistics for all CPUs within a node */
 struct numa_stats {
@@ -5255,6 +5261,8 @@ void cpu_load_update_active(struct rq *this_rq)
 		cpu_load_update_periodic(this_rq, load);
 }
 
+#ifndef CONFIG_BLD
+
 /*
  * Return a low guess at the load of a migration-source cpu weighted
  * according to the scheduling class and "nice" value.
@@ -5288,11 +5296,6 @@ static unsigned long target_load(int cpu, int type)
 	return max(rq->cpu_load[type-1], total);
 }
 
-static unsigned long capacity_of(int cpu)
-{
-	return cpu_rq(cpu)->cpu_capacity;
-}
-
 static unsigned long capacity_orig_of(int cpu)
 {
 	return cpu_rq(cpu)->cpu_capacity_orig;
@@ -6025,6 +6028,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 
 	return new_cpu;
 }
+#endif /* CONFIG_BLD */
 
 /*
  * Called immediately before a task is migrated to a new cpu; task_cpu(p) and
@@ -6357,6 +6361,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
 	return p;
 
 idle:
+#ifndef	CONFIG_BLD
 	new_tasks = idle_balance(rq, rf);
 
 	/*
@@ -6369,7 +6374,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
 
 	if (new_tasks > 0)
 		goto again;
-
+#endif	/* CONFIG_BLD */
 	return NULL;
 }
 
@@ -7071,8 +7076,8 @@ static unsigned long task_h_load(struct task_struct *p)
 }
 #endif
 
+#ifndef	CONFIG_BLD
 /********** Helpers for find_busiest_group ************************/
-
 enum group_type {
 	group_other = 0,
 	group_imbalanced,
@@ -7166,120 +7171,6 @@ static inline int get_sd_load_idx(struct sched_domain *sd,
 	return load_idx;
 }
 
-static unsigned long scale_rt_capacity(int cpu)
-{
-	struct rq *rq = cpu_rq(cpu);
-	u64 total, used, age_stamp, avg;
-	s64 delta;
-
-	/*
-	 * Since we're reading these variables without serialization make sure
-	 * we read them once before doing sanity checks on them.
-	 */
-	age_stamp = READ_ONCE(rq->age_stamp);
-	avg = READ_ONCE(rq->rt_avg);
-	delta = __rq_clock_broken(rq) - age_stamp;
-
-	if (unlikely(delta < 0))
-		delta = 0;
-
-	total = sched_avg_period() + delta;
-
-	used = div_u64(avg, total);
-
-	if (likely(used < SCHED_CAPACITY_SCALE))
-		return SCHED_CAPACITY_SCALE - used;
-
-	return 1;
-}
-
-static void update_cpu_capacity(struct sched_domain *sd, int cpu)
-{
-	unsigned long capacity = arch_scale_cpu_capacity(sd, cpu);
-	struct sched_group *sdg = sd->groups;
-
-	cpu_rq(cpu)->cpu_capacity_orig = capacity;
-
-	capacity *= scale_rt_capacity(cpu);
-	capacity >>= SCHED_CAPACITY_SHIFT;
-
-	if (!capacity)
-		capacity = 1;
-
-	cpu_rq(cpu)->cpu_capacity = capacity;
-	sdg->sgc->capacity = capacity;
-	sdg->sgc->min_capacity = capacity;
-}
-
-void update_group_capacity(struct sched_domain *sd, int cpu)
-{
-	struct sched_domain *child = sd->child;
-	struct sched_group *group, *sdg = sd->groups;
-	unsigned long capacity, min_capacity;
-	unsigned long interval;
-
-	interval = msecs_to_jiffies(sd->balance_interval);
-	interval = clamp(interval, 1UL, max_load_balance_interval);
-	sdg->sgc->next_update = jiffies + interval;
-
-	if (!child) {
-		update_cpu_capacity(sd, cpu);
-		return;
-	}
-
-	capacity = 0;
-	min_capacity = ULONG_MAX;
-
-	if (child->flags & SD_OVERLAP) {
-		/*
-		 * SD_OVERLAP domains cannot assume that child groups
-		 * span the current group.
-		 */
-
-		for_each_cpu(cpu, sched_group_span(sdg)) {
-			struct sched_group_capacity *sgc;
-			struct rq *rq = cpu_rq(cpu);
-
-			/*
-			 * build_sched_domains() -> init_sched_groups_capacity()
-			 * gets here before we've attached the domains to the
-			 * runqueues.
-			 *
-			 * Use capacity_of(), which is set irrespective of domains
-			 * in update_cpu_capacity().
-			 *
-			 * This avoids capacity from being 0 and
-			 * causing divide-by-zero issues on boot.
-			 */
-			if (unlikely(!rq->sd)) {
-				capacity += capacity_of(cpu);
-			} else {
-				sgc = rq->sd->groups->sgc;
-				capacity += sgc->capacity;
-			}
-
-			min_capacity = min(capacity, min_capacity);
-		}
-	} else  {
-		/*
-		 * !SD_OVERLAP domains can assume that child groups
-		 * span the current group.
-		 */
-
-		group = child->groups;
-		do {
-			struct sched_group_capacity *sgc = group->sgc;
-
-			capacity += sgc->capacity;
-			min_capacity = min(sgc->min_capacity, min_capacity);
-			group = group->next;
-		} while (group != child->groups);
-	}
-
-	sdg->sgc->capacity = capacity;
-	sdg->sgc->min_capacity = min_capacity;
-}
-
 /*
  * Check whether the capacity of the rq has been noticeably reduced by side
  * activity. The imbalance_pct is used for the threshold.
@@ -8575,6 +8466,123 @@ static int active_load_balance_cpu_stop(void *data)
 
 	return 0;
 }
+#endif /* CONFIG_BLD */
+
+/* Return SCHED_CAPACITY_SCALE minus rq->rt_avg over the period, or 1 if none is left. */
+static unsigned long scale_rt_capacity(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	u64 total, used, age_stamp, avg;
+	s64 delta;
+
+	/*
+	 * Since we're reading these variables without serialization make sure
+	 * we read them once before doing sanity checks on them.
+	 */
+	age_stamp = READ_ONCE(rq->age_stamp);
+	avg = READ_ONCE(rq->rt_avg);
+	delta = __rq_clock_broken(rq) - age_stamp;
+
+	if (unlikely(delta < 0))
+		delta = 0;
+
+	total = sched_avg_period() + delta;
+
+	used = div_u64(avg, total);
+
+	if (likely(used < SCHED_CAPACITY_SCALE))
+		return SCHED_CAPACITY_SCALE - used;
+
+	return 1;
+}
+
+/* Set @cpu's rq capacity fields and mirror the result into sd->groups->sgc. */
+static void update_cpu_capacity(struct sched_domain *sd, int cpu)
+{
+	unsigned long capacity = arch_scale_cpu_capacity(sd, cpu);
+	struct sched_group *sdg = sd->groups;
+
+	cpu_rq(cpu)->cpu_capacity_orig = capacity;
+
+	capacity *= scale_rt_capacity(cpu);
+	capacity >>= SCHED_CAPACITY_SHIFT;
+
+	if (!capacity)
+		capacity = 1;
+
+	cpu_rq(cpu)->cpu_capacity = capacity;
+	sdg->sgc->capacity = capacity;
+	sdg->sgc->min_capacity = capacity;
+}
+
+void update_group_capacity(struct sched_domain *sd, int cpu)
+{
+	struct sched_domain *child = sd->child;
+	struct sched_group *group, *sdg = sd->groups;
+	unsigned long capacity, min_capacity;
+	unsigned long interval;
+
+	interval = msecs_to_jiffies(sd->balance_interval);
+	interval = clamp(interval, 1UL, max_load_balance_interval);
+	sdg->sgc->next_update = jiffies + interval;
+	/* No child domain: this CPU's own capacity is the group's capacity. */
+	if (!child) {
+		update_cpu_capacity(sd, cpu);
+		return;
+	}
+
+	capacity = 0;
+	min_capacity = ULONG_MAX;
+
+	if (child->flags & SD_OVERLAP) {
+		/*
+		 * SD_OVERLAP domains cannot assume that child groups
+		 * span the current group.
+		 */
+
+		for_each_cpu(cpu, sched_group_span(sdg)) {
+			struct sched_group_capacity *sgc;
+			struct rq *rq = cpu_rq(cpu);
+
+			/*
+			 * build_sched_domains() -> init_sched_groups_capacity()
+			 * gets here before we've attached the domains to the
+			 * runqueues.
+			 *
+			 * Use capacity_of(), which is set irrespective of domains
+			 * in update_cpu_capacity().
+			 *
+			 * This avoids capacity from being 0 and
+			 * causing divide-by-zero issues on boot.
+			 */
+			if (unlikely(!rq->sd)) {
+				capacity += capacity_of(cpu);
+			} else {
+				sgc = rq->sd->groups->sgc;
+				capacity += sgc->capacity;
+			}
+			/* NOTE(review): min() gets the running sum, not a per-CPU value */
+			min_capacity = min(capacity, min_capacity);
+		}
+	} else  {
+		/*
+		 * !SD_OVERLAP domains can assume that child groups
+		 * span the current group.
+		 */
+
+		group = child->groups;
+		do {
+			struct sched_group_capacity *sgc = group->sgc;
+
+			capacity += sgc->capacity;
+			min_capacity = min(sgc->min_capacity, min_capacity);
+			group = group->next;
+		} while (group != child->groups);
+	}
+
+	sdg->sgc->capacity = capacity;
+	sdg->sgc->min_capacity = min_capacity;
+}
 
 static inline int on_null_domain(struct rq *rq)
 {
@@ -8588,12 +8596,27 @@ static inline int on_null_domain(struct rq *rq)
  *   needed, they will kick the idle load balancer, which then does idle
  *   load balancing for all the idle CPUs.
  */
+#ifndef	CONFIG_BLD
 static struct {
 	cpumask_var_t idle_cpus_mask;
 	atomic_t nr_cpus;
 	unsigned long next_balance;     /* in jiffy units */
 } nohz ____cacheline_aligned;
 
+void nohz_balance_exit_idle(unsigned int cpu)
+{
+	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
+		/*
+		 * Fully isolated CPUs never set their mask bit, so test it first.
+		 */
+		if (likely(cpumask_test_cpu(cpu, nohz.idle_cpus_mask))) {
+			cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
+			atomic_dec(&nohz.nr_cpus);
+		}
+		clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
+	}
+}
+
 static inline int find_new_ilb(void)
 {
 	int ilb = cpumask_first(nohz.idle_cpus_mask);
@@ -8632,20 +8655,6 @@ static void nohz_balancer_kick(void)
 	return;
 }
 
-void nohz_balance_exit_idle(unsigned int cpu)
-{
-	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
-		/*
-		 * Completely isolated CPUs don't ever set, so we must test.
-		 */
-		if (likely(cpumask_test_cpu(cpu, nohz.idle_cpus_mask))) {
-			cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
-			atomic_dec(&nohz.nr_cpus);
-		}
-		clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
-	}
-}
-
 static inline void set_cpu_sd_state_busy(void)
 {
 	struct sched_domain *sd;
@@ -8662,23 +8671,7 @@ static inline void set_cpu_sd_state_busy(void)
 unlock:
 	rcu_read_unlock();
 }
-
-void set_cpu_sd_state_idle(void)
-{
-	struct sched_domain *sd;
-	int cpu = smp_processor_id();
-
-	rcu_read_lock();
-	sd = rcu_dereference(per_cpu(sd_llc, cpu));
-
-	if (!sd || sd->nohz_idle)
-		goto unlock;
-	sd->nohz_idle = 1;
-
-	atomic_dec(&sd->shared->nr_busy_cpus);
-unlock:
-	rcu_read_unlock();
-}
+#endif	/* CONFIG_BLD */
 
 /*
  * This routine will record that the cpu is going idle with tick stopped.
@@ -8686,6 +8679,7 @@ void set_cpu_sd_state_idle(void)
  */
 void nohz_balance_enter_idle(int cpu)
 {
+#ifndef	CONFIG_BLD
 	/*
 	 * If this cpu is going down, then nothing needs to be done.
 	 */
@@ -8708,10 +8702,27 @@ void nohz_balance_enter_idle(int cpu)
 	cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
 	atomic_inc(&nohz.nr_cpus);
 	set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
-}
 #endif
+}
 
-static DEFINE_SPINLOCK(balancing);
+void set_cpu_sd_state_idle(void)
+{
+	struct sched_domain *sd;
+	int cpu = smp_processor_id();
+
+	rcu_read_lock();
+	sd = rcu_dereference(per_cpu(sd_llc, cpu));
+	/* Nothing to do without an sd_llc domain or if it is already marked idle. */
+	if (!sd || sd->nohz_idle)
+		goto unlock;
+	sd->nohz_idle = 1;
+	/* first idle transition only: one less CPU in the shared busy count */
+	atomic_dec(&sd->shared->nr_busy_cpus);
+unlock:
+	rcu_read_unlock();
+}
+
+#endif  /* NO_HZ_COMMON */
 
 /*
  * Scale the max load_balance interval with the number of CPUs in the system.
@@ -8722,6 +8733,8 @@ void update_max_interval(void)
 	max_load_balance_interval = HZ*num_online_cpus()/10;
 }
 
+#ifndef	CONFIG_BLD
+static DEFINE_SPINLOCK(balancing);
 /*
  * It checks each scheduling domain to see if it is due to be balanced,
  * and initiates a balancing operation if so.
@@ -8980,7 +8993,7 @@ static inline bool nohz_kick_needed(struct rq *rq)
 }
 #else
 static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) { }
-#endif
+#endif	/* CONFIG_NO_HZ_COMMON */
 
 /*
  * run_rebalance_domains is triggered when needed from the scheduler tick.
@@ -9020,6 +9033,7 @@ void trigger_load_balance(struct rq *rq)
 		nohz_balancer_kick();
 #endif
 }
+#endif	/* CONFIG_BLD */
 
 static void rq_online_fair(struct rq *rq)
 {
@@ -9530,7 +9544,9 @@ const struct sched_class fair_sched_class = {
 	.put_prev_task		= put_prev_task_fair,
 
 #ifdef CONFIG_SMP
+#ifndef	CONFIG_BLD
 	.select_task_rq		= select_task_rq_fair,
+#endif
 	.migrate_task_rq	= migrate_task_rq_fair,
 
 	.rq_online		= rq_online_fair,
@@ -9591,6 +9607,7 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m)
 
 __init void init_sched_fair_class(void)
 {
+#ifndef	CONFIG_BLD
 #ifdef CONFIG_SMP
 	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
 
@@ -9599,5 +9616,5 @@ __init void init_sched_fair_class(void)
 	zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
 #endif
 #endif /* SMP */
-
+#endif /* BLD */
 }
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 3c96c80..aaa6a7d 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1387,6 +1387,7 @@ static void yield_task_rt(struct rq *rq)
 #ifdef CONFIG_SMP
 static int find_lowest_rq(struct task_struct *task);
 
+#ifndef	CONFIG_BLD
 static int
 select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
@@ -1442,6 +1443,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
 out:
 	return cpu;
 }
+#endif	/* CONFIG_BLD */
 
 static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 {
@@ -2430,7 +2432,9 @@ const struct sched_class rt_sched_class = {
 	.put_prev_task		= put_prev_task_rt,
 
 #ifdef CONFIG_SMP
+#ifndef	CONFIG_BLD
 	.select_task_rq		= select_task_rq_rt,
+#endif
 
 	.set_cpus_allowed       = set_cpus_allowed_common,
 	.rq_online              = rq_online_rt,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3b448ba..024d1e0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -468,9 +468,8 @@ struct cfs_rq {
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 #endif /* CONFIG_SMP */
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
 	struct rq *rq;	/* cpu runqueue to which this cfs_rq is attached */
-
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	/*
 	 * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
 	 * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
@@ -494,6 +493,11 @@ struct cfs_rq {
 	struct list_head throttled_list;
 #endif /* CONFIG_CFS_BANDWIDTH */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
+
+#ifdef CONFIG_BLD
+	struct list_head bld_cfs_list;
+	char pos;
+#endif
 };
 
 static inline int rt_bandwidth_enabled(void)
@@ -539,12 +543,15 @@ struct rt_rq {
 	/* Nests inside the rq lock: */
 	raw_spinlock_t rt_runtime_lock;
 
+	struct rq *rq;
 #ifdef CONFIG_RT_GROUP_SCHED
 	unsigned long rt_nr_boosted;
-
-	struct rq *rq;
 	struct task_group *tg;
 #endif
+#ifdef CONFIG_BLD
+	struct list_head bld_rt_list;
+	int lowbit;
+#endif
 };
 
 /* Deadline class' related fields in a runqueue */
@@ -810,7 +817,7 @@ static inline int cpu_of(struct rq *rq)
 }
 
 
-#ifdef CONFIG_SCHED_SMT
+#if (defined CONFIG_SCHED_SMT) && (!defined CONFIG_BLD)
 
 extern struct static_key_false sched_smt_present;