diff --git a/include/linux/sched.h b/include/linux/sched.h
index 12aa57de8eea..4ec389d7e7c2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -452,6 +452,11 @@ struct sched_entity {
 	/* For load-balancing: */
 	struct load_weight	load;
 	struct rb_node		run_node;
+#ifdef CONFIG_CACHY_SCHED
+	struct sched_entity	*next;
+	struct sched_entity	*prev;
+	u64			hrrn_start_time;
+#endif
 	struct list_head	group_node;
 	unsigned int		on_rq;
 
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 3c31ba88aca5..cfeefb3d5681 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -31,6 +31,11 @@ extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_child_runs_first;
 
+#ifdef CONFIG_CACHY_SCHED
+extern int hrrn_max_lifetime;
+extern int cachy_harsh_mode;
+#endif
+
 enum sched_tunable_scaling {
 	SCHED_TUNABLESCALING_NONE,
 	SCHED_TUNABLESCALING_LOG,
diff --git a/init/Kconfig b/init/Kconfig
index 2a5df1cf838c..23c583cc4935 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -822,6 +822,17 @@ config UCLAMP_BUCKETS_COUNT
 
 endmenu
 
+config CACHY_SCHED
+	bool "Cachy CPU scheduler"
+	default y
+	help
+	  The Cachy CPU scheduler is based on the Highest Response Ratio Next
+	  (HRRN) policy.
+
+	  It is designed for desktop usage, since it focuses on responsiveness.
+
+	  If unsure, say Y here.
+
 #
 # For architectures that want to enable the support for NUMA-affine scheduler
 # balancing logic:
@@ -939,6 +950,7 @@ config CGROUP_WRITEBACK
 
 menuconfig CGROUP_SCHED
 	bool "CPU controller"
+	depends on !CACHY_SCHED
 	default n
 	help
 	  This feature lets CPU scheduler recognize task groups and control CPU
@@ -1201,6 +1213,7 @@ config CHECKPOINT_RESTORE
 
 config SCHED_AUTOGROUP
 	bool "Automatic process group scheduling"
+	depends on !CACHY_SCHED
 	select CGROUPS
 	select CGROUP_SCHED
 	select FAIR_GROUP_SCHED
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c4da7e17b906..22538c0307e8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3359,6 +3359,11 @@ void wake_up_new_task(struct task_struct *p)
 	update_rq_clock(rq);
 	post_init_entity_util_avg(p);
 
+#ifdef CONFIG_CACHY_SCHED
+	if (!cachy_harsh_mode)
+		p->se.hrrn_start_time = sched_clock();
+#endif
+
 	activate_task(rq, p, ENQUEUE_NOCLOCK);
 	trace_sched_wakeup_new(p);
 	check_preempt_curr(rq, p, WF_FORK);
@@ -7073,6 +7078,10 @@ void __init sched_init(void)
 	BUG_ON(&dl_sched_class + 1 != &stop_sched_class);
 #endif
 
+#ifdef CONFIG_CACHY_SCHED
+	printk(KERN_INFO "Cachy CPU scheduler v5.9-r9 by Hamad Al Marri.\n");
+#endif
+
 	wait_bit_init();
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 36c54265bb2b..3c3cff283ab4 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -481,8 +481,12 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
 
 void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 {
-	s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
-		spread, rq0_min_vruntime, spread0;
+	s64 MIN_vruntime = -1,
+#if !defined(CONFIG_CACHY_SCHED)
+	    min_vruntime, rq0_min_vruntime,
+	    spread0,
+#endif
+	    max_vruntime = -1, spread;
 	struct rq *rq = cpu_rq(cpu);
 	struct sched_entity *last;
 	unsigned long flags;
@@ -503,21 +507,27 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	last = __pick_last_entity(cfs_rq);
 	if (last)
 		max_vruntime = last->vruntime;
+#if !defined(CONFIG_CACHY_SCHED)
 	min_vruntime = cfs_rq->min_vruntime;
 	rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
+#endif raw_spin_unlock_irqrestore(&rq->lock, flags); SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime", SPLIT_NS(MIN_vruntime)); +#if !defined(CONFIG_CACHY_SCHED) SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime", SPLIT_NS(min_vruntime)); +#endif SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "max_vruntime", SPLIT_NS(max_vruntime)); spread = max_vruntime - MIN_vruntime; SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread)); +#if !defined(CONFIG_CACHY_SCHED) spread0 = min_vruntime - rq0_min_vruntime; SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0", SPLIT_NS(spread0)); +#endif SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over", cfs_rq->nr_spread_over); SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c0c4d9ad7da8..c29ab40d3a7a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -19,6 +19,10 @@ * * Adaptive scheduling granularity, math enhancements by Peter Zijlstra * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra + * + * Cachy enhancements CPU cache and scheduler based on + * Highest Response Ratio Next (HRRN) policy. + * (C) 2020 Hamad Al Marri */ #include "sched.h" @@ -38,6 +42,12 @@ unsigned int sysctl_sched_latency = 6000000ULL; static unsigned int normalized_sysctl_sched_latency = 6000000ULL; +#ifdef CONFIG_CACHY_SCHED +int hrrn_max_lifetime = 30000; // in ms +int cachy_harsh_mode = 0; +#define U64_TO_U32(X) ((u32) (((X) >> 8) & 0xFFFFFFFF)) +#endif + /* * The initial- and re-scaling of tunables is configurable * @@ -512,7 +522,7 @@ void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec); /************************************************************** * Scheduling class tree data structure manipulation methods: */ - +#if !defined(CONFIG_CACHY_SCHED) static inline u64 max_vruntime(u64 max_vruntime, u64 vruntime) { s64 delta = (s64)(vruntime - max_vruntime); @@ -568,7 +578,129 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq) cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; #endif } +#endif /* CONFIG_CACHY_SCHED */ + +#ifdef CONFIG_CACHY_SCHED +/* + * Does se have higher HRRN value than curr? If yes, return 1, + * otherwise return -1 + * se is before curr if se has higher HRRN + */ +static int +entity_before(u64 now, struct sched_entity *curr, struct sched_entity *se) +{ + u32 l_curr, l_se, r_curr, r_se, vr_curr, vr_se; + s32 diff; + + /* + * in case of vruntime==0, logical OR with 1 would + * make sure that the least sig. bit is 1 + */ + vr_curr = U64_TO_U32(curr->vruntime) | 1; + vr_se = U64_TO_U32(se->vruntime) | 1; + + l_curr = U64_TO_U32(now - curr->hrrn_start_time); + l_se = U64_TO_U32(now - se->hrrn_start_time); + + r_curr = l_curr / vr_curr; + r_se = l_se / vr_se; + + diff = r_se - r_curr; + + // take the remainder if equal + if (diff == 0) { + r_curr = l_curr % vr_curr; + r_se = l_se % vr_se; + diff = r_se - r_curr; + } + + if (diff > 0) + return 1; + + return -1; +} +/* + * Enqueue an entity + */ +static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) +{ + struct sched_entity *iter, *prev = NULL; + u64 now = sched_clock(); + se->next = NULL; + se->prev = NULL; + + if (likely(cfs_rq->head)) { + + // start from head + iter = cfs_rq->head; + + // does iter have higher HRRN value than se? 
+ while (iter && entity_before(now, se, iter) == 1) { + prev = iter; + iter = iter->next; + } + + // if iter == NULL, insert se at the end + if (iter == NULL) { + prev->next = se; + se->prev = prev; + } + // else if not head, insert se before iter + else if (iter != cfs_rq->head) { + se->next = iter; + se->prev = prev; + + iter->prev = se; + prev->next = se; + } + // else iter == head, insert se at head + else { + se->next = cfs_rq->head; + cfs_rq->head->prev = se; + + // lastly reset the head + cfs_rq->head = se; + } + return; + } + + // if empty rq + cfs_rq->head = se; +} + +static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) +{ + // if only one se in rq + if (cfs_rq->head->next == NULL) + { + cfs_rq->head = NULL; + WRITE_ONCE(cfs_rq->hrrn_head, 0UL); + } + else if (se == cfs_rq->head) + { + // if it is the head + cfs_rq->head = cfs_rq->head->next; + cfs_rq->head->prev = NULL; + } + else + { + // if in the middle + struct sched_entity *prev = se->prev; + struct sched_entity *next = se->next; + + prev->next = next; + + if (next) + next->prev = prev; + } +} + +struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) +{ + return cfs_rq->head; +} +#else /* * Enqueue an entity into the rb-tree: */ @@ -626,16 +758,21 @@ static struct sched_entity *__pick_next_entity(struct sched_entity *se) return rb_entry(next, struct sched_entity, run_node); } +#endif /* CONFIG_CACHY_SCHED */ #ifdef CONFIG_SCHED_DEBUG struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) { +#ifdef CONFIG_CACHY_SCHED + return cfs_rq->head; +#else struct rb_node *last = rb_last(&cfs_rq->tasks_timeline.rb_root); if (!last) return NULL; return rb_entry(last, struct sched_entity, run_node); +#endif /* CONFIG_CACHY_SCHED */ } /************************************************************** @@ -720,6 +857,7 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) return slice; } +#if !defined(CONFIG_CACHY_SCHED) /* * We calculate the vruntime slice of a to-be-inserted task. * @@ -729,6 +867,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) { return calc_delta_fair(sched_slice(cfs_rq, se), se); } +#endif /* CONFIG_CACHY_SCHED */ #include "pelt.h" #ifdef CONFIG_SMP @@ -836,13 +975,41 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) } #endif /* CONFIG_SMP */ +#ifdef CONFIG_CACHY_SCHED +static void reset_lifetime(u64 now, struct sched_entity *se) +{ + /* + * left shift 20 bits is approximately = * 1000000 + * we don't need the precision of life time + * Ex. for 30s, with left shift (20bits) == 31.457s + */ + u64 max_life_ns = ((u64) hrrn_max_lifetime) << 20; + u64 life_time = now - se->hrrn_start_time; + s64 diff = life_time - max_life_ns; + + if (unlikely(diff > 0)) { + // multiply life_time by 8 for more precision + u64 old_hrrn_x8 = life_time / ((se->vruntime >> 3) | 1); + + // reset life to half max_life (i.e ~15s) + se->hrrn_start_time = now - (max_life_ns >> 1); + + // avoid division by zero + if (old_hrrn_x8 == 0) old_hrrn_x8 = 1; + + // reset vruntime based on old hrrn ration + se->vruntime = (max_life_ns << 2) / old_hrrn_x8; + } +} +#endif /* CONFIG_CACHY_SCHED */ + /* * Update the current task's runtime statistics. 
*/ static void update_curr(struct cfs_rq *cfs_rq) { struct sched_entity *curr = cfs_rq->curr; - u64 now = rq_clock_task(rq_of(cfs_rq)); + u64 now = sched_clock(); u64 delta_exec; if (unlikely(!curr)) @@ -861,12 +1028,20 @@ static void update_curr(struct cfs_rq *cfs_rq) schedstat_add(cfs_rq->exec_clock, delta_exec); curr->vruntime += calc_delta_fair(delta_exec, curr); + +#ifdef CONFIG_CACHY_SCHED + reset_lifetime(sched_clock(), curr); +#else update_min_vruntime(cfs_rq); +#endif if (entity_is_task(curr)) { struct task_struct *curtask = task_of(curr); +#if !defined(CONFIG_CACHY_SCHED) trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime); +#endif + cgroup_account_cputime(curtask, delta_exec); account_group_exec_runtime(curtask, delta_exec); } @@ -882,6 +1057,7 @@ static void update_curr_fair(struct rq *rq) static inline void update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se) { +#if !defined(CONFIG_CACHY_SCHED) u64 wait_start, prev_wait_start; if (!schedstat_enabled()) @@ -895,11 +1071,13 @@ update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se) wait_start -= prev_wait_start; __schedstat_set(se->statistics.wait_start, wait_start); +#endif } static inline void update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se) { +#if !defined(CONFIG_CACHY_SCHED) struct task_struct *p; u64 delta; @@ -927,11 +1105,13 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se) __schedstat_inc(se->statistics.wait_count); __schedstat_add(se->statistics.wait_sum, delta); __schedstat_set(se->statistics.wait_start, 0); +#endif } static inline void update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) { +#if !defined(CONFIG_CACHY_SCHED) struct task_struct *tsk = NULL; u64 sleep_start, block_start; @@ -995,6 +1175,7 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) account_scheduler_latency(tsk, delta >> 10, 0); } } +#endif } /* @@ -1003,6 +1184,7 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) static inline void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) { +#if !defined(CONFIG_CACHY_SCHED) if (!schedstat_enabled()) return; @@ -1015,12 +1197,13 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) if (flags & ENQUEUE_WAKEUP) update_stats_enqueue_sleeper(cfs_rq, se); +#endif } static inline void update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) { - +#if !defined(CONFIG_CACHY_SCHED) if (!schedstat_enabled()) return; @@ -1041,6 +1224,7 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) __schedstat_set(se->statistics.block_start, rq_clock(rq_of(cfs_rq))); } +#endif } /* @@ -1052,7 +1236,7 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) /* * We are starting a new run period: */ - se->exec_start = rq_clock_task(rq_of(cfs_rq)); + se->exec_start = sched_clock(); } /************************************************** @@ -3060,15 +3244,19 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) static inline void enqueue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { +#if !defined(CONFIG_CACHY_SCHED) cfs_rq->avg.load_avg += se->avg.load_avg; cfs_rq->avg.load_sum += se_weight(se) * se->avg.load_sum; +#endif } static inline void dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { +#if !defined(CONFIG_CACHY_SCHED) sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg); 
sub_positive(&cfs_rq->avg.load_sum, se_weight(se) * se->avg.load_sum); +#endif } #else static inline void @@ -3324,6 +3512,7 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) void set_task_rq_fair(struct sched_entity *se, struct cfs_rq *prev, struct cfs_rq *next) { +#if !defined(CONFIG_CACHY_SCHED) u64 p_last_update_time; u64 n_last_update_time; @@ -3363,6 +3552,7 @@ void set_task_rq_fair(struct sched_entity *se, #endif __update_load_avg_blocked_se(p_last_update_time, se); se->avg.last_update_time = n_last_update_time; +#endif } @@ -3642,6 +3832,9 @@ static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) { +#ifdef CONFIG_CACHY_SCHED + return 0; +#else unsigned long removed_load = 0, removed_util = 0, removed_runnable = 0; struct sched_avg *sa = &cfs_rq->avg; int decayed = 0; @@ -3687,8 +3880,10 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) #endif return decayed; +#endif } +#if !defined(CONFIG_CACHY_SCHED) /** * attach_entity_load_avg - attach this entity to its cfs_rq load avg * @cfs_rq: cfs_rq to attach to @@ -3766,6 +3961,7 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s trace_pelt_cfs_tp(cfs_rq); } +#endif /* * Optional action to be done while updating the load average @@ -3777,6 +3973,7 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s /* Update task and its cfs_rq load average */ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) { +#if !defined(CONFIG_CACHY_SCHED) u64 now = cfs_rq_clock_pelt(cfs_rq); int decayed; @@ -3808,8 +4005,10 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s if (flags & UPDATE_TG) update_tg_load_avg(cfs_rq, 0); } +#endif } +#if !defined(CONFIG_CACHY_SCHED) #ifndef CONFIG_64BIT static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq) { @@ -3830,6 +4029,7 @@ static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq) return cfs_rq->avg.last_update_time; } #endif +#endif /* * Synchronize entity load avg of dequeued entity without locking @@ -3837,11 +4037,13 @@ static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq) */ static void sync_entity_load_avg(struct sched_entity *se) { +#if !defined(CONFIG_CACHY_SCHED) struct cfs_rq *cfs_rq = cfs_rq_of(se); u64 last_update_time; last_update_time = cfs_rq_last_update_time(cfs_rq); __update_load_avg_blocked_se(last_update_time, se); +#endif } /* @@ -3850,6 +4052,7 @@ static void sync_entity_load_avg(struct sched_entity *se) */ static void remove_entity_load_avg(struct sched_entity *se) { +#if !defined(CONFIG_CACHY_SCHED) struct cfs_rq *cfs_rq = cfs_rq_of(se); unsigned long flags; @@ -3867,6 +4070,7 @@ static void remove_entity_load_avg(struct sched_entity *se) cfs_rq->removed.load_avg += se->avg.load_avg; cfs_rq->removed.runnable_avg += se->avg.runnable_avg; raw_spin_unlock_irqrestore(&cfs_rq->removed.lock, flags); +#endif } static inline unsigned long cfs_rq_runnable_avg(struct cfs_rq *cfs_rq) @@ -4089,7 +4293,7 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {} static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se) { -#ifdef CONFIG_SCHED_DEBUG +#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_CACHY_SCHED) s64 d = se->vruntime - cfs_rq->min_vruntime; if (d < 0) @@ -4100,6 +4304,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se) #endif } +#if 
!defined(CONFIG_CACHY_SCHED) static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) { @@ -4133,9 +4338,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) } static void check_enqueue_throttle(struct cfs_rq *cfs_rq); +#endif /* CONFIG_CACHY_SCHED */ static inline void check_schedstat_required(void) { +#if !defined(CONFIG_CACHY_SCHED) #ifdef CONFIG_SCHEDSTATS if (schedstat_enabled()) return; @@ -4152,6 +4359,7 @@ static inline void check_schedstat_required(void) "kernel.sched_schedstats=1\n"); } #endif +#endif } static inline bool cfs_bandwidth_used(void); @@ -4189,18 +4397,23 @@ static inline bool cfs_bandwidth_used(void); static void enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) { +#if !defined(CONFIG_CACHY_SCHED) bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED); +#endif bool curr = cfs_rq->curr == se; +#if !defined(CONFIG_CACHY_SCHED) /* * If we're the current task, we must renormalise before calling * update_curr(). */ if (renorm && curr) se->vruntime += cfs_rq->min_vruntime; +#endif update_curr(cfs_rq); +#if !defined(CONFIG_CACHY_SCHED) /* * Otherwise, renormalise after, such that we're placed at the current * moment in time, instead of some random moment in the past. Being @@ -4209,6 +4422,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) */ if (renorm && !curr) se->vruntime += cfs_rq->min_vruntime; +#endif /* * When enqueuing a sched_entity, we must: @@ -4223,8 +4437,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) update_cfs_group(se); account_entity_enqueue(cfs_rq, se); +#if !defined(CONFIG_CACHY_SCHED) if (flags & ENQUEUE_WAKEUP) place_entity(cfs_rq, se, 0); +#endif check_schedstat_required(); update_stats_enqueue(cfs_rq, se, flags); @@ -4233,6 +4449,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) __enqueue_entity(cfs_rq, se); se->on_rq = 1; +#if !defined(CONFIG_CACHY_SCHED) /* * When bandwidth control is enabled, cfs might have been removed * because of a parent been throttled but cfs->nr_running > 1. Try to @@ -4243,8 +4460,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) if (cfs_rq->nr_running == 1) check_enqueue_throttle(cfs_rq); +#endif } +#if !defined(CONFIG_CACHY_SCHED) static void __clear_buddies_last(struct sched_entity *se) { for_each_sched_entity(se) { @@ -4289,6 +4508,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) if (cfs_rq->skip == se) __clear_buddies_skip(se); } +#endif // !CONFIG_CACHY_SCHED static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); @@ -4313,13 +4533,16 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) update_stats_dequeue(cfs_rq, se, flags); +#if !defined(CONFIG_CACHY_SCHED) clear_buddies(cfs_rq, se); +#endif if (se != cfs_rq->curr) __dequeue_entity(cfs_rq, se); se->on_rq = 0; account_entity_dequeue(cfs_rq, se); +#if !defined(CONFIG_CACHY_SCHED) /* * Normalize after update_curr(); which will also have moved * min_vruntime if @se is the one holding it back. 
But before doing @@ -4328,12 +4551,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) */ if (!(flags & DEQUEUE_SLEEP)) se->vruntime -= cfs_rq->min_vruntime; +#endif /* return excess runtime on last dequeue */ return_cfs_rq_runtime(cfs_rq); update_cfs_group(se); +#if !defined(CONFIG_CACHY_SCHED) /* * Now advance min_vruntime if @se was the entity holding it back, * except when: DEQUEUE_SAVE && !DEQUEUE_MOVE, in this case we'll be @@ -4342,8 +4567,35 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) */ if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE) update_min_vruntime(cfs_rq); +#endif +} + +#ifdef CONFIG_CACHY_SCHED +static inline u32 +hrrn_of(u64 now, struct sched_entity *se) +{ + u32 l_se, r_se, vr_se; + + vr_se = U64_TO_U32(se->vruntime) | 1; + l_se = U64_TO_U32(now - se->hrrn_start_time); + r_se = l_se / vr_se; + + return r_se; } +/* + * Preempt the current task with a newly woken task if needed: + */ +static void +check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) +{ + u64 now = sched_clock(); + + // does head have higher HRRN value than curr + if (entity_before(now, curr, cfs_rq->head) == 1) + resched_curr(rq_of(cfs_rq)); +} +#else /* * Preempt the current task with a newly woken task if needed: */ @@ -4383,6 +4635,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) if (delta > ideal_runtime) resched_curr(rq_of(cfs_rq)); } +#endif /* CONFIG_CACHY_SCHED */ static void set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) @@ -4417,6 +4670,21 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) se->prev_sum_exec_runtime = se->sum_exec_runtime; } +#ifdef CONFIG_CACHY_SCHED +static struct sched_entity * +pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) +{ + struct sched_entity *se = cfs_rq->head; + u64 now = sched_clock(); + + if (unlikely(!se)) + se = curr; + else if (unlikely(curr && entity_before(now, se, curr) == 1)) + se = curr; + + return se; +} +#else static int wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); @@ -4477,6 +4745,7 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) return se; } +#endif /* CONFIG_CACHY_SCHED */ static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq); @@ -4736,6 +5005,9 @@ static int tg_throttle_down(struct task_group *tg, void *data) static bool throttle_cfs_rq(struct cfs_rq *cfs_rq) { +#ifdef CONFIG_CACHY_SCHED + return false; +#else struct rq *rq = rq_of(cfs_rq); struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); struct sched_entity *se; @@ -4801,10 +5073,12 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq) cfs_rq->throttled = 1; cfs_rq->throttled_clock = rq_clock(rq); return true; +#endif } void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) { +#if !defined(CONFIG_CACHY_SCHED) struct rq *rq = rq_of(cfs_rq); struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); struct sched_entity *se; @@ -4886,6 +5160,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) /* Determine whether we need to wake up potentially idle CPU: */ if (rq->curr == rq->idle && rq->cfs.nr_running) resched_curr(rq); +#endif } static void distribute_cfs_runtime(struct cfs_bandwidth *cfs_b) @@ -5341,7 +5616,11 @@ static inline bool cfs_bandwidth_used(void) static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {} static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { return false; } + +#if !defined(CONFIG_CACHY_SCHED) static void check_enqueue_throttle(struct 
cfs_rq *cfs_rq) {} +#endif + static inline void sync_throttle(struct task_group *tg, int cpu) {} static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} @@ -5472,7 +5751,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; +#if !defined(CONFIG_CACHY_SCHED) int idle_h_nr_running = task_has_idle_policy(p); +#endif int task_new = !(flags & ENQUEUE_WAKEUP); /* @@ -5491,6 +5772,13 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (p->in_iowait) cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT); +#ifdef CONFIG_CACHY_SCHED + if (!se->on_rq) { + cfs_rq = cfs_rq_of(se); + enqueue_entity(cfs_rq, se, flags); + cfs_rq->h_nr_running++; + } +#else for_each_sched_entity(se) { if (se->on_rq) break; @@ -5528,6 +5816,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (throttled_hierarchy(cfs_rq)) list_add_leaf_cfs_rq(cfs_rq); } +#endif /* At this point se is NULL and we are at root level*/ add_nr_running(rq, 1); @@ -5549,6 +5838,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (!task_new) update_overutilized_status(rq); +#if !defined(CONFIG_CACHY_SCHED) enqueue_throttle: if (cfs_bandwidth_used()) { /* @@ -5564,13 +5854,15 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) break; } } - +#endif assert_list_leaf_cfs_rq(rq); hrtick_update(rq); } +#if !defined(CONFIG_CACHY_SCHED) static void set_next_buddy(struct sched_entity *se); +#endif /* * The dequeue_task method is called before nr_running is @@ -5582,6 +5874,12 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; int task_sleep = flags & DEQUEUE_SLEEP; + +#ifdef CONFIG_CACHY_SCHED + cfs_rq = cfs_rq_of(se); + dequeue_entity(cfs_rq, se, flags); + cfs_rq->h_nr_running--; +#else int idle_h_nr_running = task_has_idle_policy(p); bool was_sched_idle = sched_idle_rq(rq); @@ -5600,12 +5898,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (cfs_rq->load.weight) { /* Avoid re-evaluating load for this entity: */ se = parent_entity(se); + /* * Bias pick_next to pick a task from this cfs_rq, as * p is sleeping when it is within its sched_slice. */ if (task_sleep && se && !throttled_hierarchy(cfs_rq)) set_next_buddy(se); + break; } flags |= DEQUEUE_SLEEP; @@ -5626,15 +5926,17 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) goto dequeue_throttle; } - +#endif /* At this point se is NULL and we are at root level*/ sub_nr_running(rq, 1); +#if !defined(CONFIG_CACHY_SCHED) /* balance early to pull high priority tasks */ if (unlikely(!was_sched_idle && sched_idle_rq(rq))) rq->next_balance = jiffies; dequeue_throttle: +#endif util_est_dequeue(&rq->cfs, p, task_sleep); hrtick_update(rq); } @@ -5721,6 +6023,7 @@ static unsigned long capacity_of(int cpu) return cpu_rq(cpu)->cpu_capacity; } +#if !defined(CONFIG_CACHY_SCHED) static void record_wakee(struct task_struct *p) { /* @@ -5767,7 +6070,9 @@ static int wake_wide(struct task_struct *p) return 0; return 1; } +#endif +#if !defined(CONFIG_CACHY_SCHED) /* * The purpose of wake_affine() is to quickly determine on which CPU we can run * soonest. 
For the purpose of speed we only consider the waking and previous @@ -5866,6 +6171,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, schedstat_inc(p->se.statistics.nr_wakeups_affine); return target; } +#endif static struct sched_group * find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu); @@ -6424,6 +6730,7 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p) return min_t(unsigned long, util, capacity_orig_of(cpu)); } +#if !defined(CONFIG_CACHY_SCHED) /* * Predicts what cpu_util(@cpu) would return if @p was migrated (and enqueued) * to @dst_cpu. @@ -6657,6 +6964,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) return -1; } +#endif /* CONFIG_CACHY_SCHED */ /* * select_task_rq_fair: Select target runqueue for the waking task in domains @@ -6673,6 +6981,11 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) static int select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags) { +#ifdef CONFIG_CACHY_SCHED + int new_cpu = prev_cpu; + + return select_idle_sibling(p, prev_cpu, new_cpu); +#else struct sched_domain *tmp, *sd = NULL; int cpu = smp_processor_id(); int new_cpu = prev_cpu; @@ -6727,9 +7040,12 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f rcu_read_unlock(); return new_cpu; +#endif /* CONFIG_CACHY_SCHED */ } +#if !defined(CONFIG_CACHY_SCHED) static void detach_entity_cfs_rq(struct sched_entity *se); +#endif /* * Called immediately before a task is migrated to a new CPU; task_cpu(p) and @@ -6738,6 +7054,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se); */ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu) { +#if !defined(CONFIG_CACHY_SCHED) /* * As blocked tasks retain absolute vruntime the migration needs to * deal with this by subtracting the old and adding the new @@ -6783,6 +7100,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu) */ remove_entity_load_avg(&p->se); } +#endif /* CONFIG_CACHY_SCHED */ /* Tell new CPU we are migrated */ p->se.avg.last_update_time = 0; @@ -6808,6 +7126,7 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) } #endif /* CONFIG_SMP */ +#if !defined(CONFIG_CACHY_SCHED) static unsigned long wakeup_gran(struct sched_entity *se) { unsigned long gran = sysctl_sched_wakeup_granularity; @@ -6886,6 +7205,7 @@ static void set_skip_buddy(struct sched_entity *se) for_each_sched_entity(se) cfs_rq_of(se)->skip = se; } +#endif /* CONFIG_CACHY_SCHED */ /* * Preempt the current task with a newly woken task if needed: @@ -6894,9 +7214,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ { struct task_struct *curr = rq->curr; struct sched_entity *se = &curr->se, *pse = &p->se; + +#if !defined(CONFIG_CACHY_SCHED) struct cfs_rq *cfs_rq = task_cfs_rq(curr); int scale = cfs_rq->nr_running >= sched_nr_latency; int next_buddy_marked = 0; +#endif /* CONFIG_CACHY_SCHED */ if (unlikely(se == pse)) return; @@ -6910,10 +7233,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ if (unlikely(throttled_hierarchy(cfs_rq_of(pse)))) return; +#if !defined(CONFIG_CACHY_SCHED) if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) { set_next_buddy(pse); next_buddy_marked = 1; } +#endif /* CONFIG_CACHY_SCHED */ /* * We can come here with TIF_NEED_RESCHED already set from new task @@ -6943,6 +7268,11 @@ static void check_preempt_wakeup(struct rq *rq, 
struct task_struct *p, int wake_ find_matching_se(&se, &pse); update_curr(cfs_rq_of(se)); BUG_ON(!pse); + +#ifdef CONFIG_CACHY_SCHED + if (entity_before(sched_clock(), se, pse) == 1) + goto preempt; +#else if (wakeup_preempt_entity(se, pse) == 1) { /* * Bias pick_next to pick the sched entity that is @@ -6952,11 +7282,14 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ set_next_buddy(pse); goto preempt; } +#endif /* CONFIG_CACHY_SCHED */ return; preempt: resched_curr_lazy(rq); + +#if !defined(CONFIG_CACHY_SCHED) /* * Only set the backward buddy when the current task is still * on the rq. This can happen when a wakeup gets interleaved @@ -6971,6 +7304,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se)) set_last_buddy(se); +#endif /* CONFIG_CACHY_SCHED */ } struct task_struct * @@ -7066,11 +7400,23 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf if (prev) put_prev_task(rq, prev); +#ifdef CONFIG_CACHY_SCHED + se = pick_next_entity(cfs_rq, NULL); + set_next_entity(cfs_rq, se); + + if (cfs_rq->head) { + u32 hrrn = hrrn_of(sched_clock(), cfs_rq->head); + WRITE_ONCE(cfs_rq->hrrn_head, hrrn); + } else { + WRITE_ONCE(cfs_rq->hrrn_head, 0UL); + } +#else do { se = pick_next_entity(cfs_rq, NULL); set_next_entity(cfs_rq, se); cfs_rq = group_cfs_rq(se); } while (cfs_rq); +#endif p = task_of(se); @@ -7092,6 +7438,10 @@ done: __maybe_unused; return p; idle: +#ifdef CONFIG_CACHY_SCHED + WRITE_ONCE(cfs_rq->hrrn_head, 0UL); +#endif + if (!rf) return NULL; @@ -7145,7 +7495,10 @@ static void yield_task_fair(struct rq *rq) { struct task_struct *curr = rq->curr; struct cfs_rq *cfs_rq = task_cfs_rq(curr); + +#if !defined(CONFIG_CACHY_SCHED) struct sched_entity *se = &curr->se; +#endif /* * Are we the only task in the tree? @@ -7153,7 +7506,9 @@ static void yield_task_fair(struct rq *rq) if (unlikely(rq->nr_running == 1)) return; +#if !defined(CONFIG_CACHY_SCHED) clear_buddies(cfs_rq, se); +#endif if (curr->policy != SCHED_BATCH) { update_rq_clock(rq); @@ -7169,7 +7524,9 @@ static void yield_task_fair(struct rq *rq) rq_clock_skip_update(rq); } +#if !defined(CONFIG_CACHY_SCHED) set_skip_buddy(se); +#endif } static bool yield_to_task_fair(struct rq *rq, struct task_struct *p) @@ -7180,8 +7537,10 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p) if (!se->on_rq || throttled_hierarchy(cfs_rq_of(se))) return false; +#if !defined(CONFIG_CACHY_SCHED) /* Tell the scheduler that we'd really like pse to run next. */ set_next_buddy(se); +#endif yield_task_fair(rq); @@ -7390,6 +7749,7 @@ struct lb_env { struct list_head tasks; }; +#if !defined(CONFIG_CACHY_SCHED) /* * Is this task likely cache-hot: */ @@ -7737,7 +8097,9 @@ static int detach_tasks(struct lb_env *env) return detached; } +#endif +#if !defined(CONFIG_CACHY_SCHED) /* * attach_task() -- attach the task detached by detach_task() to its new rq. 
*/ @@ -7786,6 +8148,7 @@ static void attach_tasks(struct lb_env *env) rq_unlock(env->dst_rq, &rf); } +#endif #ifdef CONFIG_NO_HZ_COMMON static inline bool cfs_rq_has_blocked(struct cfs_rq *cfs_rq) @@ -7831,6 +8194,7 @@ static inline bool others_have_blocked(struct rq *rq) { return false; } static inline void update_blocked_load_status(struct rq *rq, bool has_blocked) {} #endif +#if !defined(CONFIG_CACHY_SCHED) static bool __update_blocked_others(struct rq *rq, bool *done) { const struct sched_class *curr_class; @@ -7856,6 +8220,7 @@ static bool __update_blocked_others(struct rq *rq, bool *done) return decayed; } +#endif #ifdef CONFIG_FAIR_GROUP_SCHED @@ -7963,6 +8328,7 @@ static unsigned long task_h_load(struct task_struct *p) cfs_rq_load_avg(cfs_rq) + 1); } #else +#if !defined(CONFIG_CACHY_SCHED) static bool __update_blocked_fair(struct rq *rq, bool *done) { struct cfs_rq *cfs_rq = &rq->cfs; @@ -7974,6 +8340,7 @@ static bool __update_blocked_fair(struct rq *rq, bool *done) return decayed; } +#endif static unsigned long task_h_load(struct task_struct *p) { @@ -7983,6 +8350,7 @@ static unsigned long task_h_load(struct task_struct *p) static void update_blocked_averages(int cpu) { +#if !defined(CONFIG_CACHY_SCHED) bool decayed = false, done = true; struct rq *rq = cpu_rq(cpu); struct rq_flags rf; @@ -7997,6 +8365,7 @@ static void update_blocked_averages(int cpu) if (decayed) cpufreq_update_util(rq, 0); rq_unlock_irqrestore(rq, &rf); +#endif } /********** Helpers for find_busiest_group ************************/ @@ -9140,6 +9509,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s * different in groups. */ +#if !defined(CONFIG_CACHY_SCHED) /** * find_busiest_group - Returns the busiest group within the sched_domain * if there is an imbalance. @@ -9405,6 +9775,7 @@ static struct rq *find_busiest_queue(struct lb_env *env, return busiest; } +#endif /* * Max backoff if we encounter pinned tasks. Pretty arbitrary value, but @@ -9451,6 +9822,7 @@ voluntary_active_balance(struct lb_env *env) return 0; } +#if !defined(CONFIG_CACHY_SCHED) static int need_active_balance(struct lb_env *env) { struct sched_domain *sd = env->sd; @@ -9463,6 +9835,7 @@ static int need_active_balance(struct lb_env *env) static int active_load_balance_cpu_stop(void *data); + static int should_we_balance(struct lb_env *env) { struct sched_group *sg = env->sd->groups; @@ -9772,6 +10145,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, out: return ld_moved; } +#endif static inline unsigned long get_sd_balance_interval(struct sched_domain *sd, int cpu_busy) @@ -9801,6 +10175,7 @@ update_next_balance(struct sched_domain *sd, unsigned long *next_balance) *next_balance = next; } +#if !defined(CONFIG_CACHY_SCHED) /* * active_load_balance_cpu_stop is run by the CPU stopper. It pushes * running tasks off the busiest CPU onto idle CPUs. It requires at @@ -9892,6 +10267,7 @@ static int active_load_balance_cpu_stop(void *data) } static DEFINE_SPINLOCK(balancing); +#endif /* * Scale the max load_balance interval with the number of CPUs in the system. @@ -9902,6 +10278,7 @@ void update_max_interval(void) max_load_balance_interval = HZ*num_online_cpus()/10; } +#if !defined(CONFIG_CACHY_SCHED) /* * It checks each scheduling domain to see if it is due to be balanced, * and initiates a balancing operation if so. 
@@ -10007,6 +10384,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) #endif } } +#endif static inline int on_null_domain(struct rq *rq) { @@ -10036,6 +10414,7 @@ static inline int find_new_ilb(void) return nr_cpu_ids; } +#if !defined(CONFIG_CACHY_SCHED) /* * Kick a CPU to do the nohz balancing, if it is time for it. We pick any * idle CPU in the HK_FLAG_MISC housekeeping set (if there is one). @@ -10186,6 +10565,7 @@ static void nohz_balancer_kick(struct rq *rq) if (flags) kick_ilb(flags); } +#endif static void set_cpu_sd_state_busy(int cpu) { @@ -10293,6 +10673,7 @@ void nohz_balance_enter_idle(int cpu) WRITE_ONCE(nohz.has_blocked, 1); } +#if !defined(CONFIG_CACHY_SCHED) /* * Internal function that runs load balance for all idle cpus. The load balance * can be a simple update of blocked load or a complete load balance with @@ -10453,8 +10834,10 @@ static void nohz_newidle_balance(struct rq *this_rq) kick_ilb(NOHZ_STATS_KICK); raw_spin_lock(&this_rq->lock); } +#endif #else /* !CONFIG_NO_HZ_COMMON */ +#if !defined(CONFIG_CACHY_SCHED) static inline void nohz_balancer_kick(struct rq *rq) { } static inline bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) @@ -10463,8 +10846,111 @@ static inline bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle } static inline void nohz_newidle_balance(struct rq *this_rq) { } +#endif + #endif /* CONFIG_NO_HZ_COMMON */ +#ifdef CONFIG_CACHY_SCHED +static int +cachy_can_migrate_task(struct task_struct *p, int dst_cpu, struct rq *src_rq) +{ + /* + * We do not migrate tasks that are: + * 1) throttled_lb_pair, or + * 2) cannot be migrated to this CPU due to cpus_ptr, or + * 3) running (obviously), or + */ + + if (task_running(src_rq, p)) + return 0; + + if (!cpumask_test_cpu(dst_cpu, p->cpus_ptr)) + return 0; + + if (p->se.exec_start == 0) + return 0; + + return 1; +} + +static void pull_from_unlock(struct rq *this_rq, + struct rq *src_rq, + struct rq_flags *rf, + struct task_struct *p, + int dst_cpu) +{ + // detach task + deactivate_task(src_rq, p, DEQUEUE_NOCLOCK); + set_task_cpu(p, dst_cpu); + + // unlock src rq + rq_unlock(src_rq, rf); + local_irq_restore(rf->flags); + + // lock this rq + raw_spin_lock(&this_rq->lock); + update_rq_clock(this_rq); + + activate_task(this_rq, p, ENQUEUE_NOCLOCK); + check_preempt_curr(this_rq, p, 0); + + // unlock this rq + raw_spin_unlock(&this_rq->lock); +} + +static inline struct rq * +find_max_hrrn_rq(struct cfs_rq *cfs_rq, int dst_cpu) +{ + struct rq *tmp_rq, *max_rq = NULL; + int cpu; + u32 max_hrrn = cfs_rq->hrrn_head; + u32 local_hrrn; + + // find max hrrn + for_each_online_cpu(cpu) { + if (cpu == dst_cpu) + continue; + + tmp_rq = cpu_rq(cpu); + + if (tmp_rq->cfs.nr_running < 2 || !tmp_rq->cfs.head) + continue; + + local_hrrn = READ_ONCE(tmp_rq->cfs.hrrn_head); + + if (local_hrrn > max_hrrn) { + max_hrrn = local_hrrn; + max_rq = tmp_rq; + } + } + + return max_rq; +} + +static int try_pull_from(struct rq *src_rq, struct rq *this_rq) +{ + struct rq_flags rf; + int dst_cpu = cpu_of(this_rq); + struct task_struct *p; + + rq_lock_irqsave(src_rq, &rf); + update_rq_clock(src_rq); + + if (src_rq->cfs.head && src_rq->cfs.nr_running > 1) { + p = task_of(src_rq->cfs.head); + + if (cachy_can_migrate_task(p, dst_cpu, src_rq)) { + pull_from_unlock(this_rq, src_rq, &rf, p, dst_cpu); + return 1; + } + } + + rq_unlock(src_rq, &rf); + local_irq_restore(rf.flags); + + return 0; +} + /* * idle_balance is called by schedule() if this_cpu is about to become * idle. 
Attempts to pull tasks from other CPUs. @@ -10475,6 +10961,105 @@ static inline void nohz_newidle_balance(struct rq *this_rq) { } * > 0 - success, new (fair) tasks present */ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) +{ + int this_cpu = this_rq->cpu; + struct task_struct *p = NULL; + struct rq *src_rq; + int src_cpu; + struct rq_flags src_rf; + int pulled_task = 0; + int cores_round = 1; + + update_misfit_status(NULL, this_rq); + /* + * We must set idle_stamp _before_ calling idle_balance(), such that we + * measure the duration of idle_balance() as idle time. + */ + this_rq->idle_stamp = rq_clock(this_rq); + + /* + * Do not pull tasks towards !active CPUs... + */ + if (!cpu_active(this_cpu)) + return 0; + + /* + * This is OK, because current is on_cpu, which avoids it being picked + * for load-balance and preemption/IRQs are still disabled avoiding + * further scheduler activity on it and we're being very careful to + * re-start the picking loop. + */ + rq_unpin_lock(this_rq, rf); + raw_spin_unlock(&this_rq->lock); + +again: + for_each_online_cpu(src_cpu) { + + if (src_cpu == this_cpu) + continue; + + if (cores_round && !cpus_share_cache(src_cpu, this_cpu)) + continue; + + src_rq = cpu_rq(src_cpu); + + rq_lock_irqsave(src_rq, &src_rf); + update_rq_clock(src_rq); + + if (src_rq->cfs.nr_running < 2 || !(src_rq->cfs.head)) + goto next; + + p = task_of(src_rq->cfs.head); + + if (cachy_can_migrate_task(p, this_cpu, src_rq)) { + pull_from_unlock(this_rq, src_rq, &src_rf, p, this_cpu); + + pulled_task = 1; + goto out; + } + +next: + rq_unlock(src_rq, &src_rf); + local_irq_restore(src_rf.flags); + + /* + * Stop searching for tasks to pull if there are + * now runnable tasks on this rq. + */ + if (pulled_task || this_rq->nr_running > 0) + goto out; + } + + if (cores_round) { + // now search for all cpus + cores_round = 0; + goto again; + } + +out: + raw_spin_lock(&this_rq->lock); + + /* + * While browsing the domains, we released the rq lock, a task could + * have been enqueued in the meantime. Since we're not going idle, + * pretend we pulled a task. + */ + if (this_rq->cfs.h_nr_running && !pulled_task) + pulled_task = 1; + + /* Is there a task of a high priority class? 
*/ + if (this_rq->nr_running != this_rq->cfs.h_nr_running) + pulled_task = -1; + + if (pulled_task) + this_rq->idle_stamp = 0; + + rq_repin_lock(this_rq, rf); + + return pulled_task; +} +#else +static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) { unsigned long next_balance = jiffies + HZ; int this_cpu = this_rq->cpu; @@ -10625,6 +11210,159 @@ void trigger_load_balance(struct rq *rq) nohz_balancer_kick(rq); } +#endif + +#ifdef CONFIG_CACHY_SCHED +static int +idle_try_pull_any(struct cfs_rq *cfs_rq) +{ + struct task_struct *p = NULL; + struct rq *this_rq = rq_of(cfs_rq), *src_rq; + int dst_cpu = cpu_of(this_rq); + int src_cpu; + struct rq_flags rf; + int pulled = 0; + int cores_round = 1; + +again: + for_each_online_cpu(src_cpu) { + + if (src_cpu == dst_cpu) + continue; + + if (cores_round && !cpus_share_cache(src_cpu, dst_cpu)) + continue; + + src_rq = cpu_rq(src_cpu); + + rq_lock_irqsave(src_rq, &rf); + update_rq_clock(src_rq); + + if (src_rq->cfs.nr_running < 2 || !src_rq->cfs.head) + goto next; + + p = task_of(src_rq->cfs.head); + + if (cachy_can_migrate_task(p, dst_cpu, src_rq)) { + pull_from_unlock(this_rq, src_rq, &rf, p, dst_cpu); + pulled = 1; + goto out; + } + +next: + rq_unlock(src_rq, &rf); + local_irq_restore(rf.flags); + } + + if (cores_round) { + // now search for all cpus + cores_round = 0; + goto again; + } + +out: + return pulled; +} + + +static int +try_pull_higher_HRRN(struct cfs_rq *cfs_rq) +{ + struct rq *this_rq = rq_of(cfs_rq), *max_rq; + int dst_cpu = cpu_of(this_rq); + + max_rq = find_max_hrrn_rq(cfs_rq, dst_cpu); + + if (!max_rq) + return 0; + + if (try_pull_from(max_rq, this_rq)) + return 1; + + return 0; +} + +static void try_pull_any(struct rq *this_rq) +{ + struct task_struct *p = NULL; + struct rq *src_rq; + int dst_cpu = cpu_of(this_rq); + int src_cpu; + struct rq_flags src_rf; + int cores_round = 1; + +again: + for_each_online_cpu(src_cpu) { + + if (src_cpu == dst_cpu) + continue; + + if (cores_round && !cpus_share_cache(src_cpu, dst_cpu)) + continue; + + src_rq = cpu_rq(src_cpu); + + if (src_rq->cfs.nr_running < 2 || !(src_rq->cfs.head) + || src_rq->cfs.nr_running <= this_rq->cfs.nr_running) + continue; + + rq_lock_irqsave(src_rq, &src_rf); + update_rq_clock(src_rq); + + if (src_rq->cfs.nr_running < 2 || !(src_rq->cfs.head) + || src_rq->cfs.nr_running <= this_rq->cfs.nr_running) + goto next; + + p = task_of(src_rq->cfs.head); + + if (cachy_can_migrate_task(p, dst_cpu, src_rq)) { + pull_from_unlock(this_rq, src_rq, &src_rf, p, dst_cpu); + return; + } + +next: + rq_unlock(src_rq, &src_rf); + local_irq_restore(src_rf.flags); + } + + if (cores_round) { + // now search for all cpus + cores_round = 0; + goto again; + } +} + +static inline void +active_balance(struct rq *rq) +{ + struct cfs_rq *cfs_rq = &rq->cfs; + + if (!cfs_rq->head || cfs_rq->nr_running < 2) + try_pull_higher_HRRN(&rq->cfs); + else + try_pull_any(rq); +} + +void trigger_load_balance(struct rq *rq) +{ + //int pulled = 0; + + /* Don't need to rebalance while attached to NULL domain */ + if (unlikely(on_null_domain(rq))) + return; + + if (rq->idle_balance) { + //pulled = idle_try_pull_any(&rq->cfs); + idle_try_pull_any(&rq->cfs); + + //if (pulled) + //resched_curr(rq); + } + else { + active_balance(rq); + } +} +#endif static void rq_online_fair(struct rq *rq) { @@ -10668,12 +11406,31 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) update_overutilized_status(task_rq(curr)); } +#ifdef CONFIG_CACHY_SCHED /* * called on fork with the child task as 
argument from the parent's context * - child not yet on the tasklist * - preemption disabled */ static void task_fork_fair(struct task_struct *p) +{ + struct cfs_rq *cfs_rq; + struct sched_entity *curr; + struct rq *rq = this_rq(); + struct rq_flags rf; + + rq_lock(rq, &rf); + update_rq_clock(rq); + + cfs_rq = task_cfs_rq(current); + curr = cfs_rq->curr; + if (curr) + update_curr(cfs_rq); + + rq_unlock(rq, &rf); +} +#else +static void task_fork_fair(struct task_struct *p) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se, *curr; @@ -10703,6 +11460,7 @@ static void task_fork_fair(struct task_struct *p) se->vruntime -= cfs_rq->min_vruntime; rq_unlock(rq, &rf); } +#endif /* CONFIG_CACHY_SCHED */ /* * Priority of the task has changed. Check to see if we preempt @@ -10779,9 +11537,12 @@ static void propagate_entity_cfs_rq(struct sched_entity *se) } } #else +#if !defined(CONFIG_CACHY_SCHED) static void propagate_entity_cfs_rq(struct sched_entity *se) { } #endif +#endif +#if !defined(CONFIG_CACHY_SCHED) static void detach_entity_cfs_rq(struct sched_entity *se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); @@ -10792,9 +11553,11 @@ static void detach_entity_cfs_rq(struct sched_entity *se) update_tg_load_avg(cfs_rq, false); propagate_entity_cfs_rq(se); } +#endif static void attach_entity_cfs_rq(struct sched_entity *se) { +#if !defined(CONFIG_CACHY_SCHED) struct cfs_rq *cfs_rq = cfs_rq_of(se); #ifdef CONFIG_FAIR_GROUP_SCHED @@ -10810,10 +11573,12 @@ static void attach_entity_cfs_rq(struct sched_entity *se) attach_entity_load_avg(cfs_rq, se); update_tg_load_avg(cfs_rq, false); propagate_entity_cfs_rq(se); +#endif } static void detach_task_cfs_rq(struct task_struct *p) { +#if !defined(CONFIG_CACHY_SCHED) struct sched_entity *se = &p->se; struct cfs_rq *cfs_rq = cfs_rq_of(se); @@ -10827,17 +11592,19 @@ static void detach_task_cfs_rq(struct task_struct *p) } detach_entity_cfs_rq(se); +#endif } static void attach_task_cfs_rq(struct task_struct *p) { +#if !defined(CONFIG_CACHY_SCHED) struct sched_entity *se = &p->se; struct cfs_rq *cfs_rq = cfs_rq_of(se); attach_entity_cfs_rq(se); - if (!vruntime_normalized(p)) se->vruntime += cfs_rq->min_vruntime; +#endif } static void switched_from_fair(struct rq *rq, struct task_struct *p) @@ -10893,13 +11660,21 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first) void init_cfs_rq(struct cfs_rq *cfs_rq) { cfs_rq->tasks_timeline = RB_ROOT_CACHED; + +#if !defined(CONFIG_CACHY_SCHED) cfs_rq->min_vruntime = (u64)(-(1LL << 20)); #ifndef CONFIG_64BIT cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; #endif +#endif /* CONFIG_CACHY_SCHED */ + #ifdef CONFIG_SMP raw_spin_lock_init(&cfs_rq->removed.lock); #endif + +#ifdef CONFIG_CACHY_SCHED + cfs_rq->head = NULL; +#endif } #ifdef CONFIG_FAIR_GROUP_SCHED @@ -11224,7 +11999,9 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m) __init void init_sched_fair_class(void) { #ifdef CONFIG_SMP +#if !defined(CONFIG_CACHY_SCHED) open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); +#endif #ifdef CONFIG_NO_HZ_COMMON nohz.next_balance = jiffies; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 8d1ca65db3b0..65ffb0244f1e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -523,10 +523,13 @@ struct cfs_rq { unsigned int idle_h_nr_running; /* SCHED_IDLE */ u64 exec_clock; + +#if !defined(CONFIG_CACHY_SCHED) u64 min_vruntime; #ifndef CONFIG_64BIT u64 min_vruntime_copy; #endif +#endif // CONFIG_CACHY_SCHED struct rb_root_cached tasks_timeline; @@ -535,9 +538,14 @@ struct 
cfs_rq {
 	 * It is set to NULL otherwise (i.e when none are currently running).
 	 */
 	struct sched_entity	*curr;
+#ifdef CONFIG_CACHY_SCHED
+	struct sched_entity	*head;
+	u32			hrrn_head;
+#else
 	struct sched_entity	*next;
 	struct sched_entity	*last;
 	struct sched_entity	*skip;
+#endif // CONFIG_CACHY_SCHED
 
 #ifdef CONFIG_SCHED_DEBUG
 	unsigned int		nr_spread_over;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index afad085960b8..bfe341f83969 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1659,6 +1659,22 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+#ifdef CONFIG_CACHY_SCHED
+	{
+		.procname	= "sched_hrrn_max_lifetime_ms",
+		.data		= &hrrn_max_lifetime,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "sched_cachy_harsh_mode_enabled",
+		.data		= &cachy_harsh_mode,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
 #ifdef CONFIG_SCHED_DEBUG
 	{
 		.procname	= "sched_min_granularity_ns",
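
Notes on the HRRN ordering (illustration only, not part of the patch):

entity_before() in the fair.c changes ranks runnable tasks by an approximated
response ratio R = (now - hrrn_start_time) / vruntime, i.e. how long a task
has existed relative to the CPU time it has been charged; the entity with the
highest ratio runs first, which is the Highest Response Ratio Next rule. The
standalone sketch below mirrors that comparison in plain userspace C so the
integer arithmetic is easy to experiment with; the struct, field names and
demo values are stand-ins for the kernel's sched_entity, not kernel code.

/*
 * hrrn_demo.c - minimal userspace sketch of the entity_before() comparison.
 * Build with: gcc -O2 -o hrrn_demo hrrn_demo.c
 */
#include <stdint.h>
#include <stdio.h>

/* Same truncation the patch uses: drop the low 8 bits, keep 32 bits. */
#define U64_TO_U32(x) ((uint32_t)(((x) >> 8) & 0xFFFFFFFF))

struct demo_entity {
	uint64_t vruntime;		/* weighted CPU time consumed (ns) */
	uint64_t hrrn_start_time;	/* start of the HRRN life time (ns) */
};

/*
 * Return 1 if se has a higher response ratio than curr (se should run
 * before curr), otherwise -1, following the convention in the patch.
 */
static int entity_before(uint64_t now, const struct demo_entity *curr,
			 const struct demo_entity *se)
{
	/* "| 1" keeps the divisor non-zero for freshly started tasks. */
	uint32_t vr_curr = U64_TO_U32(curr->vruntime) | 1;
	uint32_t vr_se   = U64_TO_U32(se->vruntime) | 1;
	uint32_t l_curr  = U64_TO_U32(now - curr->hrrn_start_time);
	uint32_t l_se    = U64_TO_U32(now - se->hrrn_start_time);
	int32_t diff     = (int32_t)(l_se / vr_se) - (int32_t)(l_curr / vr_curr);

	if (diff == 0)	/* equal quotients: break the tie on the remainders */
		diff = (int32_t)(l_se % vr_se) - (int32_t)(l_curr % vr_curr);

	return diff > 0 ? 1 : -1;
}

int main(void)
{
	uint64_t now = 10000000000ULL;				/* t = 10 s */
	struct demo_entity heavy = { 4000000000ULL, 0 };	/* ran a lot since boot */
	struct demo_entity light = { 100000000ULL, 8000000000ULL }; /* newer, ran little */

	/* Expect 1: the lightly used task has the higher response ratio. */
	printf("light before heavy: %d\n", entity_before(now, &heavy, &light));
	return 0;
}

Two runtime knobs come with the patch. kernel.sched_hrrn_max_lifetime_ms
bounds the life time used in the ratio: reset_lifetime() converts it with a
"<< 20" shift, so the default of 30000 becomes 30000 * 2^20 ns, roughly
31.46 s rather than an exact 30 s, as its comment notes.
kernel.sched_cachy_harsh_mode_enabled controls the wake_up_new_task() hunk:
when it is 0 (the default) a newly woken task starts its HRRN life time at
sched_clock(), and when it is non-zero that reset is skipped. Both should be
adjustable at runtime once the patch is applied, e.g.
"sysctl kernel.sched_hrrn_max_lifetime_ms=30000".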