diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4b4cc633b..e50a70f45 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -541,6 +541,7 @@ struct sched_entity {
 	struct rb_node			run_node;
 	struct list_head		group_node;
 	unsigned int			on_rq;
+	unsigned int			migrated;
 
 	u64				exec_start;
 	u64				sum_exec_runtime;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2f461f059..b586c6e84 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1004,6 +1004,7 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	/*
 	 * We are starting a new run period:
 	 */
+	se->migrated = 0;
 	se->exec_start = rq_clock_task(rq_of(cfs_rq));
 }
 
@@ -4190,17 +4191,18 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 {
 	u64 vruntime = cfs_rq->min_vruntime;
 
-	/*
-	 * The 'current' period is already promised to the current tasks,
-	 * however the extra weight of the new task will slow them down a
-	 * little, place the new task so that it fits in the slot that
-	 * stays open at the end.
-	 */
-	if (initial && sched_feat(START_DEBIT))
-		vruntime += sched_vslice(cfs_rq, se);
+	if (unlikely(initial)) { /* task creation is once, wakeup is often */
+		/*
+		 * The 'current' period is already promised to the current
+		 * tasks, however the extra weight of the new task will slow
+		 * them down a little, place the new task so that it fits in
+		 * the slot that stays open at the end.
+		 */
+		if (sched_feat(START_DEBIT))
+			vruntime += sched_vslice(cfs_rq, se);
 
-	/* sleeps up to a single latency don't count. */
-	if (!initial) {
+	} else {
+		/* sleeps up to a single latency don't count. */
 		unsigned long thresh;
 
 		if (se_is_idle(se))
@@ -4221,6 +4223,27 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	/* ensure we never gain time by being placed backwards. */
 	se->vruntime = max_vruntime(se->vruntime, vruntime);
 }
+
+static void place_entity_migrate(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	if (!sched_feat(PLACE_MIGRATE))
+		return;
+
+	if (cfs_rq->nr_running < se->migrated) {
+		/*
+		 * Migrated to a shorter runqueue, go first because
+		 * we were under-served on the old runqueue.
+		 */
+		se->vruntime = cfs_rq->min_vruntime;
+		return;
+	}
+
+	/*
+	 * Migrated to a longer runqueue, go last because
+	 * we got over-served on the old runqueue.
+	 */
+	se->vruntime = cfs_rq->min_vruntime + sched_vslice(cfs_rq, se);
+}
 
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
 
@@ -4295,6 +4318,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	if (flags & ENQUEUE_WAKEUP)
 		place_entity(cfs_rq, se, 0);
+	else if (se->migrated)
+		place_entity_migrate(cfs_rq, se);
 
 	check_schedstat_required();
 	update_stats_enqueue_fair(cfs_rq, se, flags);
@@ -6929,6 +6954,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se);
  */
 static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
 {
+	struct sched_entity *se = &p->se;
 	/*
 	 * As blocked tasks retain absolute vruntime the migration needs to
 	 * deal with this by subtracting the old and adding the new
@@ -6936,7 +6962,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
 	 * the task on the new runqueue.
 	 */
 	if (READ_ONCE(p->__state) == TASK_WAKING) {
-		struct sched_entity *se = &p->se;
 		struct cfs_rq *cfs_rq = cfs_rq_of(se);
 		u64 min_vruntime;
 
@@ -6961,7 +6986,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
 		 * rq->lock and can modify state directly.
 		 */
 		lockdep_assert_rq_held(task_rq(p));
-		detach_entity_cfs_rq(&p->se);
+		detach_entity_cfs_rq(se);
 
 	} else {
 		/*
@@ -6972,14 +6997,15 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
 		 * wakee task is less decayed, but giving the wakee more load
 		 * sounds not bad.
 		 */
-		remove_entity_load_avg(&p->se);
+		remove_entity_load_avg(se);
 	}
 
 	/* Tell new CPU we are migrated */
-	p->se.avg.last_update_time = 0;
+	se->avg.last_update_time = 0;
 
 	/* We have migrated, no longer consider this task hot */
-	p->se.exec_start = 0;
+	for_each_sched_entity(se)
+		se->migrated = READ_ONCE(cfs_rq_of(se)->nr_running) + !se->on_rq;
 
 	update_scan_period(p, new_cpu);
 }
@@ -7665,6 +7691,9 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
 	if (sysctl_sched_migration_cost == 0)
 		return 0;
 
+	if (p->se.migrated)
+		return 0;
+
 	delta = rq_clock_task(env->src_rq) - p->se.exec_start;
 
 	return delta < (s64)sysctl_sched_migration_cost;
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 1cf435bbc..cfff560e0 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -100,3 +100,5 @@ SCHED_FEAT(LATENCY_WARN, false)
 
 SCHED_FEAT(ALT_PERIOD, true)
 SCHED_FEAT(BASE_SLICE, true)
+
+SCHED_FEAT(PLACE_MIGRATE, true)
\ No newline at end of file
-- 
2.30.2