From: Ziggy471
Date: Sat, 8 Jan 2011 18:01:11 +0000 (-0500)
Subject: Update BFS to version 363
X-Git-Url: https://www.ziggy471.com/git/gitweb.cgi?p=ziggy471-frankenstein-kernel.git;a=commitdiff;h=afde2bad543eb05996a00ef849fb0371c1846932

Update BFS to version 363
---

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1598,7 +1598,7 @@ static inline void tsk_cpus_current(stru
 
 static inline void print_scheduler_version(void)
 {
-	printk(KERN_INFO"BFS CPU scheduler v0.357 by Con Kolivas.\n");
+	printk(KERN_INFO"BFS CPU scheduler v0.363 by Con Kolivas.\n");
 }
 
 static inline int iso_task(struct task_struct *p)
--- a/kernel/sched_bfs.c
+++ b/kernel/sched_bfs.c
@@ -120,7 +120,7 @@
 #define NS_TO_MS(TIME)	((TIME) >> 20)
 #define NS_TO_US(TIME)	((TIME) >> 10)
 
-#define RESCHED_US	(100) /* Reschedule if less than this many us left */
+#define RESCHED_US	(100) /* Reschedule if less than this many μs left */
 
 /*
  * This is the time all tasks within the same priority round robin.
@@ -187,7 +187,6 @@ struct rq {
 #ifdef CONFIG_NO_HZ
 	unsigned char in_nohz_recently;
 #endif
-	struct task_struct *last_task;
 #endif
 
 	struct task_struct *curr, *idle;
@@ -743,19 +742,12 @@ static int suitable_idle_cpus(struct tas
 
 static void resched_task(struct task_struct *p);
 
-/*
- * last_task stores the last non-idle task scheduled on the local rq for
- * cache warmth testing.
- */
-static inline void set_last_task(struct rq *rq, struct task_struct *p)
-{
-	rq->last_task = p;
-}
-
-#define CPUIDLE_CACHE_BUSY	(1)
-#define CPUIDLE_DIFF_CPU	(2)
-#define CPUIDLE_THREAD_BUSY	(4)
-#define CPUIDLE_DIFF_NODE	(8)
+#define CPUIDLE_DIFF_THREAD	(1)
+#define CPUIDLE_DIFF_CORE	(2)
+#define CPUIDLE_CACHE_BUSY	(4)
+#define CPUIDLE_DIFF_CPU	(8)
+#define CPUIDLE_THREAD_BUSY	(16)
+#define CPUIDLE_DIFF_NODE	(32)
 
 /*
  * The best idle CPU is chosen according to the CPUIDLE ranking above where the
@@ -808,27 +800,28 @@ static void resched_best_idle(struct tas
 		}
 		tmp_rq = cpu_rq(cpu_tmp);
 
-		if (rq->cpu_locality[cpu_tmp]) {
-			/* Check rq->last_task hasn't been dereferenced */
-			if (rq->last_task && p != rq->last_task) {
 #ifdef CONFIG_NUMA
-				if (rq->cpu_locality[cpu_tmp] > 1)
-					ranking |= CPUIDLE_DIFF_NODE;
+		if (rq->cpu_locality[cpu_tmp] > 3)
+			ranking |= CPUIDLE_DIFF_NODE;
+		else
 #endif
-				ranking |= CPUIDLE_DIFF_CPU;
-			}
-		}
+		if (rq->cpu_locality[cpu_tmp] > 2)
+			ranking |= CPUIDLE_DIFF_CPU;
 #ifdef CONFIG_SCHED_MC
+		if (rq->cpu_locality[cpu_tmp] == 2)
+			ranking |= CPUIDLE_DIFF_CORE;
 		if (!(tmp_rq->cache_idle(cpu_tmp)))
 			ranking |= CPUIDLE_CACHE_BUSY;
 #endif
 #ifdef CONFIG_SCHED_SMT
+		if (rq->cpu_locality[cpu_tmp] == 1)
+			ranking |= CPUIDLE_DIFF_THREAD;
 		if (!(tmp_rq->siblings_idle(cpu_tmp)))
 			ranking |= CPUIDLE_THREAD_BUSY;
 #endif
 		if (ranking < best_ranking) {
 			best_cpu = cpu_tmp;
-			if (ranking <= 1)
+			if (ranking == 0)
 				break;
 			best_ranking = ranking;
 		}
@@ -845,11 +838,11 @@ static inline void resched_suitable_idle
 
 /*
  * The cpu cache locality difference between CPUs is used to determine how far
- * to offset the virtual deadline. "One" difference in locality means that one
+ * to offset the virtual deadline. <2 difference in locality means that one
  * timeslice difference is allowed longer for the cpu local tasks. This is
  * enough in the common case when tasks are up to 2* number of CPUs to keep
  * tasks within their shared cache CPUs only. CPUs on different nodes or not
- * even in this domain (NUMA) have "3" difference, allowing 4 times longer
+ * even in this domain (NUMA) have "4" difference, allowing 4 times longer
  * deadlines before being taken onto another cpu, allowing for 2* the double
  * seen by separate CPUs above.
 * Simple summary: Virtual deadlines are equal on shared cache CPUs, double
@@ -858,12 +851,11 @@
 static inline int
 cache_distance(struct rq *task_rq, struct rq *rq, struct task_struct *p)
 {
-	/* Check rq->last_task hasn't been dereferenced */
-	if (likely(rq->last_task)) {
-		if (rq->last_task == p)
-			return 0;
-	}
-	return rq->cpu_locality[cpu_of(task_rq)] * task_timeslice(p);
+	int locality = rq->cpu_locality[cpu_of(task_rq)] - 2;
+
+	if (locality > 0)
+		return task_timeslice(p) << locality;
+	return 0;
 }
 #else /* CONFIG_SMP */
 static inline void inc_qnr(void)
@@ -901,10 +893,6 @@ cache_distance(struct rq *task_rq, struc
 {
 	return 0;
 }
-
-static inline void set_last_task(struct rq *rq, struct task_struct *p)
-{
-}
 #endif /* CONFIG_SMP */
 
 /*
@@ -1348,7 +1336,7 @@ static inline int online_cpus(struct tas
  */
 static inline int needs_other_cpu(struct task_struct *p, int cpu)
 {
-	if (unlikely(!cpu_isset(cpu, p->cpus_allowed) && online_cpus(p)))
+	if (unlikely(!cpu_isset(cpu, p->cpus_allowed)))
 		return 1;
 	return 0;
 }
@@ -1365,19 +1353,19 @@ static void try_preempt(struct task_stru
 	int highest_prio;
 	cpumask_t tmp;
 
-	/* IDLEPRIO tasks never preempt anything */
-	if (p->policy == SCHED_IDLEPRIO)
-		return;
-
 	if (suitable_idle_cpus(p)) {
 		resched_best_idle(p);
 		return;
 	}
 
-	if (online_cpus(p))
+	/* IDLEPRIO tasks never preempt anything */
+	if (p->policy == SCHED_IDLEPRIO)
+		return;
+
+	if (likely(online_cpus(p)))
 		cpus_and(tmp, cpu_online_map, p->cpus_allowed);
 	else
-		(cpumask_copy(&tmp, &cpu_online_map));
+		return;
 
 	latest_deadline = 0;
 	highest_prio = -1;
@@ -1836,14 +1824,14 @@ context_switch(struct rq *rq, struct tas
 	 */
 	arch_start_context_switch(prev);
 
-	if (unlikely(!mm)) {
+	if (!mm) {
 		next->active_mm = oldmm;
 		atomic_inc(&oldmm->mm_count);
 		enter_lazy_tlb(oldmm, next);
 	} else
 		switch_mm(oldmm, mm, next);
 
-	if (unlikely(!prev->mm)) {
+	if (!prev->mm) {
 		prev->active_mm = NULL;
 		rq->prev_mm = oldmm;
 	}
@@ -2029,9 +2017,13 @@ pc_system_time(struct rq *rq, struct tas
 	}
 	p->sched_time += ns;
 
-	if (hardirq_count() - hardirq_offset)
+	if (hardirq_count() - hardirq_offset) {
 		rq->irq_pc += pc;
-	else if (softirq_count()) {
+		if (rq->irq_pc >= 100) {
+			rq->irq_pc %= 100;
+			cpustat->irq = cputime64_add(cpustat->irq, tmp);
+		}
+	} else if (softirq_count()) {
 		rq->softirq_pc += pc;
 		if (rq->softirq_pc >= 100) {
 			rq->softirq_pc %= 100;
@@ -2416,7 +2408,7 @@ static void task_running_tick(struct rq
 	 * Tasks that were scheduled in the first half of a tick are not
 	 * allowed to run into the 2nd half of the next tick if they will
 	 * run out of time slice in the interim. Otherwise, if they have
-	 * less than 100us of time slice left they will be rescheduled.
+	 * less than RESCHED_US μs of time slice left they will be rescheduled.
 	 */
 	if (rq->dither) {
 		if (rq->rq_time_slice > HALF_JIFFY_US)
@@ -2799,8 +2791,6 @@ need_resched_nonpreemptible:
 		sched_info_switch(prev, next);
 		perf_event_task_sched_out(prev, next, cpu);
 
-		if (prev != idle)
-			set_last_task(rq, prev);
 		set_rq_task(rq, next);
 		grq.nr_switches++;
 		prev->oncpu = 0;
@@ -3627,8 +3617,8 @@ recheck:
 	 * SCHED_BATCH is 0.
 	 */
 	if (param->sched_priority < 0 ||
-	    (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
-	    (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
+	    (p->mm && param->sched_priority > MAX_USER_RT_PRIO - 1) ||
+	    (!p->mm && param->sched_priority > MAX_RT_PRIO - 1))
 		return -EINVAL;
 	if (is_rt_policy(policy) != (param->sched_priority != 0))
 		return -EINVAL;
@@ -4349,7 +4339,10 @@ void init_idle(struct task_struct *idle,
 	idle->prio = PRIO_LIMIT;
 	set_rq_task(rq, idle);
 	idle->cpus_allowed = cpumask_of_cpu(cpu);
+	/* Silence PROVE_RCU */
+	rcu_read_lock();
 	set_task_cpu(idle, cpu);
+	rcu_read_unlock();
 	rq->curr = rq->idle = idle;
 	idle->oncpu = 1;
 	set_cpuidle_map(cpu);
@@ -4579,6 +4572,29 @@ void move_task_off_dead_cpu(int dead_cpu
 
 }
 
+/* Run through task list and find tasks affined to just the dead cpu, then
+ * allocate a new affinity */
+static void break_sole_affinity(int src_cpu)
+{
+	struct task_struct *p, *t;
+
+	do_each_thread(t, p) {
+		if (!online_cpus(p)) {
+			cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
+			/*
+			 * Don't tell them about moving exiting tasks or
+			 * kernel threads (both mm NULL), since they never
+			 * leave kernel.
+			 */
+			if (p->mm && printk_ratelimit()) {
+				printk(KERN_INFO "process %d (%s) no "
+					"longer affine to cpu %d\n",
+					task_pid_nr(p), p->comm, src_cpu);
+			}
+		}
+	} while_each_thread(t, p);
+}
+
 /*
  * Schedules idle task to be the next runnable task on current CPU.
  * It does so by boosting its priority to highest possible.
@@ -4599,6 +4615,7 @@ void sched_idle_next(void)
 	 * and interrupts disabled on the current cpu.
 	 */
 	grq_lock_irqsave(&flags);
+	break_sole_affinity(this_cpu);
 
 	__setscheduler(idle, rq, SCHED_FIFO, MAX_RT_PRIO - 1);
 
@@ -6459,10 +6476,12 @@ void __init sched_init_smp(void)
 					cpumask_set_cpu(other_cpu, &rq->cache_siblings);
 			}
 #endif
-			if (sd->level <= SD_LV_MC)
-				locality = 0;
-			else if (sd->level <= SD_LV_NODE)
+			if (sd->level <= SD_LV_SIBLING)
 				locality = 1;
+			else if (sd->level <= SD_LV_MC)
+				locality = 2;
+			else if (sd->level <= SD_LV_NODE)
+				locality = 3;
 			else
 				continue;
 
@@ -6568,7 +6587,7 @@ void __init sched_init(void)
 			if (i == j)
 				rq->cpu_locality[j] = 0;
 			else
-				rq->cpu_locality[j] = 3;
+				rq->cpu_locality[j] = 4;
 		}
 	}
 #endif
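
For reference, the effect of the new cache_distance() is easy to see outside the kernel. With this patch, sched_init() seeds cpu_locality[] with 4 for every other CPU, sched_init_smp() lowers it to 1 for SMT siblings, 2 for shared-cache (MC) CPUs and 3 for CPUs on the same NUMA node, and cache_distance() subtracts 2 and shifts the task's timeslice left by whatever remains. The standalone sketch below only illustrates that arithmetic and is not kernel code: task_timeslice_us(), deadline_offset_us() and the 6000 μs slice are made-up stand-ins for the real task_timeslice(p).

/*
 * Standalone illustration of the locality-based virtual-deadline offset
 * introduced by this patch.  Locality values mirror what sched_init_smp()
 * now assigns: 0 = same CPU, 1 = SMT sibling, 2 = shared-cache (MC) CPU,
 * 3 = same NUMA node, 4 = different NUMA node.
 */
#include <stdio.h>

/* Stand-in for task_timeslice(p); an assumed 6 ms slice in microseconds. */
static int task_timeslice_us(void)
{
	return 6000;
}

/* Mirrors the new cache_distance(): subtract 2, then shift the slice. */
static int deadline_offset_us(int cpu_locality)
{
	int locality = cpu_locality - 2;

	if (locality > 0)
		return task_timeslice_us() << locality;
	return 0;
}

int main(void)
{
	static const char *names[] = {
		"same CPU", "SMT sibling", "shared cache (MC)",
		"same NUMA node", "different NUMA node"
	};
	int locality;

	for (locality = 0; locality <= 4; locality++)
		printf("locality %d (%-19s): offset %5d us\n",
		       locality, names[locality],
		       deadline_offset_us(locality));
	return 0;
}

Run as-is it prints an offset of 0 μs for localities 0-2, 12000 μs for locality 3 and 24000 μs for locality 4, which is the "equal on shared cache CPUs, double on separate CPUs and quadruple in separate NUMA nodes" behaviour the updated comment describes; the patch uses this purely topology-based offset in place of the old last_task cache-warmth test.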