From: Ziggy471
Date: Sat, 8 Jan 2011 18:01:11 +0000 (-0500)
Subject: Update BFS to version 363
X-Git-Url: https://www.ziggy471.com/git/gitweb.cgi?p=ziggy471-frankenstein-kernel.git;a=commitdiff;h=afde2bad543eb05996a00ef849fb0371c1846932

Update BFS to version 363
---

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1598,7 +1598,7 @@ static inline void tsk_cpus_current(stru
 
 static inline void print_scheduler_version(void)
 {
-	printk(KERN_INFO"BFS CPU scheduler v0.357 by Con Kolivas.\n");
+	printk(KERN_INFO"BFS CPU scheduler v0.363 by Con Kolivas.\n");
 }
 
 static inline int iso_task(struct task_struct *p)
--- a/kernel/sched_bfs.c
+++ b/kernel/sched_bfs.c
@@ -120,7 +120,7 @@
 #define NS_TO_MS(TIME)	((TIME) >> 20)
 #define NS_TO_US(TIME)	((TIME) >> 10)
 
-#define RESCHED_US	(100) /* Reschedule if less than this many us left */
+#define RESCHED_US	(100) /* Reschedule if less than this many μs left */
 
 /*
  * This is the time all tasks within the same priority round robin.
@@ -187,7 +187,6 @@ struct rq {
 #ifdef CONFIG_NO_HZ
 	unsigned char in_nohz_recently;
 #endif
-	struct task_struct *last_task;
 #endif
 
 	struct task_struct *curr, *idle;
@@ -743,19 +742,12 @@ static int suitable_idle_cpus(struct tas
 
 static void resched_task(struct task_struct *p);
 
-/*
- * last_task stores the last non-idle task scheduled on the local rq for
- * cache warmth testing.
- */
-static inline void set_last_task(struct rq *rq, struct task_struct *p)
-{
-	rq->last_task = p;
-}
-
-#define CPUIDLE_CACHE_BUSY	(1)
-#define CPUIDLE_DIFF_CPU	(2)
-#define CPUIDLE_THREAD_BUSY	(4)
-#define CPUIDLE_DIFF_NODE	(8)
+#define CPUIDLE_DIFF_THREAD	(1)
+#define CPUIDLE_DIFF_CORE	(2)
+#define CPUIDLE_CACHE_BUSY	(4)
+#define CPUIDLE_DIFF_CPU	(8)
+#define CPUIDLE_THREAD_BUSY	(16)
+#define CPUIDLE_DIFF_NODE	(32)
 
 /*
  * The best idle CPU is chosen according to the CPUIDLE ranking above where the
@@ -808,27 +800,28 @@ static void resched_best_idle(struct tas
 		}
 		tmp_rq = cpu_rq(cpu_tmp);
 
-		if (rq->cpu_locality[cpu_tmp]) {
-			/* Check rq->last_task hasn't been dereferenced */
-			if (rq->last_task && p != rq->last_task) {
 #ifdef CONFIG_NUMA
-				if (rq->cpu_locality[cpu_tmp] > 1)
-					ranking |= CPUIDLE_DIFF_NODE;
+		if (rq->cpu_locality[cpu_tmp] > 3)
+			ranking |= CPUIDLE_DIFF_NODE;
+		else
 #endif
-				ranking |= CPUIDLE_DIFF_CPU;
-			}
-		}
+		if (rq->cpu_locality[cpu_tmp] > 2)
+			ranking |= CPUIDLE_DIFF_CPU;
 #ifdef CONFIG_SCHED_MC
+		if (rq->cpu_locality[cpu_tmp] == 2)
+			ranking |= CPUIDLE_DIFF_CORE;
 		if (!(tmp_rq->cache_idle(cpu_tmp)))
 			ranking |= CPUIDLE_CACHE_BUSY;
 #endif
 #ifdef CONFIG_SCHED_SMT
+		if (rq->cpu_locality[cpu_tmp] == 1)
+			ranking |= CPUIDLE_DIFF_THREAD;
 		if (!(tmp_rq->siblings_idle(cpu_tmp)))
 			ranking |= CPUIDLE_THREAD_BUSY;
 #endif
 		if (ranking < best_ranking) {
 			best_cpu = cpu_tmp;
-			if (ranking <= 1)
+			if (ranking == 0)
 				break;
 			best_ranking = ranking;
 		}
@@ -845,11 +838,11 @@ static inline void resched_suitable_idle
 
 /*
  * The cpu cache locality difference between CPUs is used to determine how far
- * to offset the virtual deadline. "One" difference in locality means that one
+ * to offset the virtual deadline. <2 difference in locality means that one
  * timeslice difference is allowed longer for the cpu local tasks. This is
  * enough in the common case when tasks are up to 2* number of CPUs to keep
  * tasks within their shared cache CPUs only. CPUs on different nodes or not
- * even in this domain (NUMA) have "3" difference, allowing 4 times longer
+ * even in this domain (NUMA) have "4" difference, allowing 4 times longer
  * deadlines before being taken onto another cpu, allowing for 2* the double
  * seen by separate CPUs above.
 * Simple summary: Virtual deadlines are equal on shared cache CPUs, double
@@ -858,12 +851,11 @@
 static inline int
 cache_distance(struct rq *task_rq, struct rq *rq, struct task_struct *p)
 {
-	/* Check rq->last_task hasn't been dereferenced */
-	if (likely(rq->last_task)) {
-		if (rq->last_task == p)
-			return 0;
-	}
-	return rq->cpu_locality[cpu_of(task_rq)] * task_timeslice(p);
+	int locality = rq->cpu_locality[cpu_of(task_rq)] - 2;
+
+	if (locality > 0)
+		return task_timeslice(p) << locality;
+	return 0;
 }
 #else /* CONFIG_SMP */
 static inline void inc_qnr(void)
@@ -901,10 +893,6 @@ cache_distance(struct rq *task_rq, struc
 {
 	return 0;
 }
-
-static inline void set_last_task(struct rq *rq, struct task_struct *p)
-{
-}
 #endif /* CONFIG_SMP */
 
 /*
@@ -1348,7 +1336,7 @@ static inline int online_cpus(struct tas
  */
 static inline int needs_other_cpu(struct task_struct *p, int cpu)
 {
-	if (unlikely(!cpu_isset(cpu, p->cpus_allowed) && online_cpus(p)))
+	if (unlikely(!cpu_isset(cpu, p->cpus_allowed)))
 		return 1;
 	return 0;
 }
@@ -1365,19 +1353,19 @@ static void try_preempt(struct task_stru
 	int highest_prio;
 	cpumask_t tmp;
 
-	/* IDLEPRIO tasks never preempt anything */
-	if (p->policy == SCHED_IDLEPRIO)
-		return;
-
 	if (suitable_idle_cpus(p)) {
 		resched_best_idle(p);
 		return;
 	}
 
-	if (online_cpus(p))
+	/* IDLEPRIO tasks never preempt anything */
+	if (p->policy == SCHED_IDLEPRIO)
+		return;
+
+	if (likely(online_cpus(p)))
 		cpus_and(tmp, cpu_online_map, p->cpus_allowed);
 	else
-		(cpumask_copy(&tmp, &cpu_online_map));
+		return;
 
 	latest_deadline = 0;
 	highest_prio = -1;
@@ -1836,14 +1824,14 @@ context_switch(struct rq *rq, struct tas
 	 */
 	arch_start_context_switch(prev);
 
-	if (unlikely(!mm)) {
+	if (!mm) {
 		next->active_mm = oldmm;
 		atomic_inc(&oldmm->mm_count);
 		enter_lazy_tlb(oldmm, next);
 	} else
 		switch_mm(oldmm, mm, next);
 
-	if (unlikely(!prev->mm)) {
+	if (!prev->mm) {
 		prev->active_mm = NULL;
 		rq->prev_mm = oldmm;
 	}
@@ -2029,9 +2017,13 @@ pc_system_time(struct rq *rq, struct tas
 	}
 	p->sched_time += ns;
 
-	if (hardirq_count() - hardirq_offset)
+	if (hardirq_count() - hardirq_offset) {
 		rq->irq_pc += pc;
-	else if (softirq_count()) {
+		if (rq->irq_pc >= 100) {
+			rq->irq_pc %= 100;
+			cpustat->irq = cputime64_add(cpustat->irq, tmp);
+		}
+	} else if (softirq_count()) {
 		rq->softirq_pc += pc;
 		if (rq->softirq_pc >= 100) {
 			rq->softirq_pc %= 100;
@@ -2416,7 +2408,7 @@ static void task_running_tick(struct rq
 	 * Tasks that were scheduled in the first half of a tick are not
 	 * allowed to run into the 2nd half of the next tick if they will
 	 * run out of time slice in the interim. Otherwise, if they have
-	 * less than 100us of time slice left they will be rescheduled.
+	 * less than RESCHED_US μs of time slice left they will be rescheduled.
 	 */
 	if (rq->dither) {
 		if (rq->rq_time_slice > HALF_JIFFY_US)
@@ -2799,8 +2791,6 @@ need_resched_nonpreemptible:
 		sched_info_switch(prev, next);
 		perf_event_task_sched_out(prev, next, cpu);
 
-		if (prev != idle)
-			set_last_task(rq, prev);
 		set_rq_task(rq, next);
 		grq.nr_switches++;
 		prev->oncpu = 0;
@@ -3627,8 +3617,8 @@ recheck:
 	 * SCHED_BATCH is 0.
 	 */
 	if (param->sched_priority < 0 ||
-	    (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
-	    (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
+	    (p->mm && param->sched_priority > MAX_USER_RT_PRIO - 1) ||
+	    (!p->mm && param->sched_priority > MAX_RT_PRIO - 1))
 		return -EINVAL;
 	if (is_rt_policy(policy) != (param->sched_priority != 0))
 		return -EINVAL;
@@ -4349,7 +4339,10 @@ void init_idle(struct task_struct *idle,
 	idle->prio = PRIO_LIMIT;
 	set_rq_task(rq, idle);
 	idle->cpus_allowed = cpumask_of_cpu(cpu);
+	/* Silence PROVE_RCU */
+	rcu_read_lock();
 	set_task_cpu(idle, cpu);
+	rcu_read_unlock();
 	rq->curr = rq->idle = idle;
 	idle->oncpu = 1;
 	set_cpuidle_map(cpu);
@@ -4579,6 +4572,29 @@ void move_task_off_dead_cpu(int dead_cpu
 
 }
 
+/* Run through task list and find tasks affined to just the dead cpu, then
+ * allocate a new affinity */
+static void break_sole_affinity(int src_cpu)
+{
+	struct task_struct *p, *t;
+
+	do_each_thread(t, p) {
+		if (!online_cpus(p)) {
+			cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
+			/*
+			 * Don't tell them about moving exiting tasks or
+			 * kernel threads (both mm NULL), since they never
+			 * leave kernel.
+			 */
+			if (p->mm && printk_ratelimit()) {
+				printk(KERN_INFO "process %d (%s) no "
+					"longer affine to cpu %d\n",
+					task_pid_nr(p), p->comm, src_cpu);
+			}
+		}
+	} while_each_thread(t, p);
+}
+
 /*
  * Schedules idle task to be the next runnable task on current CPU.
  * It does so by boosting its priority to highest possible.
@@ -4599,6 +4615,7 @@ void sched_idle_next(void)
 	 * and interrupts disabled on the current cpu.
 	 */
 	grq_lock_irqsave(&flags);
+	break_sole_affinity(this_cpu);
 
 	__setscheduler(idle, rq, SCHED_FIFO, MAX_RT_PRIO - 1);
 
@@ -6459,10 +6476,12 @@ void __init sched_init_smp(void)
 					cpumask_set_cpu(other_cpu, &rq->cache_siblings);
 			}
 #endif
-			if (sd->level <= SD_LV_MC)
-				locality = 0;
-			else if (sd->level <= SD_LV_NODE)
+			if (sd->level <= SD_LV_SIBLING)
 				locality = 1;
+			else if (sd->level <= SD_LV_MC)
+				locality = 2;
+			else if (sd->level <= SD_LV_NODE)
+				locality = 3;
 			else
 				continue;
 
@@ -6568,7 +6587,7 @@ void __init sched_init(void)
 			if (i == j)
 				rq->cpu_locality[j] = 0;
 			else
-				rq->cpu_locality[j] = 3;
+				rq->cpu_locality[j] = 4;
 		}
 	}
 #endif
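
For reference, the effect of the new cache_distance() is easy to see outside the kernel. With this patch, sched_init() seeds cpu_locality[] with 4 for every other CPU, sched_init_smp() lowers it to 1 for SMT siblings, 2 for shared-cache (MC) CPUs and 3 for CPUs on the same NUMA node, and cache_distance() subtracts 2 and shifts the task's timeslice left by whatever remains. The standalone sketch below only illustrates that arithmetic and is not kernel code: task_timeslice_us(), deadline_offset_us() and the 6000 μs slice are made-up stand-ins for the real task_timeslice(p).

/*
 * Standalone illustration of the locality-based virtual-deadline offset
 * introduced by this patch.  Locality values mirror what sched_init_smp()
 * now assigns: 0 = same CPU, 1 = SMT sibling, 2 = shared-cache (MC) CPU,
 * 3 = same NUMA node, 4 = different NUMA node.
 */
#include <stdio.h>

/* Stand-in for task_timeslice(p); an assumed 6 ms slice in microseconds. */
static int task_timeslice_us(void)
{
	return 6000;
}

/* Mirrors the new cache_distance(): subtract 2, then shift the slice. */
static int deadline_offset_us(int cpu_locality)
{
	int locality = cpu_locality - 2;

	if (locality > 0)
		return task_timeslice_us() << locality;
	return 0;
}

int main(void)
{
	static const char *names[] = {
		"same CPU", "SMT sibling", "shared cache (MC)",
		"same NUMA node", "different NUMA node"
	};
	int locality;

	for (locality = 0; locality <= 4; locality++)
		printf("locality %d (%-19s): offset %5d us\n",
		       locality, names[locality],
		       deadline_offset_us(locality));
	return 0;
}

Run as-is it prints an offset of 0 μs for localities 0-2, 12000 μs for locality 3 and 24000 μs for locality 4, which is the "equal on shared cache CPUs, double on separate CPUs and quadruple in separate NUMA nodes" behaviour the updated comment describes; the patch uses this purely topology-based offset in place of the old last_task cache-warmth test.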