Commit 897e81be authored by Linus Torvalds

Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (35 commits)
  sched, cputime: Introduce thread_group_times()
  sched, cputime: Cleanups related to task_times()
  Revert "sched, x86: Optimize branch hint in __switch_to()"
  sched: Fix isolcpus boot option
  sched: Revert 498657a4
  sched, time: Define nsecs_to_jiffies()
  sched: Remove task_{u,s,g}time()
  sched: Introduce task_times() to replace task_{u,s}time() pair
  sched: Limit the number of scheduler debug messages
  sched.c: Call debug_show_all_locks() when dumping all tasks
  sched, x86: Optimize branch hint in __switch_to()
  sched: Optimize branch hint in context_switch()
  sched: Optimize branch hint in pick_next_task_fair()
  sched_feat_write(): Update ppos instead of file->f_pos
  sched: Sched_rt_periodic_timer vs cpu hotplug
  sched, kvm: Fix race condition involving sched_in_preempt_notifers
  sched: More generic WAKE_AFFINE vs select_idle_sibling()
  sched: Cleanup select_task_rq_fair()
  sched: Fix granularity of task_u/stime()
  sched: Fix/add missing update_rq_clock() calls
  ...
parents c3fa27d1 0cf55e1e
......@@ -6,6 +6,21 @@ be removed from this file.
---------------------------
What: USER_SCHED
When: 2.6.34
Why: USER_SCHED was implemented as a proof of concept for group scheduling.
The effect of USER_SCHED can already be achieved from userspace with
the help of libcgroup. The removal of USER_SCHED will also simplify
the scheduler code with the removal of one major ifdef. There are also
issues USER_SCHED has with USER_NS. A decision was taken not to fix
those and instead remove USER_SCHED. Also, new group scheduling
features will not be implemented for USER_SCHED.
Who: Dhaval Giani <dhaval@linux.vnet.ibm.com>
---------------------------
What: PRISM54
When: 2.6.34
......
......@@ -1072,7 +1072,8 @@ second). The meanings of the columns are as follows, from left to right:
- irq: servicing interrupts
- softirq: servicing softirqs
- steal: involuntary wait
- guest: running a guest
- guest: running a normal guest
- guest_nice: running a niced guest
The "intr" line gives counts of interrupts serviced since boot time, for each
of the possible system interrupts. The first column is the total of all
......
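With guest_nice added, the aggregate "cpu" line of /proc/stat now carries ten
counters. A minimal user-space sketch of reading it, based only on the column
order documented above (the variable names are illustrative):

    #include <stdio.h>

    /* Read the aggregate "cpu" line of /proc/stat; after this change it
     * ends with the new guest_nice counter. Values are in USER_HZ units. */
    int main(void)
    {
            unsigned long long user, nice, system, idle, iowait,
                               irq, softirq, steal, guest, guest_nice;
            FILE *f = fopen("/proc/stat", "r");

            if (!f)
                    return 1;
            if (fscanf(f, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu",
                       &user, &nice, &system, &idle, &iowait,
                       &irq, &softirq, &steal, &guest, &guest_nice) == 10)
                    printf("guest=%llu guest_nice=%llu\n", guest, guest_nice);
            fclose(f);
            return 0;
    }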
......@@ -2186,6 +2186,8 @@ and is between 256 and 4096 characters. It is defined in the file
sbni= [NET] Granch SBNI12 leased line adapter
sched_debug [KNL] Enables verbose scheduler debug messages.
sc1200wdt= [HW,WDT] SC1200 WDT (watchdog) driver
Format: <io>[,<timeout>[,<isapnp>]]
......
......@@ -410,6 +410,16 @@ static void task_show_stack_usage(struct seq_file *m, struct task_struct *task)
}
#endif /* CONFIG_MMU */
static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
{
seq_printf(m, "Cpus_allowed:\t");
seq_cpumask(m, &task->cpus_allowed);
seq_printf(m, "\n");
seq_printf(m, "Cpus_allowed_list:\t");
seq_cpumask_list(m, &task->cpus_allowed);
seq_printf(m, "\n");
}
int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
......@@ -424,6 +434,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
}
task_sig(m, task);
task_cap(m, task);
task_cpus_allowed(m, task);
cpuset_task_status_allowed(m, task);
#if defined(CONFIG_S390)
task_show_regs(m, task);
......@@ -495,20 +506,17 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
/* add up live thread stats at the group level */
if (whole) {
struct task_cputime cputime;
struct task_struct *t = task;
do {
min_flt += t->min_flt;
maj_flt += t->maj_flt;
gtime = cputime_add(gtime, task_gtime(t));
gtime = cputime_add(gtime, t->gtime);
t = next_thread(t);
} while (t != task);
min_flt += sig->min_flt;
maj_flt += sig->maj_flt;
thread_group_cputime(task, &cputime);
utime = cputime.utime;
stime = cputime.stime;
thread_group_times(task, &utime, &stime);
gtime = cputime_add(gtime, sig->gtime);
}
......@@ -524,9 +532,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
if (!whole) {
min_flt = task->min_flt;
maj_flt = task->maj_flt;
utime = task_utime(task);
stime = task_stime(task);
gtime = task_gtime(task);
task_times(task, &utime, &stime);
gtime = task->gtime;
}
/* scale priority and nice values from timeslices to -20..20 */
......
......@@ -27,7 +27,7 @@ static int show_stat(struct seq_file *p, void *v)
int i, j;
unsigned long jif;
cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
cputime64_t guest;
cputime64_t guest, guest_nice;
u64 sum = 0;
u64 sum_softirq = 0;
unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
......@@ -36,7 +36,7 @@ static int show_stat(struct seq_file *p, void *v)
user = nice = system = idle = iowait =
irq = softirq = steal = cputime64_zero;
guest = cputime64_zero;
guest = guest_nice = cputime64_zero;
getboottime(&boottime);
jif = boottime.tv_sec;
......@@ -51,6 +51,8 @@ static int show_stat(struct seq_file *p, void *v)
softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
guest_nice = cputime64_add(guest_nice,
kstat_cpu(i).cpustat.guest_nice);
for_each_irq_nr(j) {
sum += kstat_irqs_cpu(j, i);
}
......@@ -65,7 +67,8 @@ static int show_stat(struct seq_file *p, void *v)
}
sum += arch_irq_stat();
seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu "
"%llu\n",
(unsigned long long)cputime64_to_clock_t(user),
(unsigned long long)cputime64_to_clock_t(nice),
(unsigned long long)cputime64_to_clock_t(system),
......@@ -74,7 +77,8 @@ static int show_stat(struct seq_file *p, void *v)
(unsigned long long)cputime64_to_clock_t(irq),
(unsigned long long)cputime64_to_clock_t(softirq),
(unsigned long long)cputime64_to_clock_t(steal),
(unsigned long long)cputime64_to_clock_t(guest));
(unsigned long long)cputime64_to_clock_t(guest),
(unsigned long long)cputime64_to_clock_t(guest_nice));
for_each_online_cpu(i) {
/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
......@@ -88,8 +92,10 @@ static int show_stat(struct seq_file *p, void *v)
softirq = kstat_cpu(i).cpustat.softirq;
steal = kstat_cpu(i).cpustat.steal;
guest = kstat_cpu(i).cpustat.guest;
guest_nice = kstat_cpu(i).cpustat.guest_nice;
seq_printf(p,
"cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
"cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu "
"%llu\n",
i,
(unsigned long long)cputime64_to_clock_t(user),
(unsigned long long)cputime64_to_clock_t(nice),
......@@ -99,7 +105,8 @@ static int show_stat(struct seq_file *p, void *v)
(unsigned long long)cputime64_to_clock_t(irq),
(unsigned long long)cputime64_to_clock_t(softirq),
(unsigned long long)cputime64_to_clock_t(steal),
(unsigned long long)cputime64_to_clock_t(guest));
(unsigned long long)cputime64_to_clock_t(guest),
(unsigned long long)cputime64_to_clock_t(guest_nice));
}
seq_printf(p, "intr %llu", (unsigned long long)sum);
......
......@@ -307,6 +307,7 @@ extern clock_t jiffies_to_clock_t(long x);
extern unsigned long clock_t_to_jiffies(unsigned long x);
extern u64 jiffies_64_to_clock_t(u64 x);
extern u64 nsec_to_clock_t(u64 x);
extern unsigned long nsecs_to_jiffies(u64 n);
#define TIMESTAMP_SIZE 30
......
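nsecs_to_jiffies() is introduced by this series for the cputime conversions
used below. As a sketch of the common case, assuming a configuration where
NSEC_PER_SEC divides evenly by HZ (the in-tree helper in kernel/time.c also
handles configurations where it does not):

    /* Sketch only: valid when NSEC_PER_SEC % HZ == 0; the real helper
     * adds fallbacks for the remaining HZ configurations. */
    unsigned long nsecs_to_jiffies(u64 n)
    {
            return div_u64(n, NSEC_PER_SEC / HZ);   /* <linux/math64.h> */
    }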
......@@ -25,6 +25,7 @@ struct cpu_usage_stat {
cputime64_t iowait;
cputime64_t steal;
cputime64_t guest;
cputime64_t guest_nice;
};
struct kernel_stat {
......
......@@ -105,6 +105,11 @@ struct preempt_notifier;
* @sched_out: we've just been preempted
* notifier: struct preempt_notifier for the task being preempted
* next: the task that's kicking us out
*
* Please note that sched_in and sched_out are called in different
* contexts: sched_out is called with the rq lock held and irqs disabled,
* while sched_in is called without the rq lock and with irqs enabled. This
* difference is intentional and is depended upon by its users.
*/
struct preempt_ops {
void (*sched_in)(struct preempt_notifier *notifier, int cpu);
......
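For reference, a hedged sketch of how a client of this API hooks the
callbacks (KVM is the in-tree user; the my_* names here are placeholders).
It illustrates the locking contract spelled out in the comment above:

    #include <linux/preempt.h>
    #include <linux/sched.h>

    /* Placeholder callbacks. Per the comment above, sched_out runs with
     * the rq lock held and irqs disabled (so it must not sleep), while
     * sched_in runs with irqs enabled and no rq lock. */
    static void my_sched_in(struct preempt_notifier *pn, int cpu)
    {
            /* current is being scheduled back in on @cpu */
    }

    static void my_sched_out(struct preempt_notifier *pn,
                             struct task_struct *next)
    {
            /* current is being preempted; @next is taking over */
    }

    static struct preempt_ops my_preempt_ops = {
            .sched_in  = my_sched_in,
            .sched_out = my_sched_out,
    };

    static struct preempt_notifier my_notifier;

    static void my_hook_current_task(void)
    {
            preempt_notifier_init(&my_notifier, &my_preempt_ops);
            preempt_notifier_register(&my_notifier);   /* hooks current */
    }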
......@@ -145,7 +145,6 @@ extern unsigned long this_cpu_load(void);
extern void calc_global_load(void);
extern u64 cpu_nr_migrations(int cpu);
extern unsigned long get_parent_ip(unsigned long addr);
......@@ -171,8 +170,6 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
}
#endif
extern unsigned long long time_sync_thresh;
/*
* Task state bitmask. NOTE! These bits are also
* encoded in fs/proc/array.c: get_task_state().
......@@ -349,7 +346,6 @@ extern signed long schedule_timeout(signed long timeout);
extern signed long schedule_timeout_interruptible(signed long timeout);
extern signed long schedule_timeout_killable(signed long timeout);
extern signed long schedule_timeout_uninterruptible(signed long timeout);
asmlinkage void __schedule(void);
asmlinkage void schedule(void);
extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner);
......@@ -628,6 +624,9 @@ struct signal_struct {
cputime_t utime, stime, cutime, cstime;
cputime_t gtime;
cputime_t cgtime;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
cputime_t prev_utime, prev_stime;
#endif
unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
unsigned long inblock, oublock, cinblock, coublock;
......@@ -1013,9 +1012,13 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
return to_cpumask(sd->span);
}
extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
struct sched_domain_attr *dattr_new);
/* Allocate an array of sched domains, for partition_sched_domains(). */
cpumask_var_t *alloc_sched_domains(unsigned int ndoms);
void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
/* Test a flag in parent sched domain */
static inline int test_sd_parent(struct sched_domain *sd, int flag)
{
......@@ -1033,7 +1036,7 @@ unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu);
struct sched_domain_attr;
static inline void
partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
struct sched_domain_attr *dattr_new)
{
}
......@@ -1331,7 +1334,9 @@ struct task_struct {
cputime_t utime, stime, utimescaled, stimescaled;
cputime_t gtime;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
cputime_t prev_utime, prev_stime;
#endif
unsigned long nvcsw, nivcsw; /* context switch counts */
struct timespec start_time; /* monotonic time */
struct timespec real_start_time; /* boot based time */
......@@ -1720,9 +1725,8 @@ static inline void put_task_struct(struct task_struct *t)
__put_task_struct(t);
}
extern cputime_t task_utime(struct task_struct *p);
extern cputime_t task_stime(struct task_struct *p);
extern cputime_t task_gtime(struct task_struct *p);
extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
/*
* Per process flags
......
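Call sites move from the removed task_{u,s,g}time() helpers to a single call
that fills both values consistently; an illustrative fragment (show_times()
is a made-up wrapper, not part of the patch):

    /* Illustrative only: one call now yields a consistent pair, rather
     * than two separate reads that could disagree. */
    static void show_times(struct task_struct *p)
    {
            cputime_t ut, st;

            task_times(p, &ut, &st);            /* this task only */
            thread_group_times(p, &ut, &st);    /* whole thread group */
    }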
......@@ -537,8 +537,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
* element of the partition (one sched domain) to be passed to
* partition_sched_domains().
*/
/* FIXME: see the FIXME in partition_sched_domains() */
static int generate_sched_domains(struct cpumask **domains,
static int generate_sched_domains(cpumask_var_t **domains,
struct sched_domain_attr **attributes)
{
LIST_HEAD(q); /* queue of cpusets to be scanned */
......@@ -546,7 +545,7 @@ static int generate_sched_domains(struct cpumask **domains,
struct cpuset **csa; /* array of all cpuset ptrs */
int csn; /* how many cpuset ptrs in csa so far */
int i, j, k; /* indices for partition finding loops */
struct cpumask *doms; /* resulting partition; i.e. sched domains */
cpumask_var_t *doms; /* resulting partition; i.e. sched domains */
struct sched_domain_attr *dattr; /* attributes for custom domains */
int ndoms = 0; /* number of sched domains in result */
int nslot; /* next empty doms[] struct cpumask slot */
......@@ -557,7 +556,8 @@ static int generate_sched_domains(struct cpumask **domains,
/* Special case for the 99% of systems with one, full, sched domain */
if (is_sched_load_balance(&top_cpuset)) {
doms = kmalloc(cpumask_size(), GFP_KERNEL);
ndoms = 1;
doms = alloc_sched_domains(ndoms);
if (!doms)
goto done;
......@@ -566,9 +566,8 @@ static int generate_sched_domains(struct cpumask **domains,
*dattr = SD_ATTR_INIT;
update_domain_attr_tree(dattr, &top_cpuset);
}
cpumask_copy(doms, top_cpuset.cpus_allowed);
cpumask_copy(doms[0], top_cpuset.cpus_allowed);
ndoms = 1;
goto done;
}
......@@ -636,7 +635,7 @@ static int generate_sched_domains(struct cpumask **domains,
* Now we know how many domains to create.
* Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
*/
doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL);
doms = alloc_sched_domains(ndoms);
if (!doms)
goto done;
......@@ -656,7 +655,7 @@ static int generate_sched_domains(struct cpumask **domains,
continue;
}
dp = doms + nslot;
dp = doms[nslot];
if (nslot == ndoms) {
static int warnings = 10;
......@@ -718,7 +717,7 @@ static int generate_sched_domains(struct cpumask **domains,
static void do_rebuild_sched_domains(struct work_struct *unused)
{
struct sched_domain_attr *attr;
struct cpumask *doms;
cpumask_var_t *doms;
int ndoms;
get_online_cpus();
......@@ -2052,7 +2051,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
unsigned long phase, void *unused_cpu)
{
struct sched_domain_attr *attr;
struct cpumask *doms;
cpumask_var_t *doms;
int ndoms;
switch (phase) {
......@@ -2537,15 +2536,9 @@ const struct file_operations proc_cpuset_operations = {
};
#endif /* CONFIG_PROC_PID_CPUSET */
/* Display task cpus_allowed, mems_allowed in /proc/<pid>/status file. */
/* Display task mems_allowed in /proc/<pid>/status file. */
void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
{
seq_printf(m, "Cpus_allowed:\t");
seq_cpumask(m, &task->cpus_allowed);
seq_printf(m, "\n");
seq_printf(m, "Cpus_allowed_list:\t");
seq_cpumask_list(m, &task->cpus_allowed);
seq_printf(m, "\n");
seq_printf(m, "Mems_allowed:\t");
seq_nodemask(m, &task->mems_allowed);
seq_printf(m, "\n");
......
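The move from a flat struct cpumask array to cpumask_var_t[] is needed
because, with CONFIG_CPUMASK_OFFSTACK, each mask is a separate allocation.
A sketch of the shape the new helper takes (the real alloc_sched_domains()
and free_sched_domains() live in kernel/sched.c):

    /* Sketch: allocate ndoms cpumasks for partition_sched_domains().
     * With CONFIG_CPUMASK_OFFSTACK each cpumask_var_t needs its own
     * allocation; otherwise alloc_cpumask_var() is essentially free. */
    cpumask_var_t *alloc_sched_domains(unsigned int ndoms)
    {
            cpumask_var_t *doms;
            unsigned int i;

            doms = kmalloc(sizeof(*doms) * ndoms, GFP_KERNEL);
            if (!doms)
                    return NULL;
            for (i = 0; i < ndoms; i++) {
                    if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) {
                            free_sched_domains(doms, i);
                            return NULL;
                    }
            }
            return doms;
    }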
......@@ -111,9 +111,9 @@ static void __exit_signal(struct task_struct *tsk)
* We won't ever get here for the group leader, since it
* will have been the last reference on the signal_struct.
*/
sig->utime = cputime_add(sig->utime, task_utime(tsk));
sig->stime = cputime_add(sig->stime, task_stime(tsk));
sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
sig->utime = cputime_add(sig->utime, tsk->utime);
sig->stime = cputime_add(sig->stime, tsk->stime);
sig->gtime = cputime_add(sig->gtime, tsk->gtime);
sig->min_flt += tsk->min_flt;
sig->maj_flt += tsk->maj_flt;
sig->nvcsw += tsk->nvcsw;
......@@ -1210,6 +1210,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
struct signal_struct *psig;
struct signal_struct *sig;
unsigned long maxrss;
cputime_t tgutime, tgstime;
/*
* The resource counters for the group leader are in its
......@@ -1225,20 +1226,23 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
* need to protect the access to parent->signal fields,
* as other threads in the parent group can be right
* here reaping other children at the same time.
*
* We use thread_group_times() to get times for the thread
* group, which consolidates times for all threads in the
* group including the group leader.
*/
thread_group_times(p, &tgutime, &tgstime);
spin_lock_irq(&p->real_parent->sighand->siglock);
psig = p->real_parent->signal;
sig = p->signal;
psig->cutime =
cputime_add(psig->cutime,
cputime_add(p->utime,
cputime_add(sig->utime,
sig->cutime)));
cputime_add(tgutime,
sig->cutime));
psig->cstime =
cputime_add(psig->cstime,
cputime_add(p->stime,
cputime_add(sig->stime,
sig->cstime)));
cputime_add(tgstime,
sig->cstime));
psig->cgtime =
cputime_add(psig->cgtime,
cputime_add(p->gtime,
......
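The comment above leans on the semantics of the new helper. Roughly, and not
line-for-line the in-tree code, thread_group_times() scales the raw
tick-based group times against the scheduler's sum_exec_runtime and uses the
prev_utime/prev_stime fields added to signal_struct to keep successive reads
monotonic:

    /* Sketch of the idea behind thread_group_times(), under
     * !CONFIG_VIRT_CPU_ACCOUNTING; not the exact kernel code. */
    void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
    {
            struct signal_struct *sig = p->signal;
            struct task_cputime cputime;
            cputime_t rtime, utime, total;

            thread_group_cputime(p, &cputime);      /* raw tick sums */
            total = cputime_add(cputime.utime, cputime.stime);
            rtime = nsecs_to_cputime(cputime.sum_exec_runtime);

            if (total) {            /* split rtime in the utime:stime ratio */
                    u64 tmp = (u64)rtime * cputime.utime;

                    do_div(tmp, total);
                    utime = (cputime_t)tmp;
            } else {
                    utime = rtime;
            }

            /* never report values lower than a previous read */
            sig->prev_utime = max(sig->prev_utime, utime);
            sig->prev_stime = max(sig->prev_stime,
                                  cputime_sub(rtime, sig->prev_utime));
            *ut = sig->prev_utime;
            *st = sig->prev_stime;
    }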
......@@ -884,6 +884,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
sig->gtime = cputime_zero;
sig->cgtime = cputime_zero;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
sig->prev_utime = sig->prev_stime = cputime_zero;
#endif
sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
......@@ -1066,8 +1069,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->gtime = cputime_zero;
p->utimescaled = cputime_zero;
p->stimescaled = cputime_zero;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
p->prev_utime = cputime_zero;
p->prev_stime = cputime_zero;
#endif
p->default_timer_slack_ns = current->timer_slack_ns;
......
......@@ -870,7 +870,7 @@ static void gdb_cmd_getregs(struct kgdb_state *ks)
/*
* All threads that don't have debuggerinfo should be
* in __schedule() sleeping, since all other CPUs
* in schedule() sleeping, since all other CPUs
* are in kgdb_wait, and thus have debuggerinfo.
*/
if (local_debuggerinfo) {
......
......@@ -535,14 +535,12 @@ struct rq {
#define CPU_LOAD_IDX_MAX 5
unsigned long cpu_load[CPU_LOAD_IDX_MAX];
#ifdef CONFIG_NO_HZ
unsigned long last_tick_seen;
unsigned char in_nohz_recently;
#endif
/* capture load from *all* tasks on this cpu: */
struct load_weight load;
unsigned long nr_load_updates;
u64 nr_switches;
u64 nr_migrations_in;
struct cfs_rq cfs;
struct rt_rq rt;
......@@ -591,6 +589,8 @@ struct rq {
u64 rt_avg;
u64 age_stamp;
u64 idle_stamp;
u64 avg_idle;
#endif
/* calc_load related fields */
......@@ -772,7 +772,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
if (!sched_feat_names[i])
return -EINVAL;
filp->f_pos += cnt;
*ppos += cnt;
return cnt;
}
......@@ -2017,6 +2017,7 @@ void kthread_bind(struct task_struct *p, unsigned int cpu)
}
spin_lock_irqsave(&rq->lock, flags);
update_rq_clock(rq);
set_task_cpu(p, cpu);
p->cpus_allowed = cpumask_of_cpu(cpu);
p->rt.nr_cpus_allowed = 1;
......@@ -2078,7 +2079,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
#endif
if (old_cpu != new_cpu) {
p->se.nr_migrations++;
new_rq->nr_migrations_in++;
#ifdef CONFIG_SCHEDSTATS
if (task_hot(p, old_rq->clock, NULL))
schedstat_inc(p, se.nr_forced2_migrations);
......@@ -2115,6 +2115,7 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
* it is sufficient to simply update the task's cpu field.
*/
if (!p->se.on_rq && !task_running(rq, p)) {
update_rq_clock(rq);
set_task_cpu(p, dest_cpu);
return 0;
}
......@@ -2376,14 +2377,15 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
task_rq_unlock(rq, &flags);
cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
if (cpu != orig_cpu)
if (cpu != orig_cpu) {
local_irq_save(flags);
rq = cpu_rq(cpu);
update_rq_clock(rq);
set_task_cpu(p, cpu);
local_irq_restore(flags);
}
rq = task_rq_lock(p, &flags);
if (rq != orig_rq)
update_rq_clock(rq);
WARN_ON(p->state != TASK_WAKING);
cpu = task_cpu(p);
......@@ -2440,6 +2442,17 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
#ifdef CONFIG_SMP
if (p->sched_class->task_wake_up)
p->sched_class->task_wake_up(rq, p);
if (unlikely(rq->idle_stamp)) {
u64 delta = rq->clock - rq->idle_stamp;
u64 max = 2*sysctl_sched_migration_cost;
if (delta > max)
rq->avg_idle = max;
else
update_avg(&rq->avg_idle, delta);
rq->idle_stamp = 0;
}
#endif
out:
task_rq_unlock(rq, &flags);
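The idle_stamp/avg_idle pair introduced here is the scheduler's estimate of
how long this CPU typically stays idle; samples are clamped at
2*sysctl_sched_migration_cost so one long idle spell cannot dominate.
update_avg() is the scheduler's existing moving-average helper, roughly:

    /* Exponential moving average with a 1/8 weight for each new sample;
     * used above to fold each observed idle period into rq->avg_idle. */
    static void update_avg(u64 *avg, u64 sample)
    {
            s64 diff = sample - *avg;

            *avg += diff >> 3;
    }

idle_balance() further down then skips newidle balancing entirely when
avg_idle is shorter than the migration cost, on the theory that the CPU will
be busy again before a pulled task could pay for itself.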
......@@ -2545,6 +2558,7 @@ static void __sched_fork(struct task_struct *p)
void sched_fork(struct task_struct *p, int clone_flags)
{
int cpu = get_cpu();
unsigned long flags;
__sched_fork(p);
......@@ -2581,7 +2595,10 @@ void sched_fork(struct task_struct *p, int clone_flags)
#ifdef CONFIG_SMP
cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0);
#endif
local_irq_save(flags);
update_rq_clock(cpu_rq(cpu));
set_task_cpu(p, cpu);
local_irq_restore(flags);
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
if (likely(sched_info_on()))
......@@ -2848,14 +2865,14 @@ context_switch(struct rq *rq, struct task_struct *prev,
*/
arch_start_context_switch(prev);
if (unlikely(!mm)) {
if (likely(!mm)) {
next->active_mm = oldmm;
atomic_inc(&oldmm->mm_count);
enter_lazy_tlb(oldmm, next);
} else
switch_mm(oldmm, mm, next);
if (unlikely(!prev->mm)) {
if (likely(!prev->mm)) {
prev->active_mm = NULL;
rq->prev_mm = oldmm;
}
......@@ -3017,15 +3034,6 @@ static void calc_load_account_active(struct rq *this_rq)
}
}
/*
* Externally visible per-cpu scheduler statistics:
* cpu_nr_migrations(cpu) - number of migrations into that cpu
*/
u64 cpu_nr_migrations(int cpu)
{
return cpu_rq(cpu)->nr_migrations_in;
}
/*
* Update rq->cpu_load[] statistics. This function is usually called every
* scheduler tick (TICK_NSEC).
......@@ -4126,7 +4134,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
unsigned long flags;
struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
cpumask_setall(cpus);
cpumask_copy(cpus, cpu_online_mask);
/*
* When power savings policy is enabled for the parent domain, idle
......@@ -4289,7 +4297,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
int all_pinned = 0;
struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
cpumask_setall(cpus);
cpumask_copy(cpus, cpu_online_mask);
/*
* When power savings policy is enabled for the parent domain, idle
......@@ -4429,6 +4437,11 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
int pulled_task = 0;
unsigned long next_balance = jiffies + HZ;
this_rq->idle_stamp = this_rq->clock;
if (this_rq->avg_idle < sysctl_sched_migration_cost)
return;
for_each_domain(this_cpu, sd) {
unsigned long interval;
......@@ -4443,8 +4456,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
interval = msecs_to_jiffies(sd->balance_interval);
if (time_after(next_balance, sd->last_balance + interval))
next_balance = sd->last_balance + interval;
if (pulled_task)
if (pulled_task) {
this_rq->idle_stamp = 0;
break;
}
}
if (pulled_task || time_after(jiffies, this_rq->next_balance)) {