Commit e7858f52 authored by Ingo Molnar's avatar Ingo Molnar
Browse files

Merge branch 'cpu_stop' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc into sched/core

parents 27a9da65 bbf1bb3e
......@@ -182,16 +182,6 @@ Similarly, sched_expedited RCU provides the following:
sched_expedited-torture: Reader Pipe: 12660320201 95875 0 0 0 0 0 0 0 0 0
sched_expedited-torture: Reader Batch: 12660424885 0 0 0 0 0 0 0 0 0 0
sched_expedited-torture: Free-Block Circulation: 1090795 1090795 1090794 1090793 1090792 1090791 1090790 1090789 1090788 1090787 0
state: -1 / 0:0 3:0 4:0
As before, the first four lines are similar to those for RCU.
The last line shows the task-migration state. The first number is
-1 if synchronize_sched_expedited() is idle, -2 if in the process of
posting wakeups to the migration kthreads, and N when waiting on CPU N.
Each of the colon-separated fields following the "/" is a CPU:state pair.
Valid states are "0" for idle, "1" for waiting for quiescent state,
"2" for passed through quiescent state, and "3" when a race with a
CPU-hotplug event forces use of the synchronize_sched() primitive.
USAGE
......
......@@ -391,7 +391,6 @@ static void __init time_init_wq(void)
if (time_sync_wq)
return;
time_sync_wq = create_singlethread_workqueue("timesync");
stop_machine_create();
}
/*
......
......@@ -80,12 +80,6 @@ static void do_suspend(void)
shutting_down = SHUTDOWN_SUSPEND;
err = stop_machine_create();
if (err) {
printk(KERN_ERR "xen suspend: failed to setup stop_machine %d\n", err);
goto out;
}
#ifdef CONFIG_PREEMPT
/* If the kernel is preemptible, we need to freeze all the processes
to prevent them from being in the middle of a pagetable update
......@@ -93,7 +87,7 @@ static void do_suspend(void)
err = freeze_processes();
if (err) {
printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
goto out_destroy_sm;
goto out;
}
#endif
......@@ -136,12 +130,8 @@ static void do_suspend(void)
out_thaw:
#ifdef CONFIG_PREEMPT
thaw_processes();
out_destroy_sm:
#endif
stop_machine_destroy();
out:
#endif
shutting_down = SHUTDOWN_INVALID;
}
#endif /* CONFIG_PM_SLEEP */
......
......@@ -60,8 +60,6 @@ static inline long rcu_batches_completed_bh(void)
return 0;
}
extern int rcu_expedited_torture_stats(char *page);
static inline void rcu_force_quiescent_state(void)
{
}
......
......@@ -35,7 +35,6 @@ struct notifier_block;
extern void rcu_sched_qs(int cpu);
extern void rcu_bh_qs(int cpu);
extern int rcu_needs_cpu(int cpu);
extern int rcu_expedited_torture_stats(char *page);
#ifdef CONFIG_TREE_PREEMPT_RCU
......
#ifndef _LINUX_STOP_MACHINE
#define _LINUX_STOP_MACHINE
/* "Bogolock": stop the entire machine, disable interrupts. This is a
very heavy lock, which is equivalent to grabbing every spinlock
(and more). So the "read" side to such a lock is anything which
disables preempt. */
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/list.h>
#include <asm/system.h>
/*
* stop_cpu[s]() is a simplistic per-cpu maximum priority cpu
* monopolization mechanism. The caller can specify a non-sleeping
* function to be executed on a single or multiple cpus preempting all
* other processes and monopolizing those cpus until it finishes.
*
* Resources for this mechanism are preallocated when a cpu is brought
* up and requests are guaranteed to be served as long as the target
* cpus are online.
*/
/*
 * Callback type accepted by the stop_*cpu*() entry points below: it takes
 * one opaque argument and returns an int status.
 */
typedef int (*cpu_stop_fn_t)(void *arg);
#ifdef CONFIG_SMP
/*
 * CONFIG_SMP variant of a stop request: the callback, its argument and the
 * list linkage used to hang the request off a stopper.
 */
struct cpu_stop_work {
struct list_head list; /* cpu_stopper->works */
/* callback to run and the opaque argument handed to it */
cpu_stop_fn_t fn;
void *arg;
/*
 * NOTE(review): struct cpu_stop_done is defined outside this header;
 * presumably completion/result tracking for the request - confirm.
 */
struct cpu_stop_done *done;
};
/*
 * CONFIG_SMP entry points; the implementations live out of line and are
 * not visible from this header.
 *
 * stop_one_cpu()        - run fn(arg) on @cpu; returns an int status.
 * stop_one_cpu_nowait() - same request, but the caller supplies @work_buf
 *                         and gets no return value (presumably it does not
 *                         wait for fn to finish - confirm).
 * stop_cpus()           - run fn(arg) on every cpu in @cpumask.
 * try_stop_cpus()       - NOTE(review): presumably a "try" flavour of
 *                         stop_cpus(); exact failure semantics are not
 *                         visible here - confirm against the implementation.
 */
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg);
void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
struct cpu_stop_work *work_buf);
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
#else /* CONFIG_SMP */
#include <linux/workqueue.h>
/*
 * !CONFIG_SMP variant of a stop request. It embeds a regular work item:
 * stop_one_cpu_nowait() fills in all three fields and schedules @work, and
 * stop_one_cpu_nowait_workfn() later recovers the request from @work and
 * calls fn(arg).
 */
struct cpu_stop_work {
struct work_struct work;
/* callback to run and the opaque argument handed to it */
cpu_stop_fn_t fn;
void *arg;
};
/*
 * !CONFIG_SMP stop_one_cpu(): with preemption disabled, call fn(arg)
 * directly when @cpu is the cpu we are running on.
 *
 * Returns whatever fn returned, or -ENOENT when @cpu is not the current
 * cpu (fn is not called in that case).
 */
static inline int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	preempt_disable();
	ret = (cpu == smp_processor_id()) ? fn(arg) : -ENOENT;
	preempt_enable();

	return ret;
}
/*
 * Work handler behind the !CONFIG_SMP stop_one_cpu_nowait(): map the work
 * item back to its enclosing cpu_stop_work and invoke the stored callback
 * with preemption disabled.  The callback's return value is discarded.
 */
static void stop_one_cpu_nowait_workfn(struct work_struct *work)
{
	struct cpu_stop_work *req;

	req = container_of(work, struct cpu_stop_work, work);

	preempt_disable();
	req->fn(req->arg);
	preempt_enable();
}
/*
 * !CONFIG_SMP stop_one_cpu_nowait(): if @cpu is the current cpu, record
 * fn/arg in the caller-provided @work_buf and hand it to schedule_work();
 * stop_one_cpu_nowait_workfn() then runs the callback.
 *
 * When @cpu is not the current cpu nothing is queued and @work_buf is left
 * untouched.
 */
static inline void stop_one_cpu_nowait(unsigned int cpu,
				       cpu_stop_fn_t fn, void *arg,
				       struct cpu_stop_work *work_buf)
{
	if (cpu != smp_processor_id())
		return;

	work_buf->fn = fn;
	work_buf->arg = arg;
	INIT_WORK(&work_buf->work, stop_one_cpu_nowait_workfn);
	schedule_work(&work_buf->work);
}
/*
 * !CONFIG_SMP stop_cpus(): the only cpu that can be stopped is the one we
 * are running on, so this reduces to stop_one_cpu() when that cpu is set
 * in @cpumask.
 *
 * Returns stop_one_cpu()'s result, or -ENOENT when the current cpu is not
 * part of @cpumask.
 */
static inline int stop_cpus(const struct cpumask *cpumask,
			    cpu_stop_fn_t fn, void *arg)
{
	if (!cpumask_test_cpu(raw_smp_processor_id(), cpumask))
		return -ENOENT;

	return stop_one_cpu(raw_smp_processor_id(), fn, arg);
}
/*
 * !CONFIG_SMP try_stop_cpus(): identical to stop_cpus() in this
 * configuration; the call is forwarded unchanged and its result returned.
 */
static inline int try_stop_cpus(const struct cpumask *cpumask,
				cpu_stop_fn_t fn, void *arg)
{
	int ret = stop_cpus(cpumask, fn, arg);

	return ret;
}
#endif /* CONFIG_SMP */
/*
* stop_machine "Bogolock": stop the entire machine, disable
* interrupts. This is a very heavy lock, which is equivalent to
* grabbing every spinlock (and more). So the "read" side to such a
* lock is anything which disables preempt.
*/
#if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP)
/**
......@@ -36,24 +124,7 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
*/
int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
/**
* stop_machine_create: create all stop_machine threads
*
* Description: This causes all stop_machine threads to be created before
* stop_machine actually gets called. This can be used by subsystems that
* need a non failing stop_machine infrastructure.
*/
int stop_machine_create(void);
/**
* stop_machine_destroy: destroy all stop_machine threads
*
* Description: This causes all stop_machine threads which were created with
* stop_machine_create to be destroyed again.
*/
void stop_machine_destroy(void);
#else
#else /* CONFIG_STOP_MACHINE && CONFIG_SMP */
static inline int stop_machine(int (*fn)(void *), void *data,
const struct cpumask *cpus)
......@@ -65,8 +136,5 @@ static inline int stop_machine(int (*fn)(void *), void *data,
return ret;
}
static inline int stop_machine_create(void) { return 0; }
static inline void stop_machine_destroy(void) { }
#endif /* CONFIG_SMP */
#endif /* _LINUX_STOP_MACHINE */
#endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */
#endif /* _LINUX_STOP_MACHINE */
......@@ -68,7 +68,7 @@ obj-$(CONFIG_USER_NS) += user_namespace.o
obj-$(CONFIG_PID_NS) += pid_namespace.o
obj-$(CONFIG_IKCONFIG) += configs.o
obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o
obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
obj-$(CONFIG_SMP) += stop_machine.o
obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
obj-$(CONFIG_AUDIT) += audit.o auditfilter.o audit_watch.o
obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
......
......@@ -266,9 +266,6 @@ int __ref cpu_down(unsigned int cpu)
{
int err;
err = stop_machine_create();
if (err)
return err;
cpu_maps_update_begin();
if (cpu_hotplug_disabled) {
......@@ -280,7 +277,6 @@ int __ref cpu_down(unsigned int cpu)
out:
cpu_maps_update_done();
stop_machine_destroy();
return err;
}
EXPORT_SYMBOL(cpu_down);
......@@ -361,9 +357,6 @@ int disable_nonboot_cpus(void)
{
int cpu, first_cpu, error;
error = stop_machine_create();
if (error)
return error;
cpu_maps_update_begin();
first_cpu = cpumask_first(cpu_online_mask);
/*
......@@ -394,7 +387,6 @@ int disable_nonboot_cpus(void)
printk(KERN_ERR "Non-boot CPUs are not disabled\n");
}
cpu_maps_update_done();
stop_machine_destroy();
return error;
}
......
......@@ -723,16 +723,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
return -EFAULT;
name[MODULE_NAME_LEN-1] = '\0';
/* Create stop_machine threads since free_module relies on
* a non-failing stop_machine call. */
ret = stop_machine_create();
if (ret)
return ret;
if (mutex_lock_interruptible(&module_mutex) != 0) {
ret = -EINTR;
goto out_stop;
}
if (mutex_lock_interruptible(&module_mutex) != 0)
return -EINTR;
mod = find_module(name);
if (!mod) {
......@@ -792,8 +784,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
out:
mutex_unlock(&module_mutex);
out_stop:
stop_machine_destroy();
return ret;
}
......
......@@ -669,7 +669,7 @@ static struct rcu_torture_ops sched_expedited_ops = {
.sync = synchronize_sched_expedited,
.cb_barrier = NULL,
.fqs = rcu_sched_force_quiescent_state,
.stats = rcu_expedited_torture_stats,
.stats = NULL,
.irq_capable = 1,
.name = "sched_expedited"
};
......
......@@ -55,9 +55,9 @@
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/percpu.h>
#include <linux/kthread.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stop_machine.h>
#include <linux/sysctl.h>
#include <linux/syscalls.h>
#include <linux/times.h>
......@@ -539,15 +539,13 @@ struct rq {
int post_schedule;
int active_balance;
int push_cpu;
struct cpu_stop_work active_balance_work;
/* cpu of this runqueue: */
int cpu;
int online;
unsigned long avg_load_per_task;
struct task_struct *migration_thread;
struct list_head migration_queue;
u64 rt_avg;
u64 age_stamp;
u64 idle_stamp;
......@@ -2037,21 +2035,18 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
__set_task_cpu(p, new_cpu);
}
struct migration_req {
struct list_head list;
struct migration_arg {
struct task_struct *task;
int dest_cpu;
struct completion done;
};
static int migration_cpu_stop(void *data);
/*
* The task's runqueue lock must be held.
* Returns true if you have to wait for migration thread.
*/
static int
migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
static bool migrate_task(struct task_struct *p, int dest_cpu)
{
struct rq *rq = task_rq(p);
......@@ -2059,15 +2054,7 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
* If the task is not on a runqueue (and not running), then
* the next wake-up will properly place the task.
*/
if (!p->se.on_rq && !task_running(rq, p))
return 0;
init_completion(&req->done);
req->task = p;
req->dest_cpu = dest_cpu;
list_add(&req->list, &rq->migration_queue);
return 1;
return p->se.on_rq || task_running(rq, p);
}
/*
......@@ -3110,7 +3097,6 @@ static void update_cpu_load(struct rq *this_rq)
void sched_exec(void)
{
struct task_struct *p = current;
struct migration_req req;
unsigned long flags;
struct rq *rq;
int dest_cpu;
......@@ -3124,17 +3110,11 @@ void sched_exec(void)
* select_task_rq() can race against ->cpus_allowed
*/
if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
likely(cpu_active(dest_cpu)) &&
migrate_task(p, dest_cpu, &req)) {
/* Need to wait for migration thread (might exit: take ref). */
struct task_struct *mt = rq->migration_thread;
likely(cpu_active(dest_cpu)) && migrate_task(p, dest_cpu)) {
struct migration_arg arg = { p, dest_cpu };
get_task_struct(mt);
task_rq_unlock(rq, &flags);
wake_up_process(mt);
put_task_struct(mt);
wait_for_completion(&req.done);
stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
return;
}
unlock:
......@@ -5290,17 +5270,15 @@ static inline void sched_init_granularity(void)
/*
* This is how migration works:
*
* 1) we queue a struct migration_req structure in the source CPU's
* runqueue and wake up that CPU's migration thread.
* 2) we down() the locked semaphore => thread blocks.
* 3) migration thread wakes up (implicitly it forces the migrated
* thread off the CPU)
* 4) it gets the migration request and checks whether the migrated
* task is still in the wrong runqueue.
* 5) if it's in the wrong runqueue then the migration thread removes
* 1) we invoke migration_cpu_stop() on the target CPU using
* stop_one_cpu().
* 2) stopper starts to run (implicitly forcing the migrated thread
* off the CPU)
* 3) it checks whether the migrated task is still in the wrong runqueue.
* 4) if it's in the wrong runqueue then the migration thread removes
* it and puts it into the right queue.
* 6) migration thread up()s the semaphore.
* 7) we wake up and the migration is done.
* 5) stopper completes and stop_one_cpu() returns and the migration
* is done.
*/
/*
......@@ -5314,9 +5292,9 @@ static inline void sched_init_granularity(void)
*/
int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
{
struct migration_req req;
unsigned long flags;
struct rq *rq;
unsigned int dest_cpu;
int ret = 0;
/*
......@@ -5354,15 +5332,12 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
if (cpumask_test_cpu(task_cpu(p), new_mask))
goto out;
if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) {
dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
if (migrate_task(p, dest_cpu)) {
struct migration_arg arg = { p, dest_cpu };
/* Need help from migration thread: drop lock and wait. */
struct task_struct *mt = rq->migration_thread;
get_task_struct(mt);
task_rq_unlock(rq, &flags);
wake_up_process(mt);
put_task_struct(mt);
wait_for_completion(&req.done);
stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
tlb_migrate_finish(p->mm);
return 0;
}
......@@ -5420,70 +5395,22 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
return ret;
}
#define RCU_MIGRATION_IDLE 0
#define RCU_MIGRATION_NEED_QS 1
#define RCU_MIGRATION_GOT_QS 2
#define RCU_MIGRATION_MUST_SYNC 3
/*
* migration_thread - this is a highprio system thread that performs
* thread migration by bumping thread off CPU then 'pushing' onto
* another runqueue.
* migration_cpu_stop - this will be executed by a highprio stopper thread
* and performs thread migration by bumping thread off CPU then
* 'pushing' onto another runqueue.
*/
static int migration_thread(void *data)
static int migration_cpu_stop(void *data)
{
int badcpu;
int cpu = (long)data;
struct rq *rq;
rq = cpu_rq(cpu);
BUG_ON(rq->migration_thread != current);
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop()) {
struct migration_req *req;
struct list_head *head;
raw_spin_lock_irq(&rq->lock);
if (cpu_is_offline(cpu)) {
raw_spin_unlock_irq(&rq->lock);
break;
}
if (rq->active_balance) {
active_load_balance(rq, cpu);
rq->active_balance = 0;
}
head = &rq->migration_queue;
if (list_empty(head)) {
raw_spin_unlock_irq(&rq->lock);
schedule();
set_current_state(TASK_INTERRUPTIBLE);
continue;
}
req = list_entry(head->next, struct migration_req, list);
list_del_init(head->next);
if (req->task != NULL) {
raw_spin_unlock(&rq->lock);
__migrate_task(req->task, cpu, req->dest_cpu);
} else if (likely(cpu == (badcpu = smp_processor_id()))) {
req->dest_cpu = RCU_MIGRATION_GOT_QS;
raw_spin_unlock(&rq->lock);
} else {
req->dest_cpu = RCU_MIGRATION_MUST_SYNC;
raw_spin_unlock(&rq->lock);
WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu);
}
local_irq_enable();
complete(&req->done);
}
__set_current_state(TASK_RUNNING);
struct migration_arg *arg = data;
/*
* The original target cpu might have gone down and we might
* be on another cpu but it doesn't matter.
*/
local_irq_disable();
__migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu);
local_irq_enable();
return 0;
}
......@@ -5850,35 +5777,20 @@ static void set_rq_offline(struct rq *rq)
static int __cpuinit
migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
struct task_struct *p;
int cpu = (long)hcpu;
unsigned long flags;
struct rq *rq;
struct rq *rq = cpu_rq(cpu);
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
p = kthread_create(migration_thread, hcpu, "migration/%d", cpu);
if (IS_ERR(p))
return NOTIFY_BAD;
kthread_bind(p, cpu);
/* Must be high prio: stop_machine expects to yield to it. */
rq = task_rq_lock(p, &flags);
__setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
task_rq_unlock(rq, &flags);
get_task_struct(p);
cpu_rq(cpu)->migration_thread = p;
rq->calc_load_update = calc_load_update;
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
/* Strictly unnecessary, as first user will wake it. */
wake_up_process(cpu_rq(cpu)->migration_thread);
/* Update our root-domain */
rq = cpu_rq(cpu);
raw_spin_lock_irqsave(&rq->lock, flags);
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
......@@ -5889,25 +5801,9 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
if (!cpu_rq(cpu)->migration_thread)
break;
/* Unbind it from offline cpu so it can run. Fall thru. */
kthread_bind(cpu_rq(cpu)->migration_thread,
cpumask_any(cpu_online_mask));
kthread_stop(cpu_rq(cpu)->migration_thread);
put_task_struct(cpu_rq(cpu)->migration_thread);
cpu_rq(cpu)->migration_thread = NULL;
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
migrate_live_tasks(cpu);
rq = cpu_rq(cpu);
kthread_stop(rq->migration_thread);
put_task_struct(rq->migration_thread);
rq->migration_thread = NULL;
/* Idle task back to normal (off runqueue, low prio) */
raw_spin_lock_irq(&rq->lock);
deactivate_task(rq, rq->idle, 0);
......@@ -5918,29 +5814,11 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
migrate_nr_uninterruptible(rq);
BUG_ON(rq->nr_running != 0);
calc_global_load_remove(rq);
/*
* No need to migrate the tasks: it was best-effort if
* they didn't take sched_hotcpu_mutex. Just wake up
* the requestors.
*/
raw_spin_lock_irq(&rq->lock);
while (!list_empty(&rq->migration_queue)) {
struct migration_req *req;
req = list_entry(rq->migration_queue.next,
struct migration_req, list);
list_del_init(&req->list);
raw_spin_unlock_irq(&rq->lock);
complete(&req->done);
raw_spin_lock_irq(&rq->lock);
}
raw_spin_unlock_irq(&rq->lock);
break;
case CPU_DYING:
case CPU_DYING_FROZEN:
/* Update our root-domain */
rq = cpu_rq(cpu);
raw_spin_lock_irqsave(&rq->lock, flags);
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
......@@ -7757,10 +7635,8 @@ void __init sched_init(void)
rq->push_cpu = 0;
rq->cpu = i;
rq->online = 0;
rq->migration_thread = NULL;