Commit 8b8e8c1b authored by Yinghai Lu, committed by Ingo Molnar
Browse files

x86: remove irqbalance in kernel for 32 bit



This has been deprecated for years, the user space irqbalanced utility
works better with numa, has configurable policies, etc...
Signed-off-by: Yinghai Lu <yhlu.kernel@gmai.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 6d50bc26
......@@ -1254,14 +1254,6 @@ config EFI
resultant kernel should continue to boot on existing non-EFI
platforms.
config IRQBALANCE
def_bool y
prompt "Enable kernel irq balancing"
depends on X86_32 && SMP && X86_IO_APIC
help
The default yes will allow the kernel to do irq load balancing.
Saying no will keep the kernel from doing irq load balancing.
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
......
......@@ -287,7 +287,6 @@ CONFIG_MTRR=y
# CONFIG_MTRR_SANITIZER is not set
CONFIG_X86_PAT=y
CONFIG_EFI=y
# CONFIG_IRQBALANCE is not set
CONFIG_SECCOMP=y
# CONFIG_HZ_100 is not set
# CONFIG_HZ_250 is not set
......
......@@ -371,408 +371,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
spin_unlock_irqrestore(&ioapic_lock, flags);
}
#if defined(CONFIG_IRQBALANCE)
# include <asm/processor.h> /* kernel_thread() */
# include <linux/kernel_stat.h> /* kstat */
# include <linux/slab.h> /* kmalloc() */
# include <linux/timer.h>
/*
 * Sentinel start value of irqbalance_disabled: balanced_irq_init() replaces
 * it with NO_BALANCE_IRQ unless irqbalance_disable() has already set the flag.
 */
#define IRQBALANCE_CHECK_ARCH -999
/* Upper and lower clamps (in jiffies) applied to balanced_irq_interval. */
#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
/* Step added to the interval when a balancing pass found nothing to move. */
#define BALANCED_IRQ_MORE_DELTA (HZ/10)
/* Step subtracted from the interval after a pass that moved or rotated IRQs. */
#define BALANCED_IRQ_LESS_DELTA (HZ)
/* Non-zero disables balancing; starts out as the IRQBALANCE_CHECK_ARCH sentinel. */
static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
/* Set to 1 by balanced_irq_init() when siblings exist and a CPU numbered >= 2 is online. */
static int physical_balance __read_mostly;
/* Current delay between balancing passes, in jiffies. */
static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
/*
 * Per-CPU bookkeeping for the balancer.
 *   last_irq  - per-IRQ interrupt count sampled on the previous pass
 *   irq_delta - per-IRQ load accumulated during the current pass
 *   irq       - total load attributed to this CPU during the current pass
 * Both arrays are allocated with nr_irqs entries in balanced_irq_init().
 */
static struct irq_cpu_info {
unsigned long *last_irq;
unsigned long *irq_delta;
unsigned long irq;
} irq_cpu_data[NR_CPUS];
/* Shorthand accessors for irq_cpu_data[]. */
#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq)
#define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq])
#define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq])
/* True when @cpu is idle and its idle_timestamp is more than one jiffy older than @now. */
#define IDLE_ENOUGH(cpu,now) \
(idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
/* True when @cpu is a member of @allowed_mask. */
#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
/* First CPU in @i's sibling map; used as the index that represents the whole package. */
#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
/* Boot-time default mask (all CPUs) copied into every balance_irq_affinity[] slot. */
static cpumask_t balance_irq_affinity_init __initdata = CPU_MASK_ALL;
/* Per-IRQ mask of CPUs the balancer may target; indexed by IRQ number. */
static cpumask_t *balance_irq_affinity;
/*
 * Seed every slot of the dynamically sized balance_irq_affinity[] array
 * with the boot-time default mask, balance_irq_affinity_init.
 *
 * @data: the struct dyn_array descriptor for the array; *da->name is the
 *        array base and *da->nr the number of elements.
 *
 * The array is declared as cpumask_t * and written with cpumask_t values
 * in set_balance_irq_affinity(), so both the element pointer here and the
 * element size handed to DEFINE_DYN_ARRAY() are expressed as cpumask_t.
 * (They previously named "struct balance_irq_affinity", a type that is
 * never declared, which cannot be compiled.)
 */
static void __init irq_affinity_init_work(void *data)
{
	struct dyn_array *da = data;
	cpumask_t *affinity = *da->name;
	int i;

	for (i = 0; i < *da->nr; i++)
		affinity[i] = balance_irq_affinity_init;
}

DEFINE_DYN_ARRAY(balance_irq_affinity, sizeof(cpumask_t), nr_irqs, PAGE_SIZE, irq_affinity_init_work);
/*
 * Record @mask as the set of CPUs the in-kernel balancer may pick for @irq.
 *
 * @irq:  index into balance_irq_affinity[]; no range check is performed here.
 * @mask: CPU mask stored for that IRQ.
 */
void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
{
	cpumask_t *slot = &balance_irq_affinity[irq];

	*slot = mask;
}
/*
 * Pick the CPU an IRQ should be moved to, scanning away from @curr_cpu.
 *
 * @curr_cpu:     CPU the scan starts from (the first candidate examined is
 *                its neighbour, not @curr_cpu itself).
 * @allowed_mask: only CPUs set in this mask may be returned.
 * @now:          timestamp compared against each CPU's idle_timestamp by
 *                IDLE_ENOUGH().
 * @direction:    1 scans towards higher CPU numbers, any other value
 *                towards lower ones; both wrap around NR_CPUS.
 *
 * The first lap only accepts a CPU that is online, allowed and idle enough.
 * Once the scan has come back round to @curr_cpu without a match, the
 * idleness requirement is dropped and the next online, allowed CPU wins.
 *
 * If a second full lap also finds nothing (no CPU is both online and in
 * @allowed_mask), @curr_cpu is returned so that the caller sees "no move"
 * instead of this function spinning forever.
 *
 * Returns the chosen CPU number.
 */
static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
			unsigned long now, int direction)
{
	int search_idle = 1;
	int cpu = curr_cpu;

	for (;;) {
		/* Step to the neighbouring CPU slot, wrapping at either end. */
		if (direction == 1) {
			cpu++;
			if (cpu >= NR_CPUS)
				cpu = 0;
		} else {
			cpu--;
			if (cpu == -1)
				cpu = NR_CPUS-1;
		}

		if (cpu_online(cpu) && IRQ_ALLOWED(cpu, allowed_mask) &&
		    (!search_idle || IDLE_ENOUGH(cpu, now)))
			return cpu;

		if (unlikely(cpu == curr_cpu)) {
			/* Second fruitless lap: nothing is eligible at all. */
			if (!search_idle)
				return curr_cpu;
			/* First lap done: stop insisting on an idle CPU. */
			search_idle = 0;
		}
	}
}
/*
 * Rotate @irq away from @cpu to the next suitable CPU, if there is one.
 *
 * Does nothing while irqbalance_disabled is non-zero. Candidates are the
 * online CPUs that are also set in balance_irq_affinity[@irq]; move() picks
 * among them scanning upwards. The retarget is only queued (through
 * set_pending_irq()) when move() returns a CPU other than @cpu.
 */
static inline void balance_irq(int cpu, int irq)
{
	unsigned long timestamp = jiffies;
	cpumask_t candidates;
	unsigned int target;

	if (irqbalance_disabled)
		return;

	cpus_and(candidates, cpu_online_map, balance_irq_affinity[irq]);
	target = move(cpu, candidates, timestamp, 1);
	if (cpu == target)
		return;

	set_pending_irq(irq, cpumask_of_cpu(target));
}
/*
 * Fallback balancing strategy: for every online CPU, hand each busy IRQ to
 * balance_irq() so it gets rotated to another CPU.
 *
 * @useful_load_threshold: an IRQ is rotated only if the delta recorded for
 *                         it on the CPU's package index is at least this.
 *
 * IRQs without an installed action are skipped. Afterwards the balancing
 * interval is shortened by BALANCED_IRQ_LESS_DELTA, but never below
 * MIN_BALANCED_IRQ_INTERVAL.
 */
static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
{
	struct irq_desc *desc;
	int cpu_idx, irq_idx;

	for_each_online_cpu(cpu_idx) {
		for (irq_idx = 0; irq_idx < nr_irqs; irq_idx++) {
			desc = irq_to_desc(irq_idx);
			/* Only installed IRQs with a significant load qualify. */
			if (desc->action &&
			    IRQ_DELTA(CPU_TO_PACKAGEINDEX(cpu_idx), irq_idx) >=
						useful_load_threshold)
				balance_irq(cpu_idx, irq_idx);
		}
	}

	balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
		balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
}
/*
 * Run one balancing pass.
 *
 * Step 1 samples kstat_irqs_cpu() for every online CPU and IRQ, turns the
 * counts into deltas since the previous pass and accumulates them per
 * package (IRQ_DELTA) and per CPU (CPU_IRQ).
 * Step 2 finds the least loaded package, then repeatedly the next heaviest
 * package, and tries to retarget one IRQ from the heavy package to the
 * least loaded sibling of the light one.
 *
 * Possible outcomes:
 *  - an IRQ is queued for retargeting with set_pending_irq() and
 *    balanced_irq_interval is shortened (not below MIN_BALANCED_IRQ_INTERVAL);
 *  - no further package is left to try but a significant imbalance was
 *    seen: rotate_irqs_among_cpus() is used instead;
 *  - otherwise balanced_irq_interval is lengthened (not above
 *    MAX_BALANCED_IRQ_INTERVAL).
 */
static void do_irq_balance(void)
{
int i, j;
unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
unsigned long move_this_load = 0;
int max_loaded = 0, min_loaded = 0;
int load;
/* Deltas below this value are treated as noise and ignored. */
unsigned long useful_load_threshold = balanced_irq_interval + 10;
int selected_irq;
int tmp_loaded, first_attempt = 1;
unsigned long tmp_cpu_irq;
unsigned long imbalance = 0;
cpumask_t allowed_mask, target_cpu_mask, tmp;
struct irq_desc *desc;
/* Step 1: refresh the per-CPU and per-package load figures. */
for_each_possible_cpu(i) {
int package_index;
/* The total is reset for every possible CPU, offline ones included. */
CPU_IRQ(i) = 0;
if (!cpu_online(i))
continue;
package_index = CPU_TO_PACKAGEINDEX(i);
for (j = 0; j < nr_irqs; j++) {
unsigned long value_now, delta;
/* Is this an active IRQ or balancing disabled ? */
desc = irq_to_desc(j);
if (!desc->action || irq_balancing_disabled(j))
continue;
/*
 * The package accumulator is cleared only while visiting the CPU
 * that is itself the package index; other siblings add on top.
 * NOTE(review): relies on the package-index CPU being visited
 * before its siblings - confirm the iteration order.
 */
if (package_index == i)
IRQ_DELTA(package_index, j) = 0;
/* Determine the total count per processor per IRQ */
value_now = (unsigned long) kstat_irqs_cpu(j, i);
/* Determine the activity per processor per IRQ */
delta = value_now - LAST_CPU_IRQ(i, j);
/* Update last_cpu_irq[][] for the next time */
LAST_CPU_IRQ(i, j) = value_now;
/* Ignore IRQs whose rate is less than the clock */
if (delta < useful_load_threshold)
continue;
/* update the load for the processor or package total */
IRQ_DELTA(package_index, j) += delta;
/* Keep track of the higher numbered sibling as well */
if (i != package_index)
CPU_IRQ(i) += delta;
/*
* We have sibling A and sibling B in the package
*
* cpu_irq[A] = load for cpu A + load for cpu B
* cpu_irq[B] = load for cpu B
*/
CPU_IRQ(package_index) += delta;
}
}
/* Find the least loaded processor package */
for_each_online_cpu(i) {
/* Only the CPU that is its own package index represents the package. */
if (i != CPU_TO_PACKAGEINDEX(i))
continue;
if (min_cpu_irq > CPU_IRQ(i)) {
min_cpu_irq = CPU_IRQ(i);
min_loaded = i;
}
}
/*
 * No upper bound on the first search; each later visit to tryanothercpu
 * only accepts packages strictly lighter than the one tried before.
 */
max_cpu_irq = ULONG_MAX;
tryanothercpu:
/*
* Look for heaviest loaded processor.
* We may come back to get the next heaviest loaded processor.
* Skip processors with trivial loads.
*/
tmp_cpu_irq = 0;
tmp_loaded = -1;
for_each_online_cpu(i) {
if (i != CPU_TO_PACKAGEINDEX(i))
continue;
if (max_cpu_irq <= CPU_IRQ(i))
continue;
if (tmp_cpu_irq < CPU_IRQ(i)) {
tmp_cpu_irq = CPU_IRQ(i);
tmp_loaded = i;
}
}
/* -1 means no package with a non-zero load below max_cpu_irq is left. */
if (tmp_loaded == -1) {
/*
* In the case of small number of heavy interrupt sources,
* loading some of the cpus too much. We use Ingo's original
* approach to rotate them around.
*/
if (!first_attempt && imbalance >= useful_load_threshold) {
rotate_irqs_among_cpus(useful_load_threshold);
return;
}
goto not_worth_the_effort;
}
first_attempt = 0; /* heaviest search */
max_cpu_irq = tmp_cpu_irq; /* load */
max_loaded = tmp_loaded; /* processor */
/* Half the gap: the load that would have to move to even the two out. */
imbalance = (max_cpu_irq - min_cpu_irq) / 2;
/*
* if imbalance is less than approx 10% of max load, then
* observe diminishing returns action. - quit
*/
/* NOTE: ">> 3" compares against 1/8 (12.5%) of the maximum load. */
if (imbalance < (max_cpu_irq >> 3))
goto not_worth_the_effort;
tryanotherirq:
/* if we select an IRQ to move that can't go where we want, then
* see if there is another one to try.
*/
move_this_load = 0;
selected_irq = -1;
for (j = 0; j < nr_irqs; j++) {
/* Is this an active IRQ? */
desc = irq_to_desc(j);
if (!desc->action)
continue;
/* Candidates must carry strictly less load than the imbalance. */
if (imbalance <= IRQ_DELTA(max_loaded, j))
continue;
/* Try to find the IRQ that is closest to the imbalance
* without going over.
*/
if (move_this_load < IRQ_DELTA(max_loaded, j)) {
move_this_load = IRQ_DELTA(max_loaded, j);
selected_irq = j;
}
}
/* Nothing movable on this package: try the next heaviest one. */
if (selected_irq == -1)
goto tryanothercpu;
/*
 * Lower the bar to the load just chosen, so that a retry through
 * tryanotherirq only considers IRQs with a strictly smaller delta.
 */
imbalance = move_this_load;
/* For physical_balance case, we accumulated both load
* values in the one of the siblings cpu_irq[],
* to use the same code for physical and logical processors
* as much as possible.
*
* NOTE: the cpu_irq[] array holds the sum of the load for
* sibling A and sibling B in the slot for the lowest numbered
* sibling (A), _AND_ the load for sibling B in the slot for
* the higher numbered sibling.
*
* We seek the least loaded sibling by making the comparison
* (A+B)/2 vs B
*/
load = CPU_IRQ(min_loaded) >> 1;
for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) {
if (load > CPU_IRQ(j)) {
/* This won't change cpu_sibling_map[min_loaded] */
load = CPU_IRQ(j);
min_loaded = j;
}
}
/* The target must be online and permitted by the IRQ's balance affinity. */
cpus_and(allowed_mask,
cpu_online_map,
balance_irq_affinity[selected_irq]);
target_cpu_mask = cpumask_of_cpu(min_loaded);
cpus_and(tmp, target_cpu_mask, allowed_mask);
if (!cpus_empty(tmp)) {
/* mark for change destination */
set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
/* Since we made a change, come back sooner to
* check for more variation.
*/
balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
return;
}
/* The chosen IRQ may not run on min_loaded: look for a smaller one. */
goto tryanotherirq;
not_worth_the_effort:
/*
* if we did not find an IRQ to move, then adjust the time interval
* upward
*/
balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
return;
}
/*
 * Thread function of the balancer (started as "kirqd" by
 * balanced_irq_init()).
 *
 * @unused: thread argument, not used.
 *
 * On entry every IRQ is steered to CPU 0 as a common starting point. The
 * thread then sleeps in interruptible timeouts and, once more than
 * balanced_irq_interval jiffies have passed since the previous pass, runs
 * do_irq_balance() with preemption disabled. The loop never terminates, so
 * the trailing return is not reached.
 */
static int balanced_irq(void *unused)
{
	struct irq_desc *desc;
	unsigned long last_pass = jiffies;
	long sleep_left = balanced_irq_interval;
	int irq;

	/* push everything to CPU 0 to give us a starting point. */
	for (irq = 0; irq < nr_irqs; irq++) {
		desc = irq_to_desc(irq);
		desc->pending_mask = cpumask_of_cpu(0);
		set_pending_irq(irq, cpumask_of_cpu(0));
	}

	set_freezable();
	while (1) {
		sleep_left = schedule_timeout_interruptible(sleep_left);
		try_to_freeze();

		/* Woken early: go back to sleep for what is left. */
		if (!time_after(jiffies, last_pass + balanced_irq_interval))
			continue;

		preempt_disable();
		do_irq_balance();
		last_pass = jiffies;
		sleep_left = balanced_irq_interval;
		preempt_enable();
	}
	return 0;
}
/*
 * Late initcall that sets up and starts the in-kernel IRQ balancer.
 *
 * Balancing stays off when it was disabled beforehand (irqbalance_disable()),
 * when NO_BALANCE_IRQ says so, or when fewer than two CPUs are online.
 * Otherwise the per-CPU bookkeeping arrays (nr_irqs entries each) are
 * allocated for every online CPU and the "kirqd" thread is started.
 *
 * Always returns 0: an allocation or thread-creation failure is logged, the
 * arrays are released, and boot continues without the balancer.
 */
static int __init balanced_irq_init(void)
{
	int i;
	cpumask_t tmp;

	/* Non-empty after the shift iff some CPU numbered >= 2 is online. */
	cpus_shift_right(tmp, cpu_online_map, 2);

	/* When not overwritten by the command line ask subarchitecture. */
	if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
		irqbalance_disabled = NO_BALANCE_IRQ;
	if (irqbalance_disabled)
		return 0;

	/* disable irqbalance completely if there is only one processor online */
	if (num_online_cpus() < 2) {
		irqbalance_disabled = 1;
		return 0;
	}

	/*
	 * Enable physical balance only if more than 1 physical processor
	 * is present
	 */
	if (smp_num_siblings > 1 && !cpus_empty(tmp))
		physical_balance = 1;

	for_each_online_cpu(i) {
		/* kcalloc() zeroes the arrays and checks nr_irqs * size for overflow. */
		irq_cpu_data[i].irq_delta = kcalloc(nr_irqs,
				sizeof(*irq_cpu_data[i].irq_delta), GFP_KERNEL);
		irq_cpu_data[i].last_irq = kcalloc(nr_irqs,
				sizeof(*irq_cpu_data[i].last_irq), GFP_KERNEL);
		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
			printk(KERN_ERR "balanced_irq_init: out of memory\n");
			goto failed;
		}
	}

	printk(KERN_INFO "Starting balanced_irq\n");
	if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
		return 0;
	printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq\n");

failed:
	/* kfree(NULL) is a no-op, so CPUs that never got arrays are harmless. */
	for_each_possible_cpu(i) {
		kfree(irq_cpu_data[i].irq_delta);
		irq_cpu_data[i].irq_delta = NULL;
		kfree(irq_cpu_data[i].last_irq);
		irq_cpu_data[i].last_irq = NULL;
	}
	return 0;
}
/*
 * Turn the in-kernel IRQ balancer off.
 *
 * Registered as the handler for the "noirqbalance" boot option and also
 * called directly with an empty string by quirk_intel_irqbalance().
 *
 * @str: option text; ignored.
 *
 * Always returns 1 (presumably the __setup() "option handled" convention).
 */
int __devinit irqbalance_disable(char *str)
{
	/* Any non-zero value makes balance_irq() and balanced_irq_init() bail out. */
	irqbalance_disabled = 1;

	return 1;
}
__setup("noirqbalance", irqbalance_disable);
late_initcall(balanced_irq_init);
#endif /* CONFIG_IRQBALANCE */
#endif /* CONFIG_SMP */
#ifndef CONFIG_SMP
......
......@@ -35,9 +35,6 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
if (!(word & (1 << 13))) {
dev_info(&dev->dev, "Intel E7520/7320/7525 detected; "
"disabling irq balancing and affinity\n");
#ifdef CONFIG_IRQBALANCE
irqbalance_disable("");
#endif
noirqdebug_setup("");
#ifdef CONFIG_PROC_FS
no_irq_affinity = 1;
......
......@@ -185,7 +185,7 @@ struct irq_desc {
cpumask_t affinity;
unsigned int cpu;
#endif
#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
#ifdef CONFIG_GENERIC_PENDING_IRQ
cpumask_t pending_mask;
#endif
#ifdef CONFIG_PROC_FS
......@@ -241,13 +241,13 @@ extern int setup_irq(unsigned int irq, struct irqaction *new);
#ifdef CONFIG_SMP
#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
#ifdef CONFIG_GENERIC_PENDING_IRQ
void set_pending_irq(unsigned int irq, cpumask_t mask);
void move_native_irq(int irq);
void move_masked_irq(int irq);
#else /* CONFIG_GENERIC_PENDING_IRQ || CONFIG_IRQBALANCE */
#else /* CONFIG_GENERIC_PENDING_IRQ */
static inline void move_irq(int irq)
{
......@@ -274,14 +274,6 @@ static inline void set_pending_irq(unsigned int irq, cpumask_t mask)
#endif /* CONFIG_SMP */
#ifdef CONFIG_IRQBALANCE
extern void set_balance_irq_affinity(unsigned int irq, cpumask_t mask);
#else
static inline void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
{
}
#endif
extern int no_irq_affinity;
static inline int irq_balancing_disabled(unsigned int irq)
......
......@@ -86,8 +86,6 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
if (!desc->chip->set_affinity)
return -EINVAL;
set_balance_irq_affinity(irq, cpumask);
#ifdef CONFIG_GENERIC_PENDING_IRQ
if (desc->status & IRQ_MOVE_PCNTXT) {
unsigned long flags;
......@@ -122,7 +120,6 @@ int irq_select_affinity(unsigned int irq)
desc->affinity = mask;
desc->chip->set_affinity(irq, mask);
set_balance_irq_affinity(irq, mask);
return 0;
}
#endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment