Commit ef08e3b4 authored by Paul Jackson, committed by Linus Torvalds
Browse files

[PATCH] cpusets: confine oom_killer to mem_exclusive cpuset



Now the real motivation for this cpuset mem_exclusive patch series seems
trivial.

This patch keeps a task in or under one mem_exclusive cpuset from provoking an
oom kill of a task under a non-overlapping mem_exclusive cpuset.  Since only
interrupt and GFP_ATOMIC allocations are allowed to escape mem_exclusive
containment, there is little to gain from oom killing a task under a
non-overlapping mem_exclusive cpuset, as almost all kernel and user memory
allocation must come from disjoint memory nodes.

This patch enables configuring a system so that a runaway job under one
mem_exclusive cpuset cannot cause the killing of a job in another such cpuset
that might be using very high compute and memory resources for a prolonged
time.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 9bf2229f
...@@ -24,6 +24,7 @@ void cpuset_update_current_mems_allowed(void); ...@@ -24,6 +24,7 @@ void cpuset_update_current_mems_allowed(void);
void cpuset_restrict_to_mems_allowed(unsigned long *nodes); void cpuset_restrict_to_mems_allowed(unsigned long *nodes);
int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl);
extern int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask); extern int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask);
extern int cpuset_excl_nodes_overlap(const struct task_struct *p);
extern struct file_operations proc_cpuset_operations; extern struct file_operations proc_cpuset_operations;
extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer); extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer);
...@@ -54,6 +55,11 @@ static inline int cpuset_zone_allowed(struct zone *z, ...@@ -54,6 +55,11 @@ static inline int cpuset_zone_allowed(struct zone *z,
return 1; return 1;
} }
/*
 * Stub form of cpuset_excl_nodes_overlap(): ignores @p and always
 * reports an overlap, so a caller that skips tasks on a zero return
 * never skips anything.
 *
 * NOTE(review): presumably this is the variant compiled when cpuset
 * support is configured out - confirm against the enclosing #ifdef.
 */
static inline int cpuset_excl_nodes_overlap(const struct task_struct *p)
{
	return 1;
}
static inline char *cpuset_task_status_allowed(struct task_struct *task, static inline char *cpuset_task_status_allowed(struct task_struct *task,
char *buffer) char *buffer)
{ {
......
...@@ -1688,6 +1688,39 @@ int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask) ...@@ -1688,6 +1688,39 @@ int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask)
return allowed; return allowed;
} }
/**
 * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors?
 * @p: pointer to task_struct of some other task.
 *
 * Description: Looks up the nearest mem_exclusive ancestor cpuset of
 * the current task and of task @p, and reports whether their
 * mems_allowed node masks intersect.  The oom killer uses the answer
 * to judge whether @p's memory usage might impact the memory
 * available to the current task.
 *
 * Returns nonzero if the two masks intersect.  Returns 0 if they do
 * not, or if either task no longer has a cpuset attached (that task
 * is exiting).
 *
 * Takes and releases cpuset_sem - not suitable for calling from a
 * fast path.
 *
 * NOTE(review): down() can sleep - confirm that no caller invokes
 * this from atomic context (e.g. while holding a spinlock).
 **/
int cpuset_excl_nodes_overlap(const struct task_struct *p)
{
	const struct cpuset *mine;	/* current's cpuset, then its ancestor */
	const struct cpuset *theirs;	/* p's cpuset, then its ancestor */
	int overlap = 0;		/* stays 0 unless the masks intersect */

	down(&cpuset_sem);

	mine = current->cpuset;
	if (mine) {			/* NULL: current task exiting */
		theirs = p->cpuset;
		if (theirs) {		/* NULL: task p is exiting */
			mine = nearest_exclusive_ancestor(mine);
			theirs = nearest_exclusive_ancestor(theirs);
			overlap = nodes_intersects(mine->mems_allowed,
						   theirs->mems_allowed);
		}
	}

	up(&cpuset_sem);
	return overlap;
}
/* /*
* proc_cpuset_show() * proc_cpuset_show()
* - Print tasks cpuset path into seq_file. * - Print tasks cpuset path into seq_file.
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <linux/swap.h> #include <linux/swap.h>
#include <linux/timex.h> #include <linux/timex.h>
#include <linux/jiffies.h> #include <linux/jiffies.h>
#include <linux/cpuset.h>
/* #define DEBUG */ /* #define DEBUG */
...@@ -152,6 +153,10 @@ static struct task_struct * select_bad_process(void) ...@@ -152,6 +153,10 @@ static struct task_struct * select_bad_process(void)
continue; continue;
if (p->oomkilladj == OOM_DISABLE) if (p->oomkilladj == OOM_DISABLE)
continue; continue;
/* If p's nodes don't overlap ours, it won't help to kill p. */
if (!cpuset_excl_nodes_overlap(p))
continue;
/* /*
* This is in the process of releasing memory so for wait it * This is in the process of releasing memory so for wait it
* to finish before killing some other task by mistake. * to finish before killing some other task by mistake.
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment