Commit b297d520 authored by Linus Torvalds
Browse files

Merge branch 'dmapool' of git://git.kernel.org/pub/scm/linux/kernel/git/willy/misc

* 'dmapool' of git://git.kernel.org/pub/scm/linux/kernel/git/willy/misc:
  pool: Improve memory usage for devices which can't cross boundaries
  Change dmapool free block management
  dmapool: Tidy up includes and add comments
  dmapool: Validate parameters to dma_pool_create
  Avoid taking waitqueue lock in dmapool
  dmapool: Fix style problems
  Move dmapool.c to mm/ directory
parents c7736339 e34f44b3
......@@ -5,7 +5,7 @@ obj-y := core.o sys.o bus.o dd.o \
cpu.o firmware.o init.o map.o devres.o \
attribute_container.o transport_class.o
obj-y += power/
obj-$(CONFIG_HAS_DMA) += dma-mapping.o dmapool.o
obj-$(CONFIG_HAS_DMA) += dma-mapping.o
obj-$(CONFIG_ISA) += isa.o
obj-$(CONFIG_FW_LOADER) += firmware_class.o
obj-$(CONFIG_NUMA) += node.o
......
......@@ -16,6 +16,7 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
obj-$(CONFIG_BOUNCE) += bounce.o
obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o
obj-$(CONFIG_HAS_DMA) += dmapool.o
obj-$(CONFIG_HUGETLBFS) += hugetlb.o
obj-$(CONFIG_NUMA) += mempolicy.o
obj-$(CONFIG_SPARSEMEM) += sparse.o
......
/*
* DMA Pool allocator
*
* Copyright 2001 David Brownell
* Copyright 2007 Intel Corporation
* Author: Matthew Wilcox <willy@linux.intel.com>
*
* This software may be redistributed and/or modified under the terms of
* the GNU General Public License ("GPL") version 2 as published by the
* Free Software Foundation.
*
* This allocator returns small blocks of a given size which are DMA-able by
* the given device. It uses the dma_alloc_coherent page allocator to get
* new pages, then splits them up into blocks of the required size.
* Many older drivers still have their own code to do this.
*
* The current design of this allocator is fairly simple. The pool is
* represented by the 'struct dma_pool' which keeps a doubly-linked list of
* allocated pages. Each page in the page_list is split into blocks of at
* least 'size' bytes. Free blocks are tracked in an unsorted singly-linked
* list of free blocks within the page. Used blocks aren't tracked, but we
* keep a count of how many are currently allocated from each page.
*/
#include <linux/device.h>
#include <linux/mm.h>
#include <asm/io.h> /* Needed for i386 to build */
#include <linux/dma-mapping.h>
#include <linux/dmapool.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/poison.h>
#include <linux/sched.h>
/*
* Pool allocator ... wraps the dma_alloc_coherent page allocator, so
* small blocks are easily used by drivers for bus mastering controllers.
* This should probably be sharing the guts of the slab allocator.
*/
struct dma_pool { /* the pool */
struct list_head page_list;
spinlock_t lock;
size_t blocks_per_page;
size_t size;
struct device *dev;
size_t allocation;
char name [32];
wait_queue_head_t waitq;
struct list_head pools;
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/wait.h>
/*
 * One pool of equally sized DMA blocks.  Blocks are carved out of larger
 * buffers ("pages") obtained from dma_alloc_coherent().
 */
struct dma_pool { /* the pool */
struct list_head page_list; /* struct dma_page entries belonging to this pool */
spinlock_t lock; /* held around page_list walks and free-list updates */
size_t size; /* block size after dma_pool_create() has rounded it up */
struct device *dev; /* device given to dma_pool_create(); may be NULL */
size_t allocation; /* bytes requested from dma_alloc_coherent() per page */
size_t boundary; /* blocks are laid out so they do not cross a multiple of this */
char name[32]; /* copy of the creator's name, truncated to fit */
wait_queue_head_t waitq; /* allocators sleeping in dma_pool_alloc() */
struct list_head pools; /* link in dev->dma_pools (self-linked when dev is NULL) */
};
struct dma_page { /* cacheable header for 'allocation' bytes */
struct list_head page_list;
void *vaddr;
dma_addr_t dma;
unsigned in_use;
unsigned long bitmap [0];
/*
 * Bookkeeping for one dma_alloc_coherent() buffer owned by a pool.  The free
 * blocks inside the buffer form a singly linked list: each free block stores
 * the byte offset of the next free block in its first int.
 */
struct dma_page { /* cacheable header for 'allocation' bytes */
struct list_head page_list; /* link in the owning pool's page_list */
void *vaddr; /* kernel virtual address of the buffer */
dma_addr_t dma; /* bus address of the buffer */
unsigned int in_use; /* number of blocks currently handed out */
unsigned int offset; /* byte offset of the first free block; >= pool->allocation when none */
};
#define POOL_TIMEOUT_JIFFIES ((100 /* msec */ * HZ) / 1000)
static DEFINE_MUTEX (pools_lock);
static DEFINE_MUTEX(pools_lock);
static ssize_t
show_pools (struct device *dev, struct device_attribute *attr, char *buf)
show_pools(struct device *dev, struct device_attribute *attr, char *buf)
{
unsigned temp;
unsigned size;
......@@ -67,9 +89,9 @@ show_pools (struct device *dev, struct device_attribute *attr, char *buf)
/* per-pool info, no real statistics yet */
temp = scnprintf(next, size, "%-16s %4u %4Zu %4Zu %2u\n",
pool->name,
blocks, pages * pool->blocks_per_page,
pool->size, pages);
pool->name, blocks,
pages * (pool->allocation / pool->size),
pool->size, pages);
size -= temp;
next += temp;
}
......@@ -77,7 +99,8 @@ show_pools (struct device *dev, struct device_attribute *attr, char *buf)
return PAGE_SIZE - size;
}
static DEVICE_ATTR (pools, S_IRUGO, show_pools, NULL);
static DEVICE_ATTR(pools, S_IRUGO, show_pools, NULL);
/**
* dma_pool_create - Creates a pool of consistent memory blocks, for dma.
......@@ -85,7 +108,7 @@ static DEVICE_ATTR (pools, S_IRUGO, show_pools, NULL);
* @dev: device that will be doing the DMA
* @size: size of the blocks in this pool.
* @align: alignment requirement for blocks; must be a power of two
* @allocation: returned blocks won't cross this boundary (or zero)
* @boundary: returned blocks won't cross this power of two boundary
* Context: !in_interrupt()
*
* Returns a dma allocation pool with the requested characteristics, or
......@@ -95,131 +118,135 @@ static DEVICE_ATTR (pools, S_IRUGO, show_pools, NULL);
* cache flushing primitives. The actual size of blocks allocated may be
* larger than requested because of alignment.
*
* If allocation is nonzero, objects returned from dma_pool_alloc() won't
* If @boundary is nonzero, objects returned from dma_pool_alloc() won't
* cross that size boundary. This is useful for devices which have
* addressing restrictions on individual DMA transfers, such as not crossing
* boundaries of 4KBytes.
*/
/*
 * NOTE(review): this span interleaves two revisions of dma_pool_create(): one
 * whose last parameter is 'allocation' and one whose last parameter is
 * 'boundary'.  The comments added below describe the 'boundary' variant.
 */
struct dma_pool *
dma_pool_create (const char *name, struct device *dev,
size_t size, size_t align, size_t allocation)
struct dma_pool *dma_pool_create(const char *name, struct device *dev,
size_t size, size_t align, size_t boundary)
{
struct dma_pool *retval;
struct dma_pool *retval;
size_t allocation;
if (align == 0)
if (align == 0) {
align = 1;
if (size == 0)
} else if (align & (align - 1)) {
return NULL;
else if (size < align)
size = align;
else if ((size % align) != 0) {
size += align + 1;
size &= ~(align - 1);
}
if (allocation == 0) {
if (PAGE_SIZE < size)
allocation = size;
else
allocation = PAGE_SIZE;
// FIXME: round up for less fragmentation
} else if (allocation < size)
/*
 * A zero size is rejected.  Sizes below 4 are raised to 4 because a free
 * block stores an int-sized link to the next free block inside itself.
 */
if (size == 0) {
return NULL;
} else if (size < 4) {
size = 4;
}
if ((size % align) != 0)
size = ALIGN(size, align);
/* Each backing buffer holds at least one block and at least PAGE_SIZE bytes. */
allocation = max_t(size_t, size, PAGE_SIZE);
/*
 * boundary == 0 means "no extra restriction": use the whole buffer.
 * Otherwise it must be a power of two no smaller than one block.
 */
if (!boundary) {
boundary = allocation;
} else if ((boundary < size) || (boundary & (boundary - 1))) {
return NULL;
}
if (!(retval = kmalloc_node (sizeof *retval, GFP_KERNEL, dev_to_node(dev))))
retval = kmalloc_node(sizeof(*retval), GFP_KERNEL, dev_to_node(dev));
if (!retval)
return retval;
strlcpy (retval->name, name, sizeof retval->name);
strlcpy(retval->name, name, sizeof(retval->name));
retval->dev = dev;
INIT_LIST_HEAD (&retval->page_list);
spin_lock_init (&retval->lock);
INIT_LIST_HEAD(&retval->page_list);
spin_lock_init(&retval->lock);
retval->size = size;
retval->boundary = boundary;
retval->allocation = allocation;
retval->blocks_per_page = allocation / size;
init_waitqueue_head (&retval->waitq);
init_waitqueue_head(&retval->waitq);
/*
 * With a device, the pool is linked into dev->dma_pools under pools_lock.
 * The first pool of a device also creates the "pools" attribute file; if
 * that fails the new pool is freed and NULL is returned.
 */
if (dev) {
int ret;
mutex_lock(&pools_lock);
if (list_empty (&dev->dma_pools))
ret = device_create_file (dev, &dev_attr_pools);
if (list_empty(&dev->dma_pools))
ret = device_create_file(dev, &dev_attr_pools);
else
ret = 0;
/* note: not currently insisting "name" be unique */
if (!ret)
list_add (&retval->pools, &dev->dma_pools);
list_add(&retval->pools, &dev->dma_pools);
else {
kfree(retval);
retval = NULL;
}
mutex_unlock(&pools_lock);
} else
INIT_LIST_HEAD (&retval->pools);
INIT_LIST_HEAD(&retval->pools);
return retval;
}
EXPORT_SYMBOL(dma_pool_create);
static struct dma_page *
pool_alloc_page (struct dma_pool *pool, gfp_t mem_flags)
/*
 * pool_initialise_page - thread the free list through a newly allocated page
 * @pool: pool supplying the block size, boundary and buffer size
 * @page: page whose vaddr buffer is being carved into blocks
 *
 * Each free block stores, in its first int, the byte offset of the next free
 * block.  The last block stores an offset >= pool->allocation, which is the
 * "no more free blocks" marker tested by the allocator.
 *
 * Blocks are placed back to back, except that a block which would cross a
 * multiple of pool->boundary is moved up to start on that boundary.
 */
static void pool_initialise_page(struct dma_pool *pool, struct dma_page *page)
{
	unsigned int offset = 0;
	unsigned int next_boundary = pool->boundary;

	do {
		unsigned int next = offset + pool->size;

		/*
		 * Skip ahead only when the following block would really
		 * straddle the boundary.  A block ending exactly on the
		 * boundary does not cross it, so the test is '>' rather than
		 * '>='.  With '>=' the last block before every boundary was
		 * left out of the free list whenever size divides boundary.
		 */
		if (unlikely((next + pool->size) > next_boundary)) {
			next = next_boundary;
			next_boundary += pool->boundary;
		}
		*(int *)(page->vaddr + offset) = next;
		offset = next;
	} while (offset < pool->allocation);
}
mapsize = pool->blocks_per_page;
mapsize = (mapsize + BITS_PER_LONG - 1) / BITS_PER_LONG;
mapsize *= sizeof (long);
/*
 * Allocate one more backing page for @pool.
 *
 * A struct dma_page header is obtained with kmalloc() and a buffer of
 * pool->allocation bytes with dma_alloc_coherent(), both using @mem_flags.
 * On success the free list is built with pool_initialise_page(), the page is
 * added to pool->page_list with no blocks in use, and the header is returned.
 * Returns NULL if either allocation fails.
 *
 * NOTE(review): this span interleaves two revisions; the lines that use
 * 'mapsize' and page->bitmap do not match the struct dma_page that has
 * 'offset' and no bitmap member.
 */
static struct dma_page *pool_alloc_page(struct dma_pool *pool, gfp_t mem_flags)
{
struct dma_page *page;
page = kmalloc(mapsize + sizeof *page, mem_flags);
page = kmalloc(sizeof(*page), mem_flags);
if (!page)
return NULL;
page->vaddr = dma_alloc_coherent (pool->dev,
pool->allocation,
&page->dma,
mem_flags);
page->vaddr = dma_alloc_coherent(pool->dev, pool->allocation,
&page->dma, mem_flags);
if (page->vaddr) {
memset (page->bitmap, 0xff, mapsize); // bit set == free
#ifdef CONFIG_DEBUG_SLAB
memset (page->vaddr, POOL_POISON_FREED, pool->allocation);
memset(page->vaddr, POOL_POISON_FREED, pool->allocation);
#endif
list_add (&page->page_list, &pool->page_list);
/* Poisoning (if enabled) comes first: this call writes the free-list links. */
pool_initialise_page(pool, page);
list_add(&page->page_list, &pool->page_list);
page->in_use = 0;
/* The first free block is at the start of the buffer. */
page->offset = 0;
} else {
kfree (page);
kfree(page);
page = NULL;
}
return page;
}
/*
 * Report whether any block of a page is still allocated.
 *
 * NOTE(review): two revisions are interleaved here.  One takes a block count
 * and a bitmap and returns 1 as soon as a bitmap word is not all ones; the
 * other takes the page and returns nonzero while page->in_use is not zero.
 */
static inline int
is_page_busy (int blocks, unsigned long *bitmap)
static inline int is_page_busy(struct dma_page *page)
{
while (blocks > 0) {
if (*bitmap++ != ~0UL)
return 1;
blocks -= BITS_PER_LONG;
}
return 0;
return page->in_use != 0;
}
/*
 * Give one page back: return its buffer with dma_free_coherent(), unlink the
 * header from the pool's page list and kfree() the header.  When
 * CONFIG_DEBUG_SLAB is set the buffer is filled with POOL_POISON_FREED first.
 *
 * NOTE(review): each statement appears twice because two revisions that
 * differ only in spacing are interleaved in this span.
 */
static void
pool_free_page (struct dma_pool *pool, struct dma_page *page)
static void pool_free_page(struct dma_pool *pool, struct dma_page *page)
{
/* Copied to a local before the buffer is poisoned and released. */
dma_addr_t dma = page->dma;
dma_addr_t dma = page->dma;
#ifdef CONFIG_DEBUG_SLAB
memset (page->vaddr, POOL_POISON_FREED, pool->allocation);
memset(page->vaddr, POOL_POISON_FREED, pool->allocation);
#endif
dma_free_coherent (pool->dev, pool->allocation, page->vaddr, dma);
list_del (&page->page_list);
kfree (page);
dma_free_coherent(pool->dev, pool->allocation, page->vaddr, dma);
list_del(&page->page_list);
kfree(page);
}
/**
* dma_pool_destroy - destroys a pool of dma memory blocks.
* @pool: dma pool that will be destroyed
......@@ -228,36 +255,37 @@ pool_free_page (struct dma_pool *pool, struct dma_page *page)
* Caller guarantees that no more memory from the pool is in use,
* and that nothing will try to use the pool after this call.
*/
/*
 * Tear-down sequence:
 *  1. under pools_lock, unlink the pool from the pools list and remove the
 *     "pools" attribute file once the device has no pools left;
 *  2. walk page_list: a page with blocks still in use is reported and only its
 *     header is freed (the DMA buffer is leaked on purpose); any other page is
 *     released with pool_free_page();
 *  3. free the pool structure itself.
 *
 * NOTE(review): two revisions are interleaved in this span; the
 * is_page_busy() call taking blocks_per_page and a bitmap belongs to the
 * bitmap-based variant.
 */
void
dma_pool_destroy (struct dma_pool *pool)
void dma_pool_destroy(struct dma_pool *pool)
{
mutex_lock(&pools_lock);
list_del (&pool->pools);
if (pool->dev && list_empty (&pool->dev->dma_pools))
device_remove_file (pool->dev, &dev_attr_pools);
list_del(&pool->pools);
if (pool->dev && list_empty(&pool->dev->dma_pools))
device_remove_file(pool->dev, &dev_attr_pools);
mutex_unlock(&pools_lock);
while (!list_empty (&pool->page_list)) {
struct dma_page *page;
page = list_entry (pool->page_list.next,
struct dma_page, page_list);
if (is_page_busy (pool->blocks_per_page, page->bitmap)) {
/* Both branches below unlink the page, so the list shrinks every pass. */
while (!list_empty(&pool->page_list)) {
struct dma_page *page;
page = list_entry(pool->page_list.next,
struct dma_page, page_list);
if (is_page_busy(page)) {
if (pool->dev)
dev_err(pool->dev, "dma_pool_destroy %s, %p busy\n",
dev_err(pool->dev,
"dma_pool_destroy %s, %p busy\n",
pool->name, page->vaddr);
else
printk (KERN_ERR "dma_pool_destroy %s, %p busy\n",
pool->name, page->vaddr);
printk(KERN_ERR
"dma_pool_destroy %s, %p busy\n",
pool->name, page->vaddr);
/* leak the still-in-use consistent memory */
list_del (&page->page_list);
kfree (page);
list_del(&page->page_list);
kfree(page);
} else
pool_free_page (pool, page);
pool_free_page(pool, page);
}
kfree (pool);
kfree(pool);
}
EXPORT_SYMBOL(dma_pool_destroy);
/**
* dma_pool_alloc - get a block of consistent memory
......@@ -267,75 +295,62 @@ dma_pool_destroy (struct dma_pool *pool)
*
* This returns the kernel virtual address of a currently unused block,
* and reports its dma address through the handle.
* If such a memory block can't be allocated, null is returned.
* If such a memory block can't be allocated, %NULL is returned.
*/
/*
 * NOTE(review): two revisions are interleaved in this span.  The lines using
 * 'map', 'block', page->bitmap and pool->blocks_per_page belong to the
 * bitmap-based variant; the comments added below describe the free-list
 * variant, which uses page->offset.
 *
 * Outline of the free-list variant:
 *  - with pool->lock held, pick the first page that still has a free block
 *    (page->offset < pool->allocation);
 *  - if there is none, try pool_alloc_page() with GFP_ATOMIC;
 *  - if that fails and the caller allows waiting (__GFP_WAIT), sleep on
 *    pool->waitq for up to POOL_TIMEOUT_JIFFIES with the lock dropped, then
 *    retry from 'restart'; otherwise return NULL;
 *  - at 'ready', pop the head of the page's free list and report both the
 *    virtual and the DMA address of that block.
 */
void *
dma_pool_alloc (struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle)
void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
dma_addr_t *handle)
{
unsigned long flags;
struct dma_page *page;
int map, block;
size_t offset;
void *retval;
restart:
spin_lock_irqsave (&pool->lock, flags);
unsigned long flags;
struct dma_page *page;
size_t offset;
void *retval;
spin_lock_irqsave(&pool->lock, flags);
restart:
list_for_each_entry(page, &pool->page_list, page_list) {
int i;
/* only cacheable accesses here ... */
for (map = 0, i = 0;
i < pool->blocks_per_page;
i += BITS_PER_LONG, map++) {
if (page->bitmap [map] == 0)
continue;
block = ffz (~ page->bitmap [map]);
if ((i + block) < pool->blocks_per_page) {
clear_bit (block, &page->bitmap [map]);
offset = (BITS_PER_LONG * map) + block;
offset *= pool->size;
goto ready;
}
}
/* An offset inside the buffer means this page has a free block. */
if (page->offset < pool->allocation)
goto ready;
}
if (!(page = pool_alloc_page (pool, GFP_ATOMIC))) {
page = pool_alloc_page(pool, GFP_ATOMIC);
if (!page) {
if (mem_flags & __GFP_WAIT) {
DECLARE_WAITQUEUE (wait, current);
DECLARE_WAITQUEUE(wait, current);
__set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue (&pool->waitq, &wait);
spin_unlock_irqrestore (&pool->lock, flags);
/* The __ wait-queue helpers are called with pool->lock held. */
__add_wait_queue(&pool->waitq, &wait);
spin_unlock_irqrestore(&pool->lock, flags);
schedule_timeout (POOL_TIMEOUT_JIFFIES);
schedule_timeout(POOL_TIMEOUT_JIFFIES);
remove_wait_queue (&pool->waitq, &wait);
spin_lock_irqsave(&pool->lock, flags);
__remove_wait_queue(&pool->waitq, &wait);
goto restart;
}
retval = NULL;
goto done;
}
clear_bit (0, &page->bitmap [0]);
offset = 0;
ready:
ready:
page->in_use++;
/* Pop the list head; the block's first int is the offset of the next free block. */
offset = page->offset;
page->offset = *(int *)(page->vaddr + offset);
retval = offset + page->vaddr;
*handle = offset + page->dma;
#ifdef CONFIG_DEBUG_SLAB
memset (retval, POOL_POISON_ALLOCATED, pool->size);
memset(retval, POOL_POISON_ALLOCATED, pool->size);
#endif
done:
spin_unlock_irqrestore (&pool->lock, flags);
done:
spin_unlock_irqrestore(&pool->lock, flags);
return retval;
}
EXPORT_SYMBOL(dma_pool_alloc);
static struct dma_page *
pool_find_page (struct dma_pool *pool, dma_addr_t dma)
static struct dma_page *pool_find_page(struct dma_pool *pool, dma_addr_t dma)
{
unsigned long flags;
struct dma_page *page;
unsigned long flags;
struct dma_page *page;
spin_lock_irqsave (&pool->lock, flags);
spin_lock_irqsave(&pool->lock, flags);
list_for_each_entry(page, &pool->page_list, page_list) {
if (dma < page->dma)
continue;
......@@ -343,12 +358,11 @@ pool_find_page (struct dma_pool *pool, dma_addr_t dma)
goto done;
}
page = NULL;
done:
spin_unlock_irqrestore (&pool->lock, flags);
done:
spin_unlock_irqrestore(&pool->lock, flags);
return page;
}
/**
* dma_pool_free - put block back into dma pool
* @pool: the dma pool holding the block
......@@ -358,62 +372,72 @@ pool_find_page (struct dma_pool *pool, dma_addr_t dma)
* Caller promises neither device nor driver will again touch this block
* unless it is first re-allocated.
*/
void
dma_pool_free (struct dma_pool *pool, void *vaddr, dma_addr_t dma)
void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma)
{
struct dma_page *page;
unsigned long flags;
int map, block;
struct dma_page *page;
unsigned long flags;
unsigned int offset;
if ((page = pool_find_page(pool, dma)) == NULL) {
page = pool_find_page(pool, dma);
if (!page) {
if (pool->dev)
dev_err(pool->dev, "dma_pool_free %s, %p/%lx (bad dma)\n",
pool->name, vaddr, (unsigned long) dma);
dev_err(pool->dev,
"dma_pool_free %s, %p/%lx (bad dma)\n",
pool->name, vaddr, (unsigned long)dma);
else
printk (KERN_ERR "dma_pool_free %s, %p/%lx (bad dma)\n",
pool->name, vaddr, (unsigned long) dma);
printk(KERN_ERR "dma_pool_free %s, %p/%lx (bad dma)\n",
pool->name, vaddr, (unsigned long)dma);
return;
}
block = dma - page->dma;
block /= pool->size;
map = block / BITS_PER_LONG;
block %= BITS_PER_LONG;
offset = vaddr - page->vaddr;
#ifdef CONFIG_DEBUG_SLAB
if (((dma - page->dma) + (void *)page->vaddr) != vaddr) {
if ((dma - page->dma) != offset) {
if (pool->dev)
dev_err(pool->dev, "dma_pool_free %s, %p (bad vaddr)/%Lx\n",
pool->name, vaddr, (unsigned long long) dma);
dev_err(pool->dev,
"dma_pool_free %s, %p (bad vaddr)/%Lx\n",
pool->name, vaddr, (unsigned long long)dma);
else
printk (KERN_ERR "dma_pool_free %s, %p (bad vaddr)/%Lx\n",
pool->name, vaddr, (unsigned long long) dma);
printk(KERN_ERR
"dma_pool_free %s, %p (bad vaddr)/%Lx\n",
pool->name, vaddr, (unsigned long long)dma);
return;
}
if (page->bitmap [map] & (1UL << block)) {
if (pool->dev)
dev_err(pool->dev, "dma_pool_free %s, dma %Lx already free\n",
pool->name, (unsigned long long)dma);
else
printk (KERN_ERR "dma_pool_free %s, dma %Lx already free\n",
pool->name, (unsigned long long)dma);
return;
{
unsigned int chain = page->offset;
while (chain < pool->allocation) {
if (chain != offset) {
chain = *(int *)(page->vaddr + chain);
continue;
}
if (pool->dev)
dev_err(pool->dev, "dma_pool_free %s, dma %Lx "
"already free\n", pool->name,
(unsigned long long)dma);
else
printk(KERN_ERR "dma_pool_free %s, dma %Lx "
"already free\n", pool->name,
(unsigned long long)dma);
return;
}
}
memset (vaddr, POOL_POISON_FREED, pool->size);
memset(vaddr, POOL_POISON_FREED, pool->size);
#endif
spin_lock_irqsave (&pool->lock, flags);
spin_lock_irqsave(&pool->lock, flags);
page->in_use--;
set_bit (block, &page->bitmap [map]);
if (waitqueue_active (&pool->waitq))
wake_up (&pool->waitq);
*(int *)vaddr = page->offset;
page->offset = offset;
if (waitqueue_active(&pool->waitq))
wake_up_locked(&pool->waitq);
/*
* Resist a temptation to do
* if (!is_page_busy(bpp, page->bitmap)) pool_free_page(pool, page);
* if (!is_page_busy(page)) pool_free_page(pool, page);
* Better have a few empty pages hang around.
*/
spin_unlock_irqrestore (&pool->lock, flags);
spin_unlock_irqrestore(&pool->lock, flags);