Commit 4ce01c51 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block layer fixes from Jens Axboe:
 "A round of fixes/updates for the current series.

  This looks a little bigger than it is, but that's mainly because we
  pushed the lightnvm enabled null_blk change out of the merge window so
  it could be updated a bit.  The rest of the volume is also mostly
  lightnvm.  In particular:

   - Lightnvm.  Various fixes, additions, updates from Matias and
     Javier, as well as from Wenwei Tao.

   - NVMe:
        - Fix for potential arithmetic overflow from Keith.
        - Also from Keith, ensure that we reap pending completions from
          a completion queue before deleting it.  Fixes kernel crashes
          when resetting a device with IO pending.
        - Various little lightnvm related tweaks from Matias.

   - Fixup flushes to go through the IO scheduler, for the cases where a
     flush is not required.  Fixes a case in CFQ where we would be
     idling and not see this request, hence not break the idling.  From
     Jan Kara.

   - Use list_{first,prev,next} in elevator.c for cleaner code.  From
     Gelian Tang.

   - Fix for a warning trigger on btrfs and raid on single queue blk-mq
     devices, where we would flush plug callbacks with preemption
     disabled.  From me.

   - A mac partition validation fix from Kees Cook.

   - Two merge fixes from Ming, marked stable.  A third part is adding a
     new warning so we'll notice this quicker in the future, if we screw
     up the accounting.

   - Cleanup of thread name/creation in mtip32xx from Rasmus Villemoes"

* 'for-linus' of git://git.kernel.dk/linux-block: (32 commits)
  blk-merge: warn if figured out segment number is bigger than nr_phys_segments
  blk-merge: fix blk_bio_segment_split
  block: fix segment split
  blk-mq: fix calling unplug callbacks with preempt disabled
  mac: validate mac_partition is within sector
  mtip32xx: use formatting capability of kthread_create_on_node
  NVMe: reap completion entries when deleting queue
  lightnvm: add free and bad lun info to show luns
  lightnvm: keep track of block counts
  nvme: lightnvm: use admin queues for admin cmds
  lightnvm: missing free on init error
  lightnvm: wrong return value and redundant free
  null_blk: do not del gendisk with lightnvm
  null_blk: use device addressing mode
  null_blk: use ppa_cache pool
  NVMe: Fix possible arithmetic overflow for max segments
  blk-flush: Queue through IO scheduler when flush not required
  null_blk: register as a LightNVM device
  elevator: use list_{first,prev,next}_entry
  lightnvm: cleanup queue before target removal
  ...
parents a2931547 12e57f59
......@@ -70,3 +70,6 @@ use_per_node_hctx=[0/1]: Default: 0
parameter.
1: The multi-queue block layer is instantiated with a hardware dispatch
queue for each CPU node in the system.
use_lightnvm=[0/1]: Default: 0
Register device with LightNVM. Requires blk-mq to be used.
......@@ -6366,6 +6366,7 @@ F: arch/*/include/asm/pmem.h
LIGHTNVM PLATFORM SUPPORT
M: Matias Bjorling <mb@lightnvm.io>
W: http://github/OpenChannelSSD
L: linux-block@vger.kernel.org
S: Maintained
F: drivers/lightnvm/
F: include/linux/lightnvm.h
......
......@@ -422,7 +422,7 @@ void blk_insert_flush(struct request *rq)
if (q->mq_ops) {
blk_mq_insert_request(rq, false, false, true);
} else
list_add_tail(&rq->queuelist, &q->queue_head);
q->elevator->type->ops.elevator_add_req_fn(q, rq);
return;
}
......
......@@ -76,6 +76,9 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
struct bio_vec bv, bvprv, *bvprvp = NULL;
struct bvec_iter iter;
unsigned seg_size = 0, nsegs = 0, sectors = 0;
unsigned front_seg_size = bio->bi_seg_front_size;
bool do_split = true;
struct bio *new = NULL;
bio_for_each_segment(bv, bio, iter) {
if (sectors + (bv.bv_len >> 9) > queue_max_sectors(q))
......@@ -98,7 +101,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
seg_size += bv.bv_len;
bvprv = bv;
bvprvp = &bv;
bvprvp = &bvprv;
sectors += bv.bv_len >> 9;
continue;
}
......@@ -108,16 +111,29 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
nsegs++;
bvprv = bv;
bvprvp = &bv;
bvprvp = &bvprv;
seg_size = bv.bv_len;
sectors += bv.bv_len >> 9;
if (nsegs == 1 && seg_size > front_seg_size)
front_seg_size = seg_size;
}
*segs = nsegs;
return NULL;
do_split = false;
split:
*segs = nsegs;
return bio_split(bio, sectors, GFP_NOIO, bs);
if (do_split) {
new = bio_split(bio, sectors, GFP_NOIO, bs);
if (new)
bio = new;
}
bio->bi_seg_front_size = front_seg_size;
if (seg_size > bio->bi_seg_back_size)
bio->bi_seg_back_size = seg_size;
return do_split ? new : NULL;
}
void blk_queue_split(struct request_queue *q, struct bio **bio,
......@@ -412,6 +428,12 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
if (sg)
sg_mark_end(sg);
/*
* Something must have been wrong if the figured number of
* segment is bigger than number of req's physical segments
*/
WARN_ON(nsegs > rq->nr_phys_segments);
return nsegs;
}
EXPORT_SYMBOL(blk_rq_map_sg);
......
......@@ -1291,15 +1291,16 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
blk_mq_bio_to_request(rq, bio);
/*
* we do limited pluging. If bio can be merged, do merge.
* We do limited pluging. If the bio can be merged, do that.
* Otherwise the existing request in the plug list will be
* issued. So the plug list will have one request at most
*/
if (plug) {
/*
* The plug list might get flushed before this. If that
* happens, same_queue_rq is invalid and plug list is empty
**/
* happens, same_queue_rq is invalid and plug list is
* empty
*/
if (same_queue_rq && !list_empty(&plug->mq_list)) {
old_rq = same_queue_rq;
list_del_init(&old_rq->queuelist);
......@@ -1380,12 +1381,15 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
blk_mq_bio_to_request(rq, bio);
if (!request_count)
trace_block_plug(q);
else if (request_count >= BLK_MAX_REQUEST_COUNT) {
blk_mq_put_ctx(data.ctx);
if (request_count >= BLK_MAX_REQUEST_COUNT) {
blk_flush_plug_list(plug, false);
trace_block_plug(q);
}
list_add_tail(&rq->queuelist, &plug->mq_list);
blk_mq_put_ctx(data.ctx);
return cookie;
}
......
......@@ -21,10 +21,10 @@ static void noop_merged_requests(struct request_queue *q, struct request *rq,
static int noop_dispatch(struct request_queue *q, int force)
{
struct noop_data *nd = q->elevator->elevator_data;
struct request *rq;
if (!list_empty(&nd->queue)) {
struct request *rq;
rq = list_entry(nd->queue.next, struct request, queuelist);
rq = list_first_entry_or_null(&nd->queue, struct request, queuelist);
if (rq) {
list_del_init(&rq->queuelist);
elv_dispatch_sort(q, rq);
return 1;
......@@ -46,7 +46,7 @@ noop_former_request(struct request_queue *q, struct request *rq)
if (rq->queuelist.prev == &nd->queue)
return NULL;
return list_entry(rq->queuelist.prev, struct request, queuelist);
return list_prev_entry(rq, queuelist);
}
static struct request *
......@@ -56,7 +56,7 @@ noop_latter_request(struct request_queue *q, struct request *rq)
if (rq->queuelist.next == &nd->queue)
return NULL;
return list_entry(rq->queuelist.next, struct request, queuelist);
return list_next_entry(rq, queuelist);
}
static int noop_init_queue(struct request_queue *q, struct elevator_type *e)
......
......@@ -32,7 +32,7 @@ int mac_partition(struct parsed_partitions *state)
Sector sect;
unsigned char *data;
int slot, blocks_in_map;
unsigned secsize;
unsigned secsize, datasize, partoffset;
#ifdef CONFIG_PPC_PMAC
int found_root = 0;
int found_root_goodness = 0;
......@@ -50,10 +50,14 @@ int mac_partition(struct parsed_partitions *state)
}
secsize = be16_to_cpu(md->block_size);
put_dev_sector(sect);
data = read_part_sector(state, secsize/512, &sect);
datasize = round_down(secsize, 512);
data = read_part_sector(state, datasize / 512, &sect);
if (!data)
return -1;
part = (struct mac_partition *) (data + secsize%512);
partoffset = secsize % 512;
if (partoffset + sizeof(*part) > datasize)
return -1;
part = (struct mac_partition *) (data + partoffset);
if (be16_to_cpu(part->signature) != MAC_PARTITION_MAGIC) {
put_dev_sector(sect);
return 0; /* not a MacOS disk */
......
......@@ -63,6 +63,7 @@ obj-$(CONFIG_FB_I810) += video/fbdev/i810/
obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
obj-$(CONFIG_PARPORT) += parport/
obj-$(CONFIG_NVM) += lightnvm/
obj-y += base/ block/ misc/ mfd/ nfc/
obj-$(CONFIG_LIBNVDIMM) += nvdimm/
obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/
......@@ -70,7 +71,6 @@ obj-$(CONFIG_NUBUS) += nubus/
obj-y += macintosh/
obj-$(CONFIG_IDE) += ide/
obj-$(CONFIG_SCSI) += scsi/
obj-$(CONFIG_NVM) += lightnvm/
obj-y += nvme/
obj-$(CONFIG_ATA) += ata/
obj-$(CONFIG_TARGET_CORE) += target/
......
......@@ -3810,7 +3810,6 @@ static int mtip_block_initialize(struct driver_data *dd)
sector_t capacity;
unsigned int index = 0;
struct kobject *kobj;
unsigned char thd_name[16];
if (dd->disk)
goto skip_create_disk; /* hw init done, before rebuild */
......@@ -3958,10 +3957,9 @@ static int mtip_block_initialize(struct driver_data *dd)
}
start_service_thread:
sprintf(thd_name, "mtip_svc_thd_%02d", index);
dd->mtip_svc_handler = kthread_create_on_node(mtip_service_thread,
dd, dd->numa_node, "%s",
thd_name);
dd, dd->numa_node,
"mtip_svc_thd_%02d", index);
if (IS_ERR(dd->mtip_svc_handler)) {
dev_err(&dd->pdev->dev, "service thread failed to start\n");
......
......@@ -8,6 +8,7 @@
#include <linux/slab.h>
#include <linux/blk-mq.h>
#include <linux/hrtimer.h>
#include <linux/lightnvm.h>
struct nullb_cmd {
struct list_head list;
......@@ -39,12 +40,14 @@ struct nullb {
struct nullb_queue *queues;
unsigned int nr_queues;
char disk_name[DISK_NAME_LEN];
};
static LIST_HEAD(nullb_list);
static struct mutex lock;
static int null_major;
static int nullb_indexes;
static struct kmem_cache *ppa_cache;
struct completion_queue {
struct llist_head list;
......@@ -119,6 +122,10 @@ static int nr_devices = 2;
module_param(nr_devices, int, S_IRUGO);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");
static bool use_lightnvm;
module_param(use_lightnvm, bool, S_IRUGO);
MODULE_PARM_DESC(use_lightnvm, "Register as a LightNVM device");
static int irqmode = NULL_IRQ_SOFTIRQ;
static int null_set_irqmode(const char *str, const struct kernel_param *kp)
......@@ -427,15 +434,156 @@ static void null_del_dev(struct nullb *nullb)
{
list_del_init(&nullb->list);
del_gendisk(nullb->disk);
if (use_lightnvm)
nvm_unregister(nullb->disk_name);
else
del_gendisk(nullb->disk);
blk_cleanup_queue(nullb->q);
if (queue_mode == NULL_Q_MQ)
blk_mq_free_tag_set(&nullb->tag_set);
put_disk(nullb->disk);
if (!use_lightnvm)
put_disk(nullb->disk);
cleanup_queues(nullb);
kfree(nullb);
}
#ifdef CONFIG_NVM
static void null_lnvm_end_io(struct request *rq, int error)
{
struct nvm_rq *rqd = rq->end_io_data;
struct nvm_dev *dev = rqd->dev;
dev->mt->end_io(rqd, error);
blk_put_request(rq);
}
static int null_lnvm_submit_io(struct request_queue *q, struct nvm_rq *rqd)
{
struct request *rq;
struct bio *bio = rqd->bio;
rq = blk_mq_alloc_request(q, bio_rw(bio), GFP_KERNEL, 0);
if (IS_ERR(rq))
return -ENOMEM;
rq->cmd_type = REQ_TYPE_DRV_PRIV;
rq->__sector = bio->bi_iter.bi_sector;
rq->ioprio = bio_prio(bio);
if (bio_has_data(bio))
rq->nr_phys_segments = bio_phys_segments(q, bio);
rq->__data_len = bio->bi_iter.bi_size;
rq->bio = rq->biotail = bio;
rq->end_io_data = rqd;
blk_execute_rq_nowait(q, NULL, rq, 0, null_lnvm_end_io);
return 0;
}
static int null_lnvm_id(struct request_queue *q, struct nvm_id *id)
{
sector_t size = gb * 1024 * 1024 * 1024ULL;
sector_t blksize;
struct nvm_id_group *grp;
id->ver_id = 0x1;
id->vmnt = 0;
id->cgrps = 1;
id->cap = 0x3;
id->dom = 0x1;
id->ppaf.blk_offset = 0;
id->ppaf.blk_len = 16;
id->ppaf.pg_offset = 16;
id->ppaf.pg_len = 16;
id->ppaf.sect_offset = 32;
id->ppaf.sect_len = 8;
id->ppaf.pln_offset = 40;
id->ppaf.pln_len = 8;
id->ppaf.lun_offset = 48;
id->ppaf.lun_len = 8;
id->ppaf.ch_offset = 56;
id->ppaf.ch_len = 8;
do_div(size, bs); /* convert size to pages */
do_div(size, 256); /* concert size to pgs pr blk */
grp = &id->groups[0];
grp->mtype = 0;
grp->fmtype = 0;
grp->num_ch = 1;
grp->num_pg = 256;
blksize = size;
do_div(size, (1 << 16));
grp->num_lun = size + 1;
do_div(blksize, grp->num_lun);
grp->num_blk = blksize;
grp->num_pln = 1;
grp->fpg_sz = bs;
grp->csecs = bs;
grp->trdt = 25000;
grp->trdm = 25000;
grp->tprt = 500000;
grp->tprm = 500000;
grp->tbet = 1500000;
grp->tbem = 1500000;
grp->mpos = 0x010101; /* single plane rwe */
grp->cpar = hw_queue_depth;
return 0;
}
static void *null_lnvm_create_dma_pool(struct request_queue *q, char *name)
{
mempool_t *virtmem_pool;
virtmem_pool = mempool_create_slab_pool(64, ppa_cache);
if (!virtmem_pool) {
pr_err("null_blk: Unable to create virtual memory pool\n");
return NULL;
}
return virtmem_pool;
}
static void null_lnvm_destroy_dma_pool(void *pool)
{
mempool_destroy(pool);
}
static void *null_lnvm_dev_dma_alloc(struct request_queue *q, void *pool,
gfp_t mem_flags, dma_addr_t *dma_handler)
{
return mempool_alloc(pool, mem_flags);
}
static void null_lnvm_dev_dma_free(void *pool, void *entry,
dma_addr_t dma_handler)
{
mempool_free(entry, pool);
}
static struct nvm_dev_ops null_lnvm_dev_ops = {
.identity = null_lnvm_id,
.submit_io = null_lnvm_submit_io,
.create_dma_pool = null_lnvm_create_dma_pool,
.destroy_dma_pool = null_lnvm_destroy_dma_pool,
.dev_dma_alloc = null_lnvm_dev_dma_alloc,
.dev_dma_free = null_lnvm_dev_dma_free,
/* Simulate nvme protocol restriction */
.max_phys_sect = 64,
};
#else
static struct nvm_dev_ops null_lnvm_dev_ops;
#endif /* CONFIG_NVM */
static int null_open(struct block_device *bdev, fmode_t mode)
{
return 0;
......@@ -575,11 +723,6 @@ static int null_add_dev(void)
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);
disk = nullb->disk = alloc_disk_node(1, home_node);
if (!disk) {
rv = -ENOMEM;
goto out_cleanup_blk_queue;
}
mutex_lock(&lock);
list_add_tail(&nullb->list, &nullb_list);
......@@ -589,6 +732,21 @@ static int null_add_dev(void)
blk_queue_logical_block_size(nullb->q, bs);
blk_queue_physical_block_size(nullb->q, bs);
sprintf(nullb->disk_name, "nullb%d", nullb->index);
if (use_lightnvm) {
rv = nvm_register(nullb->q, nullb->disk_name,
&null_lnvm_dev_ops);
if (rv)
goto out_cleanup_blk_queue;
goto done;
}
disk = nullb->disk = alloc_disk_node(1, home_node);
if (!disk) {
rv = -ENOMEM;
goto out_cleanup_lightnvm;
}
size = gb * 1024 * 1024 * 1024ULL;
set_capacity(disk, size >> 9);
......@@ -598,10 +756,15 @@ static int null_add_dev(void)
disk->fops = &null_fops;
disk->private_data = nullb;
disk->queue = nullb->q;
sprintf(disk->disk_name, "nullb%d", nullb->index);
strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
add_disk(disk);
done:
return 0;
out_cleanup_lightnvm:
if (use_lightnvm)
nvm_unregister(nullb->disk_name);
out_cleanup_blk_queue:
blk_cleanup_queue(nullb->q);
out_cleanup_tags:
......@@ -625,6 +788,18 @@ static int __init null_init(void)
bs = PAGE_SIZE;
}
if (use_lightnvm && bs != 4096) {
pr_warn("null_blk: LightNVM only supports 4k block size\n");
pr_warn("null_blk: defaults block size to 4k\n");
bs = 4096;
}
if (use_lightnvm && queue_mode != NULL_Q_MQ) {
pr_warn("null_blk: LightNVM only supported for blk-mq\n");
pr_warn("null_blk: defaults queue mode to blk-mq\n");
queue_mode = NULL_Q_MQ;
}
if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
if (submit_queues < nr_online_nodes) {
pr_warn("null_blk: submit_queues param is set to %u.",
......@@ -655,15 +830,27 @@ static int __init null_init(void)
if (null_major < 0)
return null_major;
if (use_lightnvm) {
ppa_cache = kmem_cache_create("ppa_cache", 64 * sizeof(u64),
0, 0, NULL);
if (!ppa_cache) {
pr_err("null_blk: unable to create ppa cache\n");
return -ENOMEM;
}
}
for (i = 0; i < nr_devices; i++) {
if (null_add_dev()) {
unregister_blkdev(null_major, "nullb");
return -EINVAL;
goto err_ppa;
}
}
pr_info("null: module loaded\n");
return 0;
err_ppa:
kmem_cache_destroy(ppa_cache);
return -EINVAL;
}
static void __exit null_exit(void)
......@@ -678,6 +865,8 @@ static void __exit null_exit(void)
null_del_dev(nullb);
}
mutex_unlock(&lock);
kmem_cache_destroy(ppa_cache);
}
module_init(null_init);
......
......@@ -160,11 +160,6 @@ int nvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk)
}
EXPORT_SYMBOL(nvm_erase_blk);
static void nvm_core_free(struct nvm_dev *dev)
{
kfree(dev);
}
static int nvm_core_init(struct nvm_dev *dev)
{
struct nvm_id *id = &dev->identity;
......@@ -179,12 +174,21 @@ static int nvm_core_init(struct nvm_dev *dev)
dev->sec_size = grp->csecs;
dev->oob_size = grp->sos;
dev->sec_per_pg = grp->fpg_sz / grp->csecs;
dev->addr_mode = id->ppat;
dev->addr_format = id->ppaf;
memcpy(&dev->ppaf, &id->ppaf, sizeof(struct nvm_addr_format));
dev->plane_mode = NVM_PLANE_SINGLE;
dev->max_rq_size = dev->ops->max_phys_sect * dev->sec_size;
if (grp->mtype != 0) {
pr_err("nvm: memory type not supported\n");
return -EINVAL;
}
if (grp->fmtype != 0 && grp->fmtype != 1) {
pr_err("nvm: flash type not supported\n");
return -EINVAL;
}
if (grp->mpos & 0x020202)
dev->plane_mode = NVM_PLANE_DOUBLE;
if (grp->mpos & 0x040404)
......@@ -213,21 +217,18 @@ static void nvm_free(struct nvm_dev *dev)
if (dev->mt)
dev->mt->unregister_mgr(dev);
nvm_core_free(dev);
}
static int nvm_init(struct nvm_dev *dev)
{
struct nvmm_type *mt;
int ret = 0;
int ret = -EINVAL;
if (!dev->q || !dev->ops)
return -EINVAL;
return ret;
if (dev->ops->identity(dev->q, &dev->identity)) {
pr_err("nvm: device could not be identified\n");
ret = -EINVAL;
goto err;
}
......@@ -273,7 +274,6 @@ static int nvm_init(struct nvm_dev *dev)
dev->nr_chnls);
return 0;
err:
nvm_free(dev);
pr_err("nvm: failed to initialize nvm\n");
return ret;
}
......@@ -308,22 +308,24 @@ int nvm_register(struct request_queue *q, char *disk_name,
if (ret)
goto err_init;
down_write(&nvm_lock);
list_add(&dev->devices, &nvm_devices);
up_write(&nvm_lock);
if (dev->ops->max_phys_sect > 1) {
dev->ppalist_pool = dev->ops->create_dma_pool(dev->q,
"ppalist");
if (!dev->ppalist_pool) {
pr_err("nvm: could not create ppa pool\n");
return -ENOMEM;
ret = -ENOMEM;
goto err_init;
}
} else if (dev->ops->max_phys_sect > 256) {
pr_info("nvm: max sectors supported is 256.\n");
return -EINVAL;
ret = -EINVAL;
goto err_init;
}
down_write(&nvm_lock);
list_add(&dev->devices, &nvm_devices);
up_write(&nvm_lock);
return 0;
err_init:
kfree(dev);
......@@ -341,11 +343,12 @@ void nvm_unregister(char *disk_name)
return;
}
nvm_exit(dev);
down_write(&nvm_lock);