Commit 23d4ed53 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block layer fixes from Jens Axboe:
 "Final small batch of fixes to be included before -rc1.  Some general
  cleanups in here as well, but some of the blk-mq fixes we need for the
  NVMe conversion and/or scsi-mq.  The pull request contains:

   - Support for not merging across a specified "chunk size", if set by
     the driver.  Some NVMe devices perform poorly for IO that crosses
     such a chunk, so we need to support it generically as part of
     request merging to avoid having to do complicated split logic.  From
     me.

   - Bump max tag depth to 10Ki tags.  Some scsi devices have a huge
     shared tag space.  Before we failed with EINVAL if a too large tag
     depth was specified, now we truncate it and pass back the actual
     value.  From me.

   - Various blk-mq rq init fixes from me and others.

   - A fix for enter on a dying queue for blk-mq from Keith.  This is
     needed to prevent oopsing on hot device removal.

   - Fixup for blk-mq timer addition from Ming Lei.

   - Small round of performance fixes for mtip32xx from Sam Bradshaw.

   - Minor stack leak fix from Rickard Strandqvist.

   - Two __init annotations from Fabian Frederick"

* 'for-linus' of git://git.kernel.dk/linux-block:
  block: add __init to blkcg_policy_register
  block: add __init to elv_register
  block: ensure that bio_add_page() always accepts a page for an empty bio
  blk-mq: add timer in blk_mq_start_request
  blk-mq: always initialize request->start_time
  block: blk-exec.c: Cleaning up local variable address returnd
  mtip32xx: minor performance enhancements
  blk-mq: ->timeout should be cleared in blk_mq_rq_ctx_init()
  blk-mq: don't allow queue entering for a dying queue
  blk-mq: bump max tag depth to 10K tags
  block: add blk_rq_set_block_pc()
  block: add notion of a chunk size for request merging
parents e413a19a a2d445d4
......@@ -849,7 +849,13 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
unsigned int offset)
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q));
unsigned int max_sectors;
max_sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector);
if ((max_sectors < (len >> 9)) && !bio->bi_iter.bi_size)
max_sectors = len >> 9;
return __bio_add_page(q, bio, page, len, offset, max_sectors);
}
EXPORT_SYMBOL(bio_add_page);
......
......@@ -1093,7 +1093,7 @@ EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
* Register @pol with blkcg core. Might sleep and @pol may be modified on
* successful registration. Returns 0 on success and -errno on failure.
*/
int blkcg_policy_register(struct blkcg_policy *pol)
int __init blkcg_policy_register(struct blkcg_policy *pol)
{
int i, ret;
......
......@@ -145,7 +145,7 @@ void blkcg_drain_queue(struct request_queue *q);
void blkcg_exit_queue(struct request_queue *q);
/* Blkio controller policy registration */
int blkcg_policy_register(struct blkcg_policy *pol);
int __init blkcg_policy_register(struct blkcg_policy *pol);
void blkcg_policy_unregister(struct blkcg_policy *pol);
int blkcg_activate_policy(struct request_queue *q,
const struct blkcg_policy *pol);
......@@ -580,7 +580,7 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { ret
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
static inline int __init blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
static inline int blkcg_activate_policy(struct request_queue *q,
const struct blkcg_policy *pol) { return 0; }
......
......@@ -1218,6 +1218,8 @@ struct request *blk_make_request(struct request_queue *q, struct bio *bio,
if (unlikely(!rq))
return ERR_PTR(-ENOMEM);
blk_rq_set_block_pc(rq);
for_each_bio(bio) {
struct bio *bounce_bio = bio;
int ret;
......@@ -1234,6 +1236,22 @@ struct request *blk_make_request(struct request_queue *q, struct bio *bio,
}
EXPORT_SYMBOL(blk_make_request);
/**
 * blk_rq_set_block_pc - initialize a request to type BLOCK_PC
 * @rq:		request to be initialized
 *
 * Description:
 *    Tags @rq as REQ_TYPE_BLOCK_PC and resets the fields set below: the
 *    data length becomes 0, the sector becomes (sector_t) -1, the bio
 *    head and tail pointers become NULL, and rq->cmd is pointed at the
 *    freshly zeroed rq->__cmd buffer.
 */
void blk_rq_set_block_pc(struct request *rq)
{
	/* Start from an all-zero command buffer and make it the active one. */
	memset(rq->__cmd, 0, sizeof(rq->__cmd));
	rq->cmd = rq->__cmd;

	/* No bios and no data are attached to the request at this point. */
	rq->biotail = NULL;
	rq->bio = NULL;
	rq->__data_len = 0;
	rq->__sector = (sector_t) -1;

	rq->cmd_type = REQ_TYPE_BLOCK_PC;
}
EXPORT_SYMBOL(blk_rq_set_block_pc);
/**
* blk_requeue_request - put a request back on queue
* @q: request queue where request should be inserted
......
......@@ -132,6 +132,11 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
if (rq->errors)
err = -EIO;
if (rq->sense == sense) {
rq->sense = NULL;
rq->sense_len = 0;
}
return err;
}
EXPORT_SYMBOL(blk_execute_rq);
......@@ -82,8 +82,10 @@ static int blk_mq_queue_enter(struct request_queue *q)
__percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
smp_wmb();
/* we have problems to freeze the queue if it's initializing */
if (!blk_queue_bypass(q) || !blk_queue_init_done(q))
/* we have problems freezing the queue if it's initializing */
if (!blk_queue_dying(q) &&
(!blk_queue_bypass(q) || !blk_queue_init_done(q)))
return 0;
__percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
......@@ -183,6 +185,7 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
RB_CLEAR_NODE(&rq->rb_node);
rq->rq_disk = NULL;
rq->part = NULL;
rq->start_time = jiffies;
#ifdef CONFIG_BLK_CGROUP
rq->rl = NULL;
set_start_time_ns(rq);
......@@ -202,6 +205,8 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
rq->sense = NULL;
INIT_LIST_HEAD(&rq->timeout_list);
rq->timeout = 0;
rq->end_io = NULL;
rq->end_io_data = NULL;
rq->next_rq = NULL;
......@@ -406,16 +411,7 @@ static void blk_mq_start_request(struct request *rq, bool last)
if (unlikely(blk_bidi_rq(rq)))
rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq);
/*
* Just mark start time and set the started bit. Due to memory
* ordering, we know we'll see the correct deadline as long as
* REQ_ATOMIC_STARTED is seen. Use the default queue timeout,
* unless one has been set in the request.
*/
if (!rq->timeout)
rq->deadline = jiffies + q->rq_timeout;
else
rq->deadline = jiffies + rq->timeout;
blk_add_timer(rq);
/*
* Mark us as started and clear complete. Complete might have been
......@@ -967,11 +963,6 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
list_add_tail(&rq->queuelist, &ctx->rq_list);
blk_mq_hctx_mark_pending(hctx, ctx);
/*
* We do this early, to ensure we are on the right CPU.
*/
blk_add_timer(rq);
}
void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
......@@ -1100,10 +1091,8 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio)
{
init_request_from_bio(rq, bio);
if (blk_do_io_stat(rq)) {
rq->start_time = jiffies;
if (blk_do_io_stat(rq))
blk_account_io_start(rq, 1);
}
}
static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
......@@ -1216,7 +1205,6 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
blk_mq_bio_to_request(rq, bio);
blk_mq_start_request(rq, true);
blk_add_timer(rq);
/*
* For OK queue, we are done. For error, kill it. Any other
......@@ -1967,13 +1955,19 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
return NOTIFY_OK;
}
/*
* Alloc a tag set to be associated with one or more request queues.
* May fail with EINVAL for various error conditions. May adjust the
* requested depth down, if it is too large. In that case, the set
* value will be stored in set->queue_depth.
*/
int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
{
int i;
if (!set->nr_hw_queues)
return -EINVAL;
if (!set->queue_depth || set->queue_depth > BLK_MQ_MAX_DEPTH)
if (!set->queue_depth)
return -EINVAL;
if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
return -EINVAL;
......@@ -1981,6 +1975,11 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
if (!set->nr_hw_queues || !set->ops->queue_rq || !set->ops->map_queue)
return -EINVAL;
if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
pr_info("blk-mq: reduced tag depth to %u\n",
BLK_MQ_MAX_DEPTH);
set->queue_depth = BLK_MQ_MAX_DEPTH;
}
set->tags = kmalloc_node(set->nr_hw_queues *
sizeof(struct blk_mq_tags *),
......
......@@ -113,6 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
lim->chunk_sectors = 0;
lim->max_write_same_sectors = 0;
lim->max_discard_sectors = 0;
lim->discard_granularity = 0;
......@@ -276,6 +277,26 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
}
EXPORT_SYMBOL(blk_queue_max_hw_sectors);
/**
* blk_queue_chunk_sectors - set size of the chunk for this queue
* @q: the request queue for the device
* @chunk_sectors: chunk sectors in the usual 512b unit
*
* Description:
* If a driver doesn't want IOs to cross a given chunk size, it can set
* this limit and prevent merging across chunks. Note that the chunk size
* must currently be a power-of-2 in sectors. Also note that the block
* layer must accept a page worth of data at any offset. So if the
* crossing of chunks is a hard limitation in the driver, it must still be
* prepared to split single page bios.
**/
void blk_queue_chunk_sectors(struct request_queue *q, unsigned int chunk_sectors)
{
BUG_ON(!is_power_of_2(chunk_sectors));
q->limits.chunk_sectors = chunk_sectors;
}
EXPORT_SYMBOL(blk_queue_chunk_sectors);
/**
* blk_queue_max_discard_sectors - set max sectors for a single discard
* @q: the request queue for the device
......
......@@ -196,7 +196,6 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
* fill in request structure
*/
rq->cmd_len = hdr->request_len;
rq->cmd_type = REQ_TYPE_BLOCK_PC;
rq->timeout = msecs_to_jiffies(hdr->timeout);
if (!rq->timeout)
......@@ -273,6 +272,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
rq = blk_get_request(q, rw, GFP_KERNEL);
if (!rq)
return ERR_PTR(-ENOMEM);
blk_rq_set_block_pc(rq);
ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm);
if (ret)
goto out;
......
......@@ -845,7 +845,7 @@ void elv_unregister_queue(struct request_queue *q)
}
EXPORT_SYMBOL(elv_unregister_queue);
int elv_register(struct elevator_type *e)
int __init elv_register(struct elevator_type *e)
{
char *def = "";
......
......@@ -229,7 +229,6 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
* fill in request structure
*/
rq->cmd_len = hdr->cmd_len;
rq->cmd_type = REQ_TYPE_BLOCK_PC;
rq->timeout = msecs_to_jiffies(hdr->timeout);
if (!rq->timeout)
......@@ -311,6 +310,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL);
if (!rq)
return -ENOMEM;
blk_rq_set_block_pc(rq);
if (blk_fill_sghdr_rq(q, rq, hdr, mode)) {
blk_put_request(rq);
......@@ -491,7 +491,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
memset(sense, 0, sizeof(sense));
rq->sense = sense;
rq->sense_len = 0;
rq->cmd_type = REQ_TYPE_BLOCK_PC;
blk_rq_set_block_pc(rq);
blk_execute_rq(q, disk, rq, 0);
......@@ -524,7 +524,7 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
int err;
rq = blk_get_request(q, WRITE, __GFP_WAIT);
rq->cmd_type = REQ_TYPE_BLOCK_PC;
blk_rq_set_block_pc(rq);
rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
rq->cmd[0] = cmd;
rq->cmd[4] = data;
......
......@@ -39,6 +39,7 @@
#include <../drivers/ata/ahci.h>
#include <linux/export.h>
#include <linux/debugfs.h>
#include <linux/prefetch.h>
#include "mtip32xx.h"
#define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32)
......@@ -2380,6 +2381,8 @@ static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq,
/* Map the scatter list for DMA access */
nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir);
prefetch(&port->flags);
command->scatter_ents = nents;
/*
......@@ -2392,7 +2395,7 @@ static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq,
fis = command->command;
fis->type = 0x27;
fis->opts = 1 << 7;
if (rq_data_dir(rq) == READ)
if (dma_dir == DMA_FROM_DEVICE)
fis->command = ATA_CMD_FPDMA_READ;
else
fis->command = ATA_CMD_FPDMA_WRITE;
......@@ -2412,7 +2415,7 @@ static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq,
fis->res3 = 0;
fill_command_sg(dd, command, nents);
if (command->unaligned)
if (unlikely(command->unaligned))
fis->device |= 1 << 7;
/* Populate the command header */
......@@ -2433,7 +2436,7 @@ static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq,
* To prevent this command from being issued
* if an internal command is in progress or error handling is active.
*/
if (port->flags & MTIP_PF_PAUSE_IO) {
if (unlikely(port->flags & MTIP_PF_PAUSE_IO)) {
set_bit(rq->tag, port->cmds_to_issue);
set_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags);
return;
......@@ -3754,7 +3757,7 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
struct driver_data *dd = hctx->queue->queuedata;
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
if (!dd->unal_qdepth || rq_data_dir(rq) == READ)
if (rq_data_dir(rq) == READ || !dd->unal_qdepth)
return false;
/*
......@@ -3776,11 +3779,11 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
int ret;
if (mtip_check_unal_depth(hctx, rq))
if (unlikely(mtip_check_unal_depth(hctx, rq)))
return BLK_MQ_RQ_QUEUE_BUSY;
ret = mtip_submit_request(hctx, rq);
if (!ret)
if (likely(!ret))
return BLK_MQ_RQ_QUEUE_OK;
rq->errors = ret;
......
......@@ -493,19 +493,19 @@ struct driver_data {
struct workqueue_struct *isr_workq;
struct mtip_work work[MTIP_MAX_SLOT_GROUPS];
atomic_t irq_workers_active;
struct mtip_work work[MTIP_MAX_SLOT_GROUPS];
int isr_binding;
struct block_device *bdev;
int unal_qdepth; /* qdepth of unaligned IO queue */
struct list_head online_list; /* linkage for online list */
struct list_head remove_list; /* linkage for removing list */
int unal_qdepth; /* qdepth of unaligned IO queue */
};
#endif
......@@ -704,6 +704,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
WRITE : READ, __GFP_WAIT);
blk_rq_set_block_pc(rq);
if (cgc->buflen) {
ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
......@@ -716,7 +717,6 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
rq->timeout = 60*HZ;
rq->cmd_type = REQ_TYPE_BLOCK_PC;
if (cgc->quiet)
rq->cmd_flags |= REQ_QUIET;
......
......@@ -2184,6 +2184,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
ret = -ENOMEM;
break;
}
blk_rq_set_block_pc(rq);
ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
if (ret) {
......@@ -2203,7 +2204,6 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
rq->cmd[9] = 0xf8;
rq->cmd_len = 12;
rq->cmd_type = REQ_TYPE_BLOCK_PC;
rq->timeout = 60 * HZ;
bio = rq->bio;
......
......@@ -120,6 +120,7 @@ static struct request *get_alua_req(struct scsi_device *sdev,
"%s: blk_get_request failed\n", __func__);
return NULL;
}
blk_rq_set_block_pc(rq);
if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_NOIO)) {
blk_put_request(rq);
......@@ -128,7 +129,6 @@ static struct request *get_alua_req(struct scsi_device *sdev,
return NULL;
}
rq->cmd_type = REQ_TYPE_BLOCK_PC;
rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
REQ_FAILFAST_DRIVER;
rq->retries = ALUA_FAILOVER_RETRIES;
......
......@@ -280,6 +280,7 @@ static struct request *get_req(struct scsi_device *sdev, int cmd,
return NULL;
}
blk_rq_set_block_pc(rq);
rq->cmd_len = COMMAND_SIZE(cmd);
rq->cmd[0] = cmd;
......@@ -304,7 +305,6 @@ static struct request *get_req(struct scsi_device *sdev, int cmd,
break;
}
rq->cmd_type = REQ_TYPE_BLOCK_PC;
rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
REQ_FAILFAST_DRIVER;
rq->timeout = CLARIION_TIMEOUT;
......
......@@ -120,7 +120,7 @@ retry:
if (!req)
return SCSI_DH_RES_TEMP_UNAVAIL;
req->cmd_type = REQ_TYPE_BLOCK_PC;
blk_rq_set_block_pc(req);
req->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
REQ_FAILFAST_DRIVER;
req->cmd_len = COMMAND_SIZE(TEST_UNIT_READY);
......@@ -250,7 +250,7 @@ static int hp_sw_start_stop(struct hp_sw_dh_data *h)
if (!req)
return SCSI_DH_RES_TEMP_UNAVAIL;
req->cmd_type = REQ_TYPE_BLOCK_PC;
blk_rq_set_block_pc(req);
req->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
REQ_FAILFAST_DRIVER;
req->cmd_len = COMMAND_SIZE(START_STOP);
......
......@@ -279,6 +279,7 @@ static struct request *get_rdac_req(struct scsi_device *sdev,
"get_rdac_req: blk_get_request failed.\n");
return NULL;
}
blk_rq_set_block_pc(rq);
if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_NOIO)) {
blk_put_request(rq);
......@@ -287,7 +288,6 @@ static struct request *get_rdac_req(struct scsi_device *sdev,
return NULL;
}
rq->cmd_type = REQ_TYPE_BLOCK_PC;
rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
REQ_FAILFAST_DRIVER;
rq->retries = RDAC_RETRIES;
......
......@@ -1570,6 +1570,7 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
if (unlikely(!req))
return ERR_PTR(-ENOMEM);
blk_rq_set_block_pc(req);
return req;
}
}
......@@ -1590,7 +1591,6 @@ static int _init_blk_request(struct osd_request *or,
}
or->request = req;
req->cmd_type = REQ_TYPE_BLOCK_PC;
req->cmd_flags |= REQ_QUIET;
req->timeout = or->timeout;
......@@ -1608,7 +1608,7 @@ static int _init_blk_request(struct osd_request *or,
ret = PTR_ERR(req);
goto out;
}
req->cmd_type = REQ_TYPE_BLOCK_PC;
blk_rq_set_block_pc(req);
or->in.req = or->request->next_rq = req;
}
} else if (has_in)
......
......@@ -365,7 +365,7 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd,
if (!req)
return DRIVER_ERROR << 24;
req->cmd_type = REQ_TYPE_BLOCK_PC;
blk_rq_set_block_pc(req);
req->cmd_flags |= REQ_QUIET;
SRpnt->bio = NULL;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment