/*
 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/parser.h>
#include <linux/random.h>
#include <linux/jiffies.h>
#include <rdma/ib_cache.h>

#include <linux/atomic.h>

#include <scsi/scsi.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_tcq.h>
#include <scsi/srp.h>
#include <scsi/scsi_transport_srp.h>

#include "ib_srp.h"

#define DRV_NAME	"ib_srp"
#define PFX		DRV_NAME ": "
#define DRV_VERSION	"1.0"
#define DRV_RELDATE	"July 1, 2013"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator "
		   "v" DRV_VERSION " (" DRV_RELDATE ")");
MODULE_LICENSE("Dual BSD/GPL");

static unsigned int srp_sg_tablesize;
static unsigned int cmd_sg_entries;
static unsigned int indirect_sg_entries;
static bool allow_ext_sg;
static bool prefer_fr;
static bool register_always;
static int topspin_workarounds = 1;

module_param(srp_sg_tablesize, uint, 0444);
MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");

module_param(cmd_sg_entries, uint, 0444);
MODULE_PARM_DESC(cmd_sg_entries,
		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");

module_param(indirect_sg_entries, uint, 0444);
MODULE_PARM_DESC(indirect_sg_entries,
		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SCSI_MAX_SG_CHAIN_SEGMENTS) ")");

module_param(allow_ext_sg, bool, 0444);
MODULE_PARM_DESC(allow_ext_sg,
		  "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");

module_param(topspin_workarounds, int, 0444);
MODULE_PARM_DESC(topspin_workarounds,
		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");

module_param(prefer_fr, bool, 0444);
MODULE_PARM_DESC(prefer_fr,
"Whether to use fast registration if both FMR and fast registration are supported");

module_param(register_always, bool, 0444);
MODULE_PARM_DESC(register_always,
		 "Use memory registration even for contiguous memory regions");

static struct kernel_param_ops srp_tmo_ops;

static int srp_reconnect_delay = 10;
module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");

static int srp_fast_io_fail_tmo = 15;
module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(fast_io_fail_tmo,
		 "Number of seconds between the observation of a transport"
		 " layer error and failing all I/O. \"off\" means that this"
		 " functionality is disabled.");

static int srp_dev_loss_tmo = 600;
module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dev_loss_tmo,
		 "Maximum number of seconds that the SRP transport should"
		 " insulate transport layer errors. After this time has been"
		 " exceeded the SCSI host is removed. Should be"
		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
		 " if fast_io_fail_tmo has not been set. \"off\" means that"
		 " this functionality is disabled.");

static unsigned ch_count;
module_param(ch_count, uint, 0444);
MODULE_PARM_DESC(ch_count,
		 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");

static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device);
static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);

static struct scsi_transport_template *ib_srp_transport_template;
static struct workqueue_struct *srp_remove_wq;

static struct ib_client srp_client = {
	.name   = "srp",
	.add    = srp_add_one,
	.remove = srp_remove_one
};

static struct ib_sa_client srp_sa_client;

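/*
 * srp_tmo_get() and srp_tmo_set() implement the get/set operations for the
 * reconnect_delay, fast_io_fail_tmo and dev_loss_tmo module parameters.
 * Negative values are displayed as "off" and writing "off" stores -1; any
 * other value is checked against the remaining timeouts via srp_tmo_valid().
 */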
static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
{
	int tmo = *(int *)kp->arg;

	if (tmo >= 0)
		return sprintf(buffer, "%d", tmo);
	else
		return sprintf(buffer, "off");
}

static int srp_tmo_set(const char *val, const struct kernel_param *kp)
{
	int tmo, res;

	if (strncmp(val, "off", 3) != 0) {
		res = kstrtoint(val, 0, &tmo);
		if (res)
			goto out;
	} else {
		tmo = -1;
	}
	if (kp->arg == &srp_reconnect_delay)
		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
				    srp_dev_loss_tmo);
	else if (kp->arg == &srp_fast_io_fail_tmo)
		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
	else
		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
				    tmo);
	if (res)
		goto out;
	*(int *)kp->arg = tmo;

out:
	return res;
}

static struct kernel_param_ops srp_tmo_ops = {
	.get = srp_tmo_get,
	.set = srp_tmo_set,
};

static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
{
	return (struct srp_target_port *) host->hostdata;
}

static const char *srp_target_info(struct Scsi_Host *host)
{
	return host_to_target(host)->target_name;
}

static int srp_target_is_topspin(struct srp_target_port *target)
{
	static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
	static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };

	return topspin_workarounds &&
		(!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
		 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
}

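/*
 * Allocate an information unit (IU) of @size bytes and map its buffer for
 * DMA in the requested direction. Returns NULL on allocation or mapping
 * failure.
 */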
static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
				   gfp_t gfp_mask,
				   enum dma_data_direction direction)
{
	struct srp_iu *iu;

	iu = kmalloc(sizeof *iu, gfp_mask);
	if (!iu)
		goto out;

	iu->buf = kzalloc(size, gfp_mask);
	if (!iu->buf)
		goto out_free_iu;

	iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
				    direction);
	if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
		goto out_free_buf;

	iu->size      = size;
	iu->direction = direction;

	return iu;

out_free_buf:
	kfree(iu->buf);
out_free_iu:
	kfree(iu);
out:
	return NULL;
}

static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
{
	if (!iu)
		return;

	ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
			    iu->direction);
	kfree(iu->buf);
	kfree(iu);
}

static void srp_qp_event(struct ib_event *event, void *context)
{
	pr_debug("QP event %d\n", event->event);
}

static int srp_init_qp(struct srp_target_port *target,
		       struct ib_qp *qp)
{
	struct ib_qp_attr *attr;
	int ret;

	attr = kmalloc(sizeof *attr, GFP_KERNEL);
	if (!attr)
		return -ENOMEM;

	ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
				  target->srp_host->port,
				  be16_to_cpu(target->pkey),
				  &attr->pkey_index);
	if (ret)
		goto out;

	attr->qp_state        = IB_QPS_INIT;
	attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
				    IB_ACCESS_REMOTE_WRITE);
	attr->port_num        = target->srp_host->port;

	ret = ib_modify_qp(qp, attr,
			   IB_QP_STATE		|
			   IB_QP_PKEY_INDEX	|
			   IB_QP_ACCESS_FLAGS	|
			   IB_QP_PORT);

out:
	kfree(attr);
	return ret;
}

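/*
 * Allocate a new CM ID for the channel, destroying any previous one, and
 * reinitialize the path record fields from the target port information.
 */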
static int srp_new_cm_id(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct ib_cm_id *new_cm_id;

	new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
				    srp_cm_handler, ch);
	if (IS_ERR(new_cm_id))
		return PTR_ERR(new_cm_id);

	if (ch->cm_id)
		ib_destroy_cm_id(ch->cm_id);
	ch->cm_id = new_cm_id;
	ch->path.sgid = target->sgid;
	ch->path.dgid = target->orig_dgid;
	ch->path.pkey = target->pkey;
	ch->path.service_id = target->service_id;

	return 0;
}

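/*
 * Create an FMR pool sized to the SCSI host queue depth, with the pages per
 * FMR and page shift taken from the srp_device limits.
 */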
static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
{
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_fmr_pool_param fmr_param;

	memset(&fmr_param, 0, sizeof(fmr_param));
	fmr_param.pool_size	    = target->scsi_host->can_queue;
	fmr_param.dirty_watermark   = fmr_param.pool_size / 4;
	fmr_param.cache		    = 1;
	fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
	fmr_param.page_shift	    = ilog2(dev->mr_page_size);
	fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE |
				       IB_ACCESS_REMOTE_WRITE |
				       IB_ACCESS_REMOTE_READ);

	return ib_create_fmr_pool(dev->pd, &fmr_param);
}

/**
 * srp_destroy_fr_pool() - free the resources owned by a pool
 * @pool: Fast registration pool to be destroyed.
 */
static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
{
	int i;
	struct srp_fr_desc *d;

	if (!pool)
		return;

	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
		if (d->frpl)
			ib_free_fast_reg_page_list(d->frpl);
		if (d->mr)
			ib_dereg_mr(d->mr);
	}
	kfree(pool);
}

/**
 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
 * @device:            IB device to allocate fast registration descriptors for.
 * @pd:                Protection domain associated with the FR descriptors.
 * @pool_size:         Number of descriptors to allocate.
 * @max_page_list_len: Maximum fast registration work request page list length.
 */
static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
					      struct ib_pd *pd, int pool_size,
					      int max_page_list_len)
{
	struct srp_fr_pool *pool;
	struct srp_fr_desc *d;
	struct ib_mr *mr;
	struct ib_fast_reg_page_list *frpl;
	int i, ret = -EINVAL;

	if (pool_size <= 0)
		goto err;
	ret = -ENOMEM;
	pool = kzalloc(sizeof(struct srp_fr_pool) +
		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
	if (!pool)
		goto err;
	pool->size = pool_size;
	pool->max_page_list_len = max_page_list_len;
	spin_lock_init(&pool->lock);
	INIT_LIST_HEAD(&pool->free_list);

	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
		mr = ib_alloc_fast_reg_mr(pd, max_page_list_len);
		if (IS_ERR(mr)) {
			ret = PTR_ERR(mr);
			goto destroy_pool;
		}
		d->mr = mr;
		frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len);
		if (IS_ERR(frpl)) {
			ret = PTR_ERR(frpl);
			goto destroy_pool;
		}
		d->frpl = frpl;
		list_add_tail(&d->entry, &pool->free_list);
	}

out:
	return pool;

destroy_pool:
	srp_destroy_fr_pool(pool);

err:
	pool = ERR_PTR(ret);
	goto out;
}

/**
 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
 * @pool: Pool to obtain descriptor from.
 */
static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
{
	struct srp_fr_desc *d = NULL;
	unsigned long flags;

	spin_lock_irqsave(&pool->lock, flags);
	if (!list_empty(&pool->free_list)) {
		d = list_first_entry(&pool->free_list, typeof(*d), entry);
		list_del(&d->entry);
	}
	spin_unlock_irqrestore(&pool->lock, flags);

	return d;
}

/**
 * srp_fr_pool_put() - put an FR descriptor back in the free list
 * @pool: Pool the descriptor was allocated from.
 * @desc: Pointer to an array of fast registration descriptor pointers.
 * @n:    Number of descriptors to put back.
 *
 * Note: The caller must already have queued an invalidation request for
 * desc->mr->rkey before calling this function.
 */
static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
			    int n)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&pool->lock, flags);
	for (i = 0; i < n; i++)
		list_add(&desc[i]->entry, &pool->free_list);
	spin_unlock_irqrestore(&pool->lock, flags);
}

static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
{
	struct srp_device *dev = target->srp_host->srp_dev;

	return srp_create_fr_pool(dev->dev, dev->pd,
				  target->scsi_host->can_queue,
				  dev->max_pages_per_mr);
}

/**
 * srp_destroy_qp() - destroy an RDMA queue pair
 * @ch: SRP RDMA channel.
 *
 * Change a queue pair into the error state and wait until all receive
 * completions have been processed before destroying it. This avoids that
 * the receive completion handler can access the queue pair while it is
 * being destroyed.
 */
static void srp_destroy_qp(struct srp_rdma_ch *ch)
{
	static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
	static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID };
	struct ib_recv_wr *bad_wr;
	int ret;

	/* Destroying a QP and reusing ch->done is only safe if not connected */
	WARN_ON_ONCE(ch->connected);

	ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE);
	WARN_ONCE(ret, "ib_modify_qp() returned %d\n", ret);
	if (ret)
		goto out;

	init_completion(&ch->done);
	ret = ib_post_recv(ch->qp, &wr, &bad_wr);
	WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret);
	if (ret == 0)
		wait_for_completion(&ch->done);

out:
	ib_destroy_qp(ch->qp);
}

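/*
 * Create the completion queues, queue pair and memory registration pool for
 * an RDMA channel. Any QP, CQs or MR pool already attached to the channel
 * are destroyed and replaced by the newly created objects.
 */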
static int srp_create_ch_ib(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_qp_init_attr *init_attr;
	struct ib_cq *recv_cq, *send_cq;
	struct ib_qp *qp;
	struct ib_fmr_pool *fmr_pool = NULL;
	struct srp_fr_pool *fr_pool = NULL;
	const int m = 1 + dev->use_fast_reg;
	int ret;

	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
	if (!init_attr)
		return -ENOMEM;

	/* + 1 for SRP_LAST_WR_ID */
	recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
			       target->queue_size + 1, ch->comp_vector);
	if (IS_ERR(recv_cq)) {
		ret = PTR_ERR(recv_cq);
		goto err;
	}

	send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
			       m * target->queue_size, ch->comp_vector);
	if (IS_ERR(send_cq)) {
		ret = PTR_ERR(send_cq);
		goto err_recv_cq;
	}

	ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);

	init_attr->event_handler       = srp_qp_event;
	init_attr->cap.max_send_wr     = m * target->queue_size;
	init_attr->cap.max_recv_wr     = target->queue_size + 1;
	init_attr->cap.max_recv_sge    = 1;
	init_attr->cap.max_send_sge    = 1;
	init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
	init_attr->qp_type             = IB_QPT_RC;
	init_attr->send_cq             = send_cq;
	init_attr->recv_cq             = recv_cq;

	qp = ib_create_qp(dev->pd, init_attr);
	if (IS_ERR(qp)) {
		ret = PTR_ERR(qp);
		goto err_send_cq;
	}

	ret = srp_init_qp(target, qp);
	if (ret)
		goto err_qp;

	if (dev->use_fast_reg && dev->has_fr) {
		fr_pool = srp_alloc_fr_pool(target);
		if (IS_ERR(fr_pool)) {
			ret = PTR_ERR(fr_pool);
			shost_printk(KERN_WARNING, target->scsi_host, PFX
				     "FR pool allocation failed (%d)\n", ret);
			goto err_qp;
		}
		if (ch->fr_pool)
			srp_destroy_fr_pool(ch->fr_pool);
		ch->fr_pool = fr_pool;
	} else if (!dev->use_fast_reg && dev->has_fmr) {
		fmr_pool = srp_alloc_fmr_pool(target);
		if (IS_ERR(fmr_pool)) {
			ret = PTR_ERR(fmr_pool);
			shost_printk(KERN_WARNING, target->scsi_host, PFX
				     "FMR pool allocation failed (%d)\n", ret);
			goto err_qp;
		}
		if (ch->fmr_pool)
			ib_destroy_fmr_pool(ch->fmr_pool);
		ch->fmr_pool = fmr_pool;
	}

	if (ch->qp)
		srp_destroy_qp(ch);
	if (ch->recv_cq)
		ib_destroy_cq(ch->recv_cq);
	if (ch->send_cq)
		ib_destroy_cq(ch->send_cq);

	ch->qp = qp;
	ch->recv_cq = recv_cq;
	ch->send_cq = send_cq;

	kfree(init_attr);
	return 0;

err_qp:
	ib_destroy_qp(qp);

err_send_cq:
	ib_destroy_cq(send_cq);

err_recv_cq:
	ib_destroy_cq(recv_cq);

err:
	kfree(init_attr);
	return ret;
}

/*
 * Note: this function may be called without srp_alloc_iu_bufs() having been
 * invoked. Hence the ch->[rt]x_ring checks.
 */
static void srp_free_ch_ib(struct srp_target_port *target,
			   struct srp_rdma_ch *ch)
{
	struct srp_device *dev = target->srp_host->srp_dev;
	int i;

	if (!ch->target)
		return;

	if (ch->cm_id) {
		ib_destroy_cm_id(ch->cm_id);
		ch->cm_id = NULL;
	}

	/* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
	if (!ch->qp)
		return;

	if (dev->use_fast_reg) {
		if (ch->fr_pool)
			srp_destroy_fr_pool(ch->fr_pool);
	} else {
		if (ch->fmr_pool)
			ib_destroy_fmr_pool(ch->fmr_pool);
	}
	srp_destroy_qp(ch);
	ib_destroy_cq(ch->send_cq);
	ib_destroy_cq(ch->recv_cq);

	/*
	 * Avoid that the SCSI error handler tries to use this channel after
	 * it has been freed. The SCSI error handler can namely continue
	 * trying to perform recovery actions after scsi_remove_host()
	 * returned.
	 */
	ch->target = NULL;

	ch->qp = NULL;
	ch->send_cq = ch->recv_cq = NULL;

	if (ch->rx_ring) {
		for (i = 0; i < target->queue_size; ++i)
			srp_free_iu(target->srp_host, ch->rx_ring[i]);
		kfree(ch->rx_ring);
		ch->rx_ring = NULL;
	}
	if (ch->tx_ring) {
		for (i = 0; i < target->queue_size; ++i)
			srp_free_iu(target->srp_host, ch->tx_ring[i]);
		kfree(ch->tx_ring);
		ch->tx_ring = NULL;
	}
}

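/*
 * Completion callback for the SA path record query started by
 * srp_lookup_path(): store the result in the channel and wake up the waiter.
 */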
static void srp_path_rec_completion(int status,
				    struct ib_sa_path_rec *pathrec,
				    void *ch_ptr)
{
	struct srp_rdma_ch *ch = ch_ptr;
	struct srp_target_port *target = ch->target;

	ch->status = status;
	if (status)
		shost_printk(KERN_ERR, target->scsi_host,
			     PFX "Got failed path rec status %d\n", status);
	else
		ch->path = *pathrec;
	complete(&ch->done);
}

static int srp_lookup_path(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	int ret;

	ch->path.numb_path = 1;

	init_completion(&ch->done);

	ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
					       target->srp_host->srp_dev->dev,
					       target->srp_host->port,
					       &ch->path,
					       IB_SA_PATH_REC_SERVICE_ID |
					       IB_SA_PATH_REC_DGID	 |
					       IB_SA_PATH_REC_SGID	 |
					       IB_SA_PATH_REC_NUMB_PATH	 |
					       IB_SA_PATH_REC_PKEY,
					       SRP_PATH_REC_TIMEOUT_MS,
					       GFP_KERNEL,
					       srp_path_rec_completion,
					       ch, &ch->path_query);
	if (ch->path_query_id < 0)
		return ch->path_query_id;

	ret = wait_for_completion_interruptible(&ch->done);
	if (ret < 0)
		return ret;

	if (ch->status < 0)
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "Path record query failed\n");

	return ch->status;
}

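/*
 * Build and send an SRP_LOGIN_REQ on the channel's CM ID. The layout of the
 * initiator and target port identifiers depends on the I/O class reported by
 * the target, and a workaround is applied for Topspin/Cisco targets.
 */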
static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
{
	struct srp_target_port *target = ch->target;
	struct {
		struct ib_cm_req_param param;
		struct srp_login_req   priv;
	} *req = NULL;
	int status;

	req = kzalloc(sizeof *req, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	req->param.primary_path		      = &ch->path;
	req->param.alternate_path 	      = NULL;
	req->param.service_id 		      = target->service_id;
	req->param.qp_num		      = ch->qp->qp_num;
	req->param.qp_type		      = ch->qp->qp_type;
	req->param.private_data 	      = &req->priv;
	req->param.private_data_len 	      = sizeof req->priv;
	req->param.flow_control 	      = 1;

	get_random_bytes(&req->param.starting_psn, 4);
	req->param.starting_psn 	     &= 0xffffff;

	/*
	 * Pick some arbitrary defaults here; we could make these
	 * module parameters if anyone cared about setting them.
	 */
	req->param.responder_resources	      = 4;
	req->param.remote_cm_response_timeout = 20;
	req->param.local_cm_response_timeout  = 20;
	req->param.retry_count                = target->tl_retry_count;
	req->param.rnr_retry_count 	      = 7;
	req->param.max_cm_retries 	      = 15;

	req->priv.opcode     	= SRP_LOGIN_REQ;
	req->priv.tag        	= 0;
	req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
	req->priv.req_buf_fmt 	= cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
					      SRP_BUF_FORMAT_INDIRECT);
	req->priv.req_flags	= (multich ? SRP_MULTICHAN_MULTI :
				   SRP_MULTICHAN_SINGLE);
	/*
	 * In the published SRP specification (draft rev. 16a), the
	 * port identifier format is 8 bytes of ID extension followed
	 * by 8 bytes of GUID.  Older drafts put the two halves in the
	 * opposite order, so that the GUID comes first.
	 *
	 * Targets conforming to these obsolete drafts can be
	 * recognized by the I/O Class they report.
	 */
	if (target->io_class == SRP_REV10_IB_IO_CLASS) {
		memcpy(req->priv.initiator_port_id,
		       &target->sgid.global.interface_id, 8);
		memcpy(req->priv.initiator_port_id + 8,
		       &target->initiator_ext, 8);
		memcpy(req->priv.target_port_id,     &target->ioc_guid, 8);
		memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
	} else {
		memcpy(req->priv.initiator_port_id,
		       &target->initiator_ext, 8);
		memcpy(req->priv.initiator_port_id + 8,
		       &target->sgid.global.interface_id, 8);
		memcpy(req->priv.target_port_id,     &target->id_ext, 8);
		memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
	}

	/*
	 * Topspin/Cisco SRP targets will reject our login unless we
	 * zero out the first 8 bytes of our initiator port ID and set
	 * the second 8 bytes to the local node GUID.
	 */
	if (srp_target_is_topspin(target)) {
		shost_printk(KERN_DEBUG, target->scsi_host,
			     PFX "Topspin/Cisco initiator port ID workaround "
			     "activated for target GUID %016llx\n",
			     (unsigned long long) be64_to_cpu(target->ioc_guid));
		memset(req->priv.initiator_port_id, 0, 8);
		memcpy(req->priv.initiator_port_id + 8,
		       &target->srp_host->srp_dev->dev->node_guid, 8);
	}

	status = ib_send_cm_req(ch->cm_id, &req->param);

	kfree(req);

	return status;
}

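/*
 * Change the target state to SRP_TARGET_REMOVED and, if the state actually
 * changed, queue the work that removes the target. Returns whether this call
 * changed the state.
 */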
static bool srp_queue_remove_work(struct srp_target_port *target)
{
	bool changed = false;

	spin_lock_irq(&target->lock);
	if (target->state != SRP_TARGET_REMOVED) {
		target->state = SRP_TARGET_REMOVED;
		changed = true;
	}
	spin_unlock_irq(&target->lock);

	if (changed)
		queue_work(srp_remove_wq, &target->remove_work);

	return changed;
}

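/*
 * Mark all channels of the target as disconnected and send a CM DREQ on each
 * of them.
 */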
static void srp_disconnect_target(struct srp_target_port *target)
{
	struct srp_rdma_ch *ch;
	int i;

	/* XXX should send SRP_I_LOGOUT request */

	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		ch->connected = false;
		if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
			shost_printk(KERN_DEBUG, target->scsi_host,
				     PFX "Sending CM DREQ failed\n");
		}
	}
}

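/*
 * Free the per-request resources of a channel: the MR lists, map page arrays
 * and DMA-mapped indirect descriptor buffers, followed by the request ring
 * itself.
 */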
static void srp_free_req_data(struct srp_target_port *target,
			      struct srp_rdma_ch *ch)
{
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_device *ibdev = dev->dev;
	struct srp_request *req;
	int i;

	if (!ch->target || !ch->req_ring)
		return;

	for (i = 0; i < target->req_ring_size; ++i) {
		req = &ch->req_ring[i];
		if (dev->use_fast_reg)
			kfree(req->fr_list);
		else
			kfree(req->fmr_list);
		kfree(req->map_page);
		if (req->indirect_dma_addr) {
			ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
					    target->indirect_size,
					    DMA_TO_DEVICE);
		}
		kfree(req->indirect_desc);
	}

	kfree(ch->req_ring);
	ch->req_ring = NULL;
}

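/*
 * Allocate the request ring of a channel together with each request's MR
 * list, map page array and DMA-mapped indirect descriptor buffer.
 */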
static int srp_alloc_req_data(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *srp_dev = target->srp_host->srp_dev;
	struct ib_device *ibdev = srp_dev->dev;
	struct srp_request *req;
	void *mr_list;
	dma_addr_t dma_addr;
	int i, ret = -ENOMEM;

	ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
			       GFP_KERNEL);
	if (!ch->req_ring)
		goto out;

	for (i = 0; i < target->req_ring_size; ++i) {
		req = &ch->req_ring[i];
		mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
				  GFP_KERNEL);
		if (!mr_list)
			goto out;
		if (srp_dev->use_fast_reg)
			req->fr_list = mr_list;
		else
			req->fmr_list = mr_list;
		req->map_page = kmalloc(srp_dev->max_pages_per_mr *
					sizeof(void *), GFP_KERNEL);
		if (!req->map_page)
			goto out;
		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
		if (!req->indirect_desc)
			goto out;

		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
					     target->indirect_size,
					     DMA_TO_DEVICE);
		if (ib_dma_mapping_error(ibdev, dma_addr))
			goto out;

		req->indirect_dma_addr = dma_addr;
	}
	ret = 0;

out:
	return ret;
}

/**
 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
 * @shost: SCSI host whose attributes to remove from sysfs.
 *
 * Note: Any attributes defined in the host template and that did not exist
 * before invocation of this function will be ignored.
 */
static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
{
	struct device_attribute **attr;

	for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
		device_remove_file(&shost->shost_dev, *attr);
}

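/*
 * Tear down a target port: remove it from sysfs and from the SCSI and SRP
 * transport layers, disconnect and free all channels and their request
 * rings, and drop the reference to the SCSI host.
 */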
static void srp_remove_target(struct srp_target_port *target)
{
	struct srp_rdma_ch *ch;
	int i;

	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);

	srp_del_scsi_host_attr(target->scsi_host);
	srp_rport_get(target->rport);
	srp_remove_host(target->scsi_host);
	scsi_remove_host(target->scsi_host);
	srp_stop_rport_timers(target->rport);
	srp_disconnect_target(target);
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		srp_free_ch_ib(target, ch);
	}
	cancel_work_sync(&target->tl_err_work);
	srp_rport_put(target->rport);
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		srp_free_req_data(target, ch);
	}
	kfree(target->ch);
	target->ch = NULL;

	spin_lock(&target->srp_host->target_lock);
	list_del(&target->list);
	spin_unlock(&target->srp_host->target_lock);

	scsi_host_put(target->scsi_host);
}

static void srp_remove_work(struct work_struct *work)
{
	struct srp_target_port *target =
		container_of(work, struct srp_target_port, remove_work);

	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);

	srp_remove_target(target);
}

static void srp_rport_delete(struct srp_rport *rport)
{
	struct srp_target_port *target = rport->lld_data;

	srp_queue_remove_work(target);
}

/**
 * srp_connected_ch() - number of connected channels
 * @target: SRP target port.
 */
static int srp_connected_ch(struct srp_target_port *target)
{
	int i, c = 0;

	for (i = 0; i < target->ch_count; i++)
		c += target->ch[i].connected;

	return c;
}

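/*
 * Connect a single RDMA channel: look up the path record, send the login
 * request and handle port/DLID redirect and stale connection rejects as
 * reported through ch->status by the CM event handler.
 */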
static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
{
	struct srp_target_port *target = ch->target;
	int ret;

	WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);

	ret = srp_lookup_path(ch);
	if (ret)
		return ret;

	while (1) {
		init_completion(&ch->done);
		ret = srp_send_req(ch, multich);
		if (ret)
			return ret;
		ret = wait_for_completion_interruptible(&ch->done);
		if (ret < 0)
			return ret;

		/*
		 * The CM event handling code will set status to
		 * SRP_PORT_REDIRECT if we get a port redirect REJ
		 * back, or SRP_DLID_REDIRECT if we get a lid/qp
		 * redirect REJ back.
		 */
		switch (ch->status) {
		case 0:
			ch->connected = true;
			return 0;

		case SRP_PORT_REDIRECT:
			ret = srp_lookup_path(ch);
			if (ret)
				return ret;
			break;

		case SRP_DLID_REDIRECT:
			break;

		case SRP_STALE_CONN:
			shost_printk(KERN_ERR, target->scsi_host, PFX
				     "giving up on stale connection\n");
			ch->status = -ECONNRESET;
			return ch->status;

		default:
			return ch->status;
		}
	}
}

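/*
 * Post a local invalidate work request for @rkey on the channel's queue pair.
 */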
static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
{
	struct ib_send_wr *bad_wr;
	struct ib_send_wr wr = {
		.opcode		    = IB_WR_LOCAL_INV,
		.wr_id		    = LOCAL_INV_WR_ID_MASK,
		.next		    = NULL,
		.num_sge	    = 0,
		.send_flags	    = 0,
		.ex.invalidate_rkey = rkey,
	};

	return ib_post_send(ch->qp, &wr, &bad_wr);
}

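/*
 * Undo the memory registrations and DMA mappings set up for a SCSI command:
 * invalidate fast registration descriptors or unmap FMRs, then unmap the
 * scatterlist.
 */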
static void srp_unmap_data(struct scsi_cmnd *scmnd,
			   struct srp_rdma_ch *ch,
			   struct srp_request *req)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_device *ibdev = dev->dev;
	int i, res;

	if (!scsi_sglist(scmnd) ||
	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&
	     scmnd->sc_data_direction != DMA_FROM_DEVICE))
		return;

	if (dev->use_fast_reg) {
		struct srp_fr_desc **pfr;

		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
			res = srp_inv_rkey(ch, (*pfr)->mr->rkey);
			if (res < 0) {
				shost_printk(KERN_ERR, target->scsi_host, PFX
				  "Queueing INV WR for rkey %#x failed (%d)\n",
				  (*pfr)->mr->rkey, res);
				queue_work(system_long_wq,
					   &target->tl_err_work);
			}
		}
		if (req->nmdesc)
			srp_fr_pool_put(ch->fr_pool, req->fr_list,
					req->nmdesc);
	} else {
		struct ib_pool_fmr **pfmr;

		for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
			ib_fmr_pool_unmap(*pfmr);
	}

	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
			scmnd->sc_data_direction);
}

/**
 * srp_claim_req - Take ownership of the scmnd associated with a request.
 * @ch: SRP RDMA channel.
 * @req: SRP request.
 * @sdev: If not NULL, only take ownership for this SCSI device.
 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
 *         ownership of @req->scmnd if it equals @scmnd.
 *
 * Return value:
 * Either NULL or a pointer to the SCSI command the caller became owner of.
 */
static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
				       struct srp_request *req,
				       struct scsi_device *sdev,
				       struct scsi_cmnd *scmnd)
{
	unsigned long flags;

	spin_lock_irqsave(&ch->lock, flags);
	if (req->scmnd &&
	    (!sdev || req->scmnd->device == sdev) &&
	    (!scmnd || req->scmnd == scmnd)) {
		scmnd = req->scmnd;
		req->scmnd = NULL;
	} else {
		scmnd = NULL;
	}
	spin_unlock_irqrestore(&ch->lock, flags);

	return scmnd;
}

/**
 * srp_free_req() - Unmap data and add request to the free request list.
 * @ch:     SRP RDMA channel.
 * @req:    Request to be freed.
 * @scmnd:  SCSI command associated with @req.
 * @req_lim_delta: Amount to be added to @target->req_lim.
 */
static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
			 struct scsi_cmnd *scmnd, s32 req_lim_delta)
{
	unsigned long flags;

	srp_unmap_data(scmnd, ch, req);

	spin_lock_irqsave(&ch->lock, flags);
	ch->req_lim += req_lim_delta;
	spin_unlock_irqrestore(&ch->lock, flags);
}

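/*
 * If the request still owns a SCSI command (optionally restricted to @sdev),
 * unmap its data, set the SCSI result and complete the command.
 */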
static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
			   struct scsi_device *sdev, int result)
{
	struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);

	if (scmnd) {
		srp_free_req(ch, req, scmnd, 0);
		scmnd->result = result;
		scmnd->scsi_done(scmnd);
	}
}

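/*
 * Fail all outstanding requests on every channel of the rport with
 * DID_TRANSPORT_FAILFAST.
 */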
static void srp_terminate_io(struct srp_rport *rport)
{
	struct srp_target_port *target = rport->lld_data;
	struct srp_rdma_ch *ch;
	struct Scsi_Host *shost = target->scsi_host;
	struct scsi_device *sdev;
	int i, j;

	/*
	 * Invoking srp_terminate_io() while srp_queuecommand() is running
	 * is not safe. Hence the warning statement below.
	 */
	shost_for_each_device(sdev, shost)
		WARN_ON_ONCE(sdev->request_queue->request_fn_active);

	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];

		for (j = 0; j < target->req_ring_size; ++j) {
			struct srp_request *req = &ch->req_ring[j];

			srp_finish_req(ch, req, NULL,
				       DID_TRANSPORT_FAILFAST << 16);
		}
	}
}

/*
 * It is up to the caller to ensure that srp_rport_reconnect() calls are
 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
 * srp_reset_device() or srp_reset_host() calls will occur while this function
 * is in progress. One way to realize that is not to call this function
 * directly but to call srp_reconnect_rport() instead since that last function
 * serializes calls of this function via rport->mutex and also blocks
 * srp_queuecommand() calls before invoking this function.
 */
static int srp_rport_reconnect(struct srp_rport *rport)
{
	struct srp_target_port *target = rport->lld_data;
	struct srp_rdma_ch *ch;
	int i, j, ret = 0;
	bool multich = false;

	srp_disconnect_target(target);

	if (target->state == SRP_TARGET_SCANNING)
		return -ENODEV;

	/*
	 * Now get a new local CM ID so that we avoid confusing the target in
	 * case things are really fouled up. Doing so also ensures that all CM
	 * callbacks will have finished before a new QP is allocated.
	 */
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		if (!ch->target)
			break;
		ret += srp_new_cm_id(ch);
	}
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		if (!ch->target)
			break;
		for (j = 0; j < target->req_ring_size; ++j) {
			struct srp_request *req = &ch->req_ring[j];