/*
 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/parser.h>
#include <linux/random.h>
#include <linux/jiffies.h>
#include <rdma/ib_cache.h>

#include <linux/atomic.h>

#include <scsi/scsi.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_tcq.h>
#include <scsi/srp.h>
#include <scsi/scsi_transport_srp.h>

#include "ib_srp.h"

#define DRV_NAME	"ib_srp"
#define PFX		DRV_NAME ": "
#define DRV_VERSION	"1.0"
#define DRV_RELDATE	"July 1, 2013"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_INFO(release_date, DRV_RELDATE);

static unsigned int srp_sg_tablesize;
static unsigned int cmd_sg_entries;
static unsigned int indirect_sg_entries;
static bool allow_ext_sg;
static bool prefer_fr;
static bool register_always;
static int topspin_workarounds = 1;

module_param(srp_sg_tablesize, uint, 0444);
MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");

module_param(cmd_sg_entries, uint, 0444);
MODULE_PARM_DESC(cmd_sg_entries,
		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");

module_param(indirect_sg_entries, uint, 0444);
MODULE_PARM_DESC(indirect_sg_entries,
		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SCSI_MAX_SG_CHAIN_SEGMENTS) ")");

module_param(allow_ext_sg, bool, 0444);
MODULE_PARM_DESC(allow_ext_sg,
		  "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");

module_param(topspin_workarounds, int, 0444);
MODULE_PARM_DESC(topspin_workarounds,
		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");

module_param(prefer_fr, bool, 0444);
MODULE_PARM_DESC(prefer_fr,
"Whether to use fast registration if both FMR and fast registration are supported");

module_param(register_always, bool, 0444);
MODULE_PARM_DESC(register_always,
		 "Use memory registration even for contiguous memory regions");

static struct kernel_param_ops srp_tmo_ops;

static int srp_reconnect_delay = 10;
module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");

static int srp_fast_io_fail_tmo = 15;
module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(fast_io_fail_tmo,
		 "Number of seconds between the observation of a transport"
		 " layer error and failing all I/O. \"off\" means that this"
		 " functionality is disabled.");

static int srp_dev_loss_tmo = 600;
module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dev_loss_tmo,
		 "Maximum number of seconds that the SRP transport should"
		 " insulate transport layer errors. After this time has been"
		 " exceeded the SCSI host is removed. Should be"
		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
		 " if fast_io_fail_tmo has not been set. \"off\" means that"
		 " this functionality is disabled.");

static unsigned ch_count;
module_param(ch_count, uint, 0444);
MODULE_PARM_DESC(ch_count,
		 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");

static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device);
static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);

static struct scsi_transport_template *ib_srp_transport_template;
static struct workqueue_struct *srp_remove_wq;

static struct ib_client srp_client = {
	.name   = "srp",
	.add    = srp_add_one,
	.remove = srp_remove_one
};

static struct ib_sa_client srp_sa_client;

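/*
 * The reconnect_delay, fast_io_fail_tmo and dev_loss_tmo module parameters
 * share these get/set callbacks: a negative value is reported as "off", and
 * every update is checked against the other two values via srp_tmo_valid()
 * before it is accepted.
 */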
static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
{
	int tmo = *(int *)kp->arg;

	if (tmo >= 0)
		return sprintf(buffer, "%d", tmo);
	else
		return sprintf(buffer, "off");
}

static int srp_tmo_set(const char *val, const struct kernel_param *kp)
{
	int tmo, res;

	if (strncmp(val, "off", 3) != 0) {
		res = kstrtoint(val, 0, &tmo);
		if (res)
			goto out;
	} else {
		tmo = -1;
	}
	if (kp->arg == &srp_reconnect_delay)
		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
				    srp_dev_loss_tmo);
	else if (kp->arg == &srp_fast_io_fail_tmo)
		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
	else
		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
				    tmo);
	if (res)
		goto out;
	*(int *)kp->arg = tmo;

out:
	return res;
}

static struct kernel_param_ops srp_tmo_ops = {
	.get = srp_tmo_get,
	.set = srp_tmo_set,
};

static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
{
	return (struct srp_target_port *) host->hostdata;
}

static const char *srp_target_info(struct Scsi_Host *host)
{
	return host_to_target(host)->target_name;
}

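/*
 * Report whether the Topspin/Cisco workarounds apply to @target by comparing
 * the OUI at the start of the I/O controller GUID against the known Topspin
 * and Cisco OUIs.
 */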
static int srp_target_is_topspin(struct srp_target_port *target)
{
	static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
	static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };

	return topspin_workarounds &&
		(!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
		 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
}

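/*
 * Allocate an information unit: a buffer of @size bytes that is DMA-mapped
 * for @direction on the HCA associated with @host.
 */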
static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
				   gfp_t gfp_mask,
				   enum dma_data_direction direction)
{
	struct srp_iu *iu;

	iu = kmalloc(sizeof *iu, gfp_mask);
	if (!iu)
		goto out;

	iu->buf = kzalloc(size, gfp_mask);
	if (!iu->buf)
		goto out_free_iu;

	iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
				    direction);
	if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
		goto out_free_buf;

	iu->size      = size;
	iu->direction = direction;

	return iu;

out_free_buf:
	kfree(iu->buf);
out_free_iu:
	kfree(iu);
out:
	return NULL;
}

static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
{
	if (!iu)
		return;

	ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
			    iu->direction);
	kfree(iu->buf);
	kfree(iu);
}

static void srp_qp_event(struct ib_event *event, void *context)
{
	pr_debug("QP event %d\n", event->event);
}

static int srp_init_qp(struct srp_target_port *target,
		       struct ib_qp *qp)
{
	struct ib_qp_attr *attr;
	int ret;

	attr = kmalloc(sizeof *attr, GFP_KERNEL);
	if (!attr)
		return -ENOMEM;

	ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
				  target->srp_host->port,
				  be16_to_cpu(target->pkey),
				  &attr->pkey_index);
	if (ret)
		goto out;

	attr->qp_state        = IB_QPS_INIT;
	attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
				    IB_ACCESS_REMOTE_WRITE);
	attr->port_num        = target->srp_host->port;

	ret = ib_modify_qp(qp, attr,
			   IB_QP_STATE		|
			   IB_QP_PKEY_INDEX	|
			   IB_QP_ACCESS_FLAGS	|
			   IB_QP_PORT);

out:
	kfree(attr);
	return ret;
}

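/*
 * Allocate a new IB CM ID for @ch, destroy the old one (if any) and reset
 * the cached path information to the original target destination.
 */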
static int srp_new_cm_id(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct ib_cm_id *new_cm_id;

	new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
				    srp_cm_handler, ch);
	if (IS_ERR(new_cm_id))
		return PTR_ERR(new_cm_id);

	if (ch->cm_id)
		ib_destroy_cm_id(ch->cm_id);
	ch->cm_id = new_cm_id;
	ch->path.sgid = target->sgid;
	ch->path.dgid = target->orig_dgid;
	ch->path.pkey = target->pkey;
	ch->path.service_id = target->service_id;

	return 0;
}

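/*
 * Create an FMR pool sized to the SCSI host queue depth, using the page size
 * and maximum page count supported by the SRP device.
 */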
static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
{
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_fmr_pool_param fmr_param;

	memset(&fmr_param, 0, sizeof(fmr_param));
	fmr_param.pool_size	    = target->scsi_host->can_queue;
	fmr_param.dirty_watermark   = fmr_param.pool_size / 4;
	fmr_param.cache		    = 1;
	fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
	fmr_param.page_shift	    = ilog2(dev->mr_page_size);
	fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE |
				       IB_ACCESS_REMOTE_WRITE |
				       IB_ACCESS_REMOTE_READ);

	return ib_create_fmr_pool(dev->pd, &fmr_param);
}

/**
 * srp_destroy_fr_pool() - free the resources owned by a pool
 * @pool: Fast registration pool to be destroyed.
 */
static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
{
	int i;
	struct srp_fr_desc *d;

	if (!pool)
		return;

	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
		if (d->frpl)
			ib_free_fast_reg_page_list(d->frpl);
		if (d->mr)
			ib_dereg_mr(d->mr);
	}
	kfree(pool);
}

/**
 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
 * @device:            IB device to allocate fast registration descriptors for.
 * @pd:                Protection domain associated with the FR descriptors.
 * @pool_size:         Number of descriptors to allocate.
 * @max_page_list_len: Maximum fast registration work request page list length.
 */
static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
					      struct ib_pd *pd, int pool_size,
					      int max_page_list_len)
{
	struct srp_fr_pool *pool;
	struct srp_fr_desc *d;
	struct ib_mr *mr;
	struct ib_fast_reg_page_list *frpl;
	int i, ret = -EINVAL;

	if (pool_size <= 0)
		goto err;
	ret = -ENOMEM;
	pool = kzalloc(sizeof(struct srp_fr_pool) +
		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
	if (!pool)
		goto err;
	pool->size = pool_size;
	pool->max_page_list_len = max_page_list_len;
	spin_lock_init(&pool->lock);
	INIT_LIST_HEAD(&pool->free_list);

	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
		mr = ib_alloc_fast_reg_mr(pd, max_page_list_len);
		if (IS_ERR(mr)) {
			ret = PTR_ERR(mr);
			goto destroy_pool;
		}
		d->mr = mr;
		frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len);
		if (IS_ERR(frpl)) {
			ret = PTR_ERR(frpl);
			goto destroy_pool;
		}
		d->frpl = frpl;
		list_add_tail(&d->entry, &pool->free_list);
	}

out:
	return pool;

destroy_pool:
	srp_destroy_fr_pool(pool);

err:
	pool = ERR_PTR(ret);
	goto out;
}

/**
 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
 * @pool: Pool to obtain descriptor from.
 */
static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
{
	struct srp_fr_desc *d = NULL;
	unsigned long flags;

	spin_lock_irqsave(&pool->lock, flags);
	if (!list_empty(&pool->free_list)) {
		d = list_first_entry(&pool->free_list, typeof(*d), entry);
		list_del(&d->entry);
	}
	spin_unlock_irqrestore(&pool->lock, flags);

	return d;
}

/**
 * srp_fr_pool_put() - put an FR descriptor back in the free list
 * @pool: Pool the descriptor was allocated from.
 * @desc: Pointer to an array of fast registration descriptor pointers.
 * @n:    Number of descriptors to put back.
 *
 * Note: The caller must already have queued an invalidation request for
 * desc->mr->rkey before calling this function.
 */
static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
			    int n)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&pool->lock, flags);
	for (i = 0; i < n; i++)
		list_add(&desc[i]->entry, &pool->free_list);
	spin_unlock_irqrestore(&pool->lock, flags);
}

static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
{
	struct srp_device *dev = target->srp_host->srp_dev;

	return srp_create_fr_pool(dev->dev, dev->pd,
				  target->scsi_host->can_queue,
				  dev->max_pages_per_mr);
}

/**
 * srp_destroy_qp() - destroy an RDMA queue pair
 * @ch: SRP RDMA channel.
 *
 * Change a queue pair into the error state and wait until all receive
 * completions have been processed before destroying it. This prevents the
 * receive completion handler from accessing the queue pair while it is
 * being destroyed.
 */
static void srp_destroy_qp(struct srp_rdma_ch *ch)
{
	static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
	static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID };
	struct ib_recv_wr *bad_wr;
	int ret;

	/* Destroying a QP and reusing ch->done is only safe if not connected */
	WARN_ON_ONCE(ch->connected);

	ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE);
	WARN_ONCE(ret, "ib_cm_init_qp_attr() returned %d\n", ret);
	if (ret)
		goto out;

	init_completion(&ch->done);
	ret = ib_post_recv(ch->qp, &wr, &bad_wr);
	WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret);
	if (ret == 0)
		wait_for_completion(&ch->done);

out:
	ib_destroy_qp(ch->qp);
}

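/*
 * Allocate the completion queues, the RC queue pair and, depending on the
 * device capabilities, an FR or FMR pool for @ch. Any IB resources that the
 * channel already owns are destroyed and replaced.
 */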
static int srp_create_ch_ib(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_qp_init_attr *init_attr;
	struct ib_cq *recv_cq, *send_cq;
	struct ib_qp *qp;
	struct ib_fmr_pool *fmr_pool = NULL;
	struct srp_fr_pool *fr_pool = NULL;
	const int m = 1 + dev->use_fast_reg;
	int ret;

	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
	if (!init_attr)
		return -ENOMEM;

	/* + 1 for SRP_LAST_WR_ID */
	recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
			       target->queue_size + 1, ch->comp_vector);
	if (IS_ERR(recv_cq)) {
		ret = PTR_ERR(recv_cq);
		goto err;
	}

	send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
			       m * target->queue_size, ch->comp_vector);
	if (IS_ERR(send_cq)) {
		ret = PTR_ERR(send_cq);
		goto err_recv_cq;
	}

	ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);

	init_attr->event_handler       = srp_qp_event;
	init_attr->cap.max_send_wr     = m * target->queue_size;
	init_attr->cap.max_recv_wr     = target->queue_size + 1;
	init_attr->cap.max_recv_sge    = 1;
	init_attr->cap.max_send_sge    = 1;
	init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
	init_attr->qp_type             = IB_QPT_RC;
	init_attr->send_cq             = send_cq;
	init_attr->recv_cq             = recv_cq;

	qp = ib_create_qp(dev->pd, init_attr);
	if (IS_ERR(qp)) {
		ret = PTR_ERR(qp);
		goto err_send_cq;
	}

	ret = srp_init_qp(target, qp);
	if (ret)
		goto err_qp;

	if (dev->use_fast_reg && dev->has_fr) {
		fr_pool = srp_alloc_fr_pool(target);
		if (IS_ERR(fr_pool)) {
			ret = PTR_ERR(fr_pool);
			shost_printk(KERN_WARNING, target->scsi_host, PFX
				     "FR pool allocation failed (%d)\n", ret);
			goto err_qp;
		}
		if (ch->fr_pool)
			srp_destroy_fr_pool(ch->fr_pool);
		ch->fr_pool = fr_pool;
	} else if (!dev->use_fast_reg && dev->has_fmr) {
		fmr_pool = srp_alloc_fmr_pool(target);
		if (IS_ERR(fmr_pool)) {
			ret = PTR_ERR(fmr_pool);
			shost_printk(KERN_WARNING, target->scsi_host, PFX
				     "FMR pool allocation failed (%d)\n", ret);
			goto err_qp;
		}
		if (ch->fmr_pool)
			ib_destroy_fmr_pool(ch->fmr_pool);
		ch->fmr_pool = fmr_pool;
	}

	if (ch->qp)
		srp_destroy_qp(ch);
	if (ch->recv_cq)
		ib_destroy_cq(ch->recv_cq);
	if (ch->send_cq)
		ib_destroy_cq(ch->send_cq);

	ch->qp = qp;
	ch->recv_cq = recv_cq;
	ch->send_cq = send_cq;

	kfree(init_attr);
	return 0;

err_qp:
	ib_destroy_qp(qp);

err_send_cq:
	ib_destroy_cq(send_cq);

err_recv_cq:
	ib_destroy_cq(recv_cq);

err:
	kfree(init_attr);
	return ret;
}

/*
 * Note: this function may be called without srp_alloc_iu_bufs() having been
 * invoked. Hence the ch->[rt]x_ring checks.
 */
static void srp_free_ch_ib(struct srp_target_port *target,
			   struct srp_rdma_ch *ch)
{
	struct srp_device *dev = target->srp_host->srp_dev;
	int i;

	if (!ch->target)
		return;

	if (ch->cm_id) {
		ib_destroy_cm_id(ch->cm_id);
		ch->cm_id = NULL;
	}

	/* If srp_new_cm_id() succeeded but srp_create_ch_ib() did not, return. */
	if (!ch->qp)
		return;

	if (dev->use_fast_reg) {
		if (ch->fr_pool)
			srp_destroy_fr_pool(ch->fr_pool);
	} else {
		if (ch->fmr_pool)
			ib_destroy_fmr_pool(ch->fmr_pool);
	}
	srp_destroy_qp(ch);
	ib_destroy_cq(ch->send_cq);
	ib_destroy_cq(ch->recv_cq);

	/*
	 * Prevent the SCSI error handler from using this channel after it
	 * has been freed, since the error handler may continue trying to
	 * perform recovery actions after scsi_remove_host() has returned.
	 */
	ch->target = NULL;

	ch->qp = NULL;
	ch->send_cq = ch->recv_cq = NULL;

	if (ch->rx_ring) {
		for (i = 0; i < target->queue_size; ++i)
			srp_free_iu(target->srp_host, ch->rx_ring[i]);
		kfree(ch->rx_ring);
		ch->rx_ring = NULL;
	}
	if (ch->tx_ring) {
		for (i = 0; i < target->queue_size; ++i)
			srp_free_iu(target->srp_host, ch->tx_ring[i]);
		kfree(ch->tx_ring);
		ch->tx_ring = NULL;
	}
}

static void srp_path_rec_completion(int status,
				    struct ib_sa_path_rec *pathrec,
				    void *ch_ptr)
{
	struct srp_rdma_ch *ch = ch_ptr;
	struct srp_target_port *target = ch->target;

	ch->status = status;
	if (status)
		shost_printk(KERN_ERR, target->scsi_host,
			     PFX "Got failed path rec status %d\n", status);
	else
		ch->path = *pathrec;
	complete(&ch->done);
}

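/*
 * Start a subnet administrator path record query for the target port and
 * wait interruptibly for srp_path_rec_completion() to report the result.
 */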
static int srp_lookup_path(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	int ret;

	ch->path.numb_path = 1;

	init_completion(&ch->done);

	ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
					       target->srp_host->srp_dev->dev,
					       target->srp_host->port,
					       &ch->path,
					       IB_SA_PATH_REC_SERVICE_ID |
					       IB_SA_PATH_REC_DGID	 |
					       IB_SA_PATH_REC_SGID	 |
					       IB_SA_PATH_REC_NUMB_PATH	 |
					       IB_SA_PATH_REC_PKEY,
					       SRP_PATH_REC_TIMEOUT_MS,
					       GFP_KERNEL,
					       srp_path_rec_completion,
					       ch, &ch->path_query);
	if (ch->path_query_id < 0)
		return ch->path_query_id;

	ret = wait_for_completion_interruptible(&ch->done);
	if (ret < 0)
		return ret;

	if (ch->status < 0)
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "Path record query failed\n");

	return ch->status;
}

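/*
 * Build an SRP_LOGIN_REQ and send it to the target via an IB CM REQ,
 * applying the obsolete port ID layout and the Topspin/Cisco initiator
 * port ID workaround where necessary.
 */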
static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
{
	struct srp_target_port *target = ch->target;
	struct {
		struct ib_cm_req_param param;
		struct srp_login_req   priv;
	} *req = NULL;
	int status;

	req = kzalloc(sizeof *req, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	req->param.primary_path		      = &ch->path;
	req->param.alternate_path	      = NULL;
	req->param.service_id		      = target->service_id;
	req->param.qp_num		      = ch->qp->qp_num;
	req->param.qp_type		      = ch->qp->qp_type;
	req->param.private_data		      = &req->priv;
	req->param.private_data_len	      = sizeof req->priv;
	req->param.flow_control		      = 1;

	get_random_bytes(&req->param.starting_psn, 4);
	req->param.starting_psn		     &= 0xffffff;

	/*
	 * Pick some arbitrary defaults here; we could make these
	 * module parameters if anyone cared about setting them.
	 */
	req->param.responder_resources	      = 4;
	req->param.remote_cm_response_timeout = 20;
	req->param.local_cm_response_timeout  = 20;
	req->param.retry_count                = target->tl_retry_count;
	req->param.rnr_retry_count	      = 7;
	req->param.max_cm_retries	      = 15;

	req->priv.opcode	= SRP_LOGIN_REQ;
	req->priv.tag		= 0;
	req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
	req->priv.req_buf_fmt	= cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
					      SRP_BUF_FORMAT_INDIRECT);
	req->priv.req_flags	= (multich ? SRP_MULTICHAN_MULTI :
				   SRP_MULTICHAN_SINGLE);
	/*
	 * In the published SRP specification (draft rev. 16a), the
	 * port identifier format is 8 bytes of ID extension followed
	 * by 8 bytes of GUID.  Older drafts put the two halves in the
	 * opposite order, so that the GUID comes first.
	 *
	 * Targets conforming to these obsolete drafts can be
	 * recognized by the I/O Class they report.
	 */
	if (target->io_class == SRP_REV10_IB_IO_CLASS) {
		memcpy(req->priv.initiator_port_id,
		       &target->sgid.global.interface_id, 8);
		memcpy(req->priv.initiator_port_id + 8,
		       &target->initiator_ext, 8);
		memcpy(req->priv.target_port_id,     &target->ioc_guid, 8);
		memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
	} else {
		memcpy(req->priv.initiator_port_id,
		       &target->initiator_ext, 8);
		memcpy(req->priv.initiator_port_id + 8,
		       &target->sgid.global.interface_id, 8);
		memcpy(req->priv.target_port_id,     &target->id_ext, 8);
		memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
	}

	/*
	 * Topspin/Cisco SRP targets will reject our login unless we
	 * zero out the first 8 bytes of our initiator port ID and set
	 * the second 8 bytes to the local node GUID.
	 */
	if (srp_target_is_topspin(target)) {
		shost_printk(KERN_DEBUG, target->scsi_host,
			     PFX "Topspin/Cisco initiator port ID workaround "
			     "activated for target GUID %016llx\n",
			     be64_to_cpu(target->ioc_guid));
		memset(req->priv.initiator_port_id, 0, 8);
		memcpy(req->priv.initiator_port_id + 8,
		       &target->srp_host->srp_dev->dev->node_guid, 8);
	}

	status = ib_send_cm_req(ch->cm_id, &req->param);

	kfree(req);

	return status;
}

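/*
 * Schedule removal of @target. Returns true if the state was changed to
 * SRP_TARGET_REMOVED and the remove work was queued, and false if removal
 * had already been scheduled.
 */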
static bool srp_queue_remove_work(struct srp_target_port *target)
{
	bool changed = false;

	spin_lock_irq(&target->lock);
	if (target->state != SRP_TARGET_REMOVED) {
		target->state = SRP_TARGET_REMOVED;
		changed = true;
	}
	spin_unlock_irq(&target->lock);

	if (changed)
		queue_work(srp_remove_wq, &target->remove_work);

	return changed;
}

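/*
 * Mark every channel of @target as disconnected and send a CM DREQ on each
 * channel that still has a CM ID.
 */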
static void srp_disconnect_target(struct srp_target_port *target)
{
	struct srp_rdma_ch *ch;
	int i;

	/* XXX should send SRP_I_LOGOUT request */

	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		ch->connected = false;
		if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
			shost_printk(KERN_DEBUG, target->scsi_host,
				     PFX "Sending CM DREQ failed\n");
		}
	}
}

static void srp_free_req_data(struct srp_target_port *target,
			      struct srp_rdma_ch *ch)
{
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_device *ibdev = dev->dev;
	struct srp_request *req;
	int i;

	if (!ch->req_ring)
		return;

	for (i = 0; i < target->req_ring_size; ++i) {
		req = &ch->req_ring[i];
		if (dev->use_fast_reg)
			kfree(req->fr_list);
		else
			kfree(req->fmr_list);
		kfree(req->map_page);
		if (req->indirect_dma_addr) {
			ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
					    target->indirect_size,
					    DMA_TO_DEVICE);
		}
		kfree(req->indirect_desc);
	}

	kfree(ch->req_ring);
	ch->req_ring = NULL;
}

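/*
 * Allocate the request ring of @ch together with the per-request memory
 * registration lists, page arrays and DMA-mapped indirect descriptor
 * buffers.
 */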
static int srp_alloc_req_data(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *srp_dev = target->srp_host->srp_dev;
	struct ib_device *ibdev = srp_dev->dev;
	struct srp_request *req;
	void *mr_list;
	dma_addr_t dma_addr;
	int i, ret = -ENOMEM;

	ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
			       GFP_KERNEL);
	if (!ch->req_ring)
		goto out;

	for (i = 0; i < target->req_ring_size; ++i) {
		req = &ch->req_ring[i];
		mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
				  GFP_KERNEL);
		if (!mr_list)
			goto out;
		if (srp_dev->use_fast_reg)
			req->fr_list = mr_list;
		else
			req->fmr_list = mr_list;
		req->map_page = kmalloc(srp_dev->max_pages_per_mr *
					sizeof(void *), GFP_KERNEL);
		if (!req->map_page)
			goto out;
		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
		if (!req->indirect_desc)
			goto out;

		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
					     target->indirect_size,
					     DMA_TO_DEVICE);
		if (ib_dma_mapping_error(ibdev, dma_addr))
			goto out;

		req->indirect_dma_addr = dma_addr;
	}
	ret = 0;

out:
	return ret;
}

/**
 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
 * @shost: SCSI host whose attributes to remove from sysfs.
 *
 * Note: Any attributes defined in the host template that did not exist
 * before this function was invoked are ignored.
 */
static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
{
	struct device_attribute **attr;

	for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
		device_remove_file(&shost->shost_dev, *attr);
}

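/*
 * Tear down everything that was set up for @target: sysfs attributes, the
 * SCSI host, the rport timers, the RDMA channels and the request rings.
 */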
static void srp_remove_target(struct srp_target_port *target)
{
	struct srp_rdma_ch *ch;
	int i;

	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);

	srp_del_scsi_host_attr(target->scsi_host);
	srp_rport_get(target->rport);
	srp_remove_host(target->scsi_host);
	scsi_remove_host(target->scsi_host);
	srp_stop_rport_timers(target->rport);
	srp_disconnect_target(target);
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		srp_free_ch_ib(target, ch);
	}
	cancel_work_sync(&target->tl_err_work);
	srp_rport_put(target->rport);
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		srp_free_req_data(target, ch);
	}
	kfree(target->ch);
	target->ch = NULL;

	spin_lock(&target->srp_host->target_lock);
	list_del(&target->list);
	spin_unlock(&target->srp_host->target_lock);

	scsi_host_put(target->scsi_host);
}

static void srp_remove_work(struct work_struct *work)
{
	struct srp_target_port *target =
		container_of(work, struct srp_target_port, remove_work);

	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);

	srp_remove_target(target);
}

static void srp_rport_delete(struct srp_rport *rport)
{
	struct srp_target_port *target = rport->lld_data;

	srp_queue_remove_work(target);
}

/**
 * srp_connected_ch() - number of connected channels
 * @target: SRP target port.
 */
static int srp_connected_ch(struct srp_target_port *target)
{
	int i, c = 0;

	for (i = 0; i < target->ch_count; i++)
		c += target->ch[i].connected;

	return c;
}

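/*
 * Resolve a path to the target and keep sending SRP login requests until
 * the channel is connected, following port and LID redirects as needed.
 */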
static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
{
	struct srp_target_port *target = ch->target;
	int ret;

	WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);

	ret = srp_lookup_path(ch);
	if (ret)
		return ret;

	while (1) {
		init_completion(&ch->done);
		ret = srp_send_req(ch, multich);
		if (ret)
			return ret;
		ret = wait_for_completion_interruptible(&ch->done);
		if (ret < 0)
			return ret;

		/*
		 * The CM event handling code will set status to
		 * SRP_PORT_REDIRECT if we get a port redirect REJ
		 * back, or SRP_DLID_REDIRECT if we get a lid/qp
		 * redirect REJ back.
		 */
		switch (ch->status) {
		case 0:
			ch->connected = true;
			return 0;

		case SRP_PORT_REDIRECT:
			ret = srp_lookup_path(ch);
			if (ret)
				return ret;
			break;

		case SRP_DLID_REDIRECT:
			break;

		case SRP_STALE_CONN:
			shost_printk(KERN_ERR, target->scsi_host, PFX
				     "giving up on stale connection\n");
			ch->status = -ECONNRESET;
			return ch->status;

		default:
			return ch->status;
		}
	}
}

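/*
 * Post a local invalidate work request for @rkey on the queue pair of @ch.
 */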
static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
{
	struct ib_send_wr *bad_wr;
	struct ib_send_wr wr = {
		.opcode		    = IB_WR_LOCAL_INV,
		.wr_id		    = LOCAL_INV_WR_ID_MASK,
		.next		    = NULL,
		.num_sge	    = 0,
		.send_flags	    = 0,
		.ex.invalidate_rkey = rkey,
	};

	return ib_post_send(ch->qp, &wr, &bad_wr);
}

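/*
 * Undo the memory registrations and DMA mappings set up for @scmnd:
 * invalidate fast registration rkeys or unmap FMRs, then unmap the
 * scatterlist.
 */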
static void srp_unmap_data(struct scsi_cmnd *scmnd,
			   struct srp_rdma_ch *ch,
			   struct srp_request *req)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_device *ibdev = dev->dev;
	int i, res;

	if (!scsi_sglist(scmnd) ||
	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&
	     scmnd->sc_data_direction != DMA_FROM_DEVICE))
		return;

	if (dev->use_fast_reg) {
		struct srp_fr_desc **pfr;

		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
			res = srp_inv_rkey(ch, (*pfr)->mr->rkey);
			if (res < 0) {
				shost_printk(KERN_ERR, target->scsi_host, PFX
				  "Queueing INV WR for rkey %#x failed (%d)\n",
				  (*pfr)->mr->rkey, res);
				queue_work(system_long_wq,
					   &target->tl_err_work);
			}
		}
		if (req->nmdesc)
			srp_fr_pool_put(ch->fr_pool, req->fr_list,
					req->nmdesc);
	} else {
		struct ib_pool_fmr **pfmr;

		for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
			ib_fmr_pool_unmap(*pfmr);
	}

	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
			scmnd->sc_data_direction);
}

/**
 * srp_claim_req - Take ownership of the scmnd associated with a request.
 * @ch: SRP RDMA channel.
 * @req: SRP request.
 * @sdev: If not NULL, only take ownership for this SCSI device.
 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
 *         ownership of @req->scmnd if it equals @scmnd.
 *
 * Return value:
 * Either NULL or a pointer to the SCSI command the caller became owner of.
 */
static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
				       struct srp_request *req,
				       struct scsi_device *sdev,
				       struct scsi_cmnd *scmnd)
{
	unsigned long flags;

	spin_lock_irqsave(&ch->lock, flags);
	if (req->scmnd &&
	    (!sdev || req->scmnd->device == sdev) &&
	    (!scmnd || req->scmnd == scmnd)) {
		scmnd = req->scmnd;
		req->scmnd = NULL;
	} else {
		scmnd = NULL;
	}
	spin_unlock_irqrestore(&ch->lock, flags);

	return scmnd;
}

/**
 * srp_free_req() - Unmap data and add request to the free request list.
 * @ch:     SRP RDMA channel.
 * @req:    Request to be freed.
 * @scmnd:  SCSI command associated with @req.
 * @req_lim_delta: Amount to be added to @target->req_lim.
 */
static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
			 struct scsi_cmnd *scmnd, s32 req_lim_delta)
{
	unsigned long flags;

	srp_unmap_data(scmnd, ch, req);

	spin_lock_irqsave(&ch->lock, flags);
	ch->req_lim += req_lim_delta;
	spin_unlock_irqrestore(&ch->lock, flags);
}

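/*
 * Claim @req (optionally restricted to @sdev), release its resources and
 * complete the associated SCSI command with @result.
 */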
static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
			   struct scsi_device *sdev, int result)
{
	struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);

	if (scmnd) {
		srp_free_req(ch, req, scmnd, 0);
		scmnd->result = result;
		scmnd->scsi_done(scmnd);
	}
}

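/*
 * Fail all outstanding requests on every channel of the SRP remote port
 * with DID_TRANSPORT_FAILFAST.
 */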
static void srp_terminate_io(struct srp_rport *rport)
{
	struct srp_target_port *target = rport->lld_data;
	struct srp_rdma_ch *ch;
	struct Scsi_Host *shost = target->scsi_host;
	struct scsi_device *sdev;
	int i, j;

	/*
	 * Invoking srp_terminate_io() while srp_queuecommand() is running
	 * is not safe. Hence the warning statement below.
	 */
	shost_for_each_device(sdev, shost)
		WARN_ON_ONCE(sdev->request_queue->request_fn_active);

	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];

		for (j = 0; j < target->req_ring_size; ++j) {
			struct srp_request *req = &ch->req_ring[j];

			srp_finish_req(ch, req, NULL,
				       DID_TRANSPORT_FAILFAST << 16);
		}
	}
}

/*
 * It is up to the caller to ensure that srp_rport_reconnect() calls are
 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
 * srp_reset_device() or srp_reset_host() calls will occur while this function
 * is in progress. One way to ensure this is not to call this function
 * directly but to call srp_reconnect_rport() instead, since that function
 * serializes calls to this function via rport->mutex and also blocks
 * srp_queuecommand() calls before invoking this function.
 */
static int srp_rport_reconnect(struct srp_rport *rport)
{
	struct srp_target_port *target = rport->lld_data;
	struct srp_rdma_ch *ch;
	int i, j, ret = 0;
	bool multich = false;

	srp_disconnect_target(target);

	if (target->state == SRP_TARGET_SCANNING)
		return -ENODEV;

	/*
	 * Now get a new local CM ID so that we avoid confusing the target in
	 * case things are really fouled up. Doing so also ensures that all CM
	 * callbacks will have finished before a new QP is allocated.
	 */
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		ret += srp_new_cm_id(ch);