/*
 * Copyright(c) 2015 - 2018 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <rdma/ib_mad.h>
#include <rdma/ib_user_verbs.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/utsname.h>
#include <linux/rculist.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
56
#include <rdma/opa_addr.h>
Mike Marciniszyn's avatar
Mike Marciniszyn committed
57
58
59
60
61
62

#include "hfi.h"
#include "common.h"
#include "device.h"
#include "trace.h"
#include "qp.h"
63
#include "verbs_txreq.h"
64
#include "debugfs.h"
65
#include "vnic.h"
66
#include "fault.h"
67
#include "affinity.h"
Mike Marciniszyn's avatar
Mike Marciniszyn committed
68

69
/*
 * Module parameters.
 *
 * Every parameter below is registered with S_IRUGO permissions, i.e. it is
 * exposed read-only and can only be set when the module is loaded.
 */
static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
		   S_IRUGO);
MODULE_PARM_DESC(lkey_table_size,
		 "LKEY table size in bits (2^n, 1 <= n <= 23)");

static unsigned int hfi1_max_pds = 0xFFFF;
module_param_named(max_pds, hfi1_max_pds, uint, S_IRUGO);
MODULE_PARM_DESC(max_pds,
		 "Maximum number of protection domains to support");

static unsigned int hfi1_max_ahs = 0xFFFF;
module_param_named(max_ahs, hfi1_max_ahs, uint, S_IRUGO);
MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");

unsigned int hfi1_max_cqes = 0x2FFFFF;
module_param_named(max_cqes, hfi1_max_cqes, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqes,
		 "Maximum number of completion queue entries to support");

unsigned int hfi1_max_cqs = 0x1FFFF;
module_param_named(max_cqs, hfi1_max_cqs, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");

unsigned int hfi1_max_qp_wrs = 0x3FFF;
module_param_named(max_qp_wrs, hfi1_max_qp_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");

unsigned int hfi1_max_qps = 32768;
module_param_named(max_qps, hfi1_max_qps, uint, S_IRUGO);
MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");

unsigned int hfi1_max_sges = 0x60;
module_param_named(max_sges, hfi1_max_sges, uint, S_IRUGO);
MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");

unsigned int hfi1_max_mcast_grps = 16384;
module_param_named(max_mcast_grps, hfi1_max_mcast_grps, uint, S_IRUGO);
MODULE_PARM_DESC(max_mcast_grps,
		 "Maximum number of multicast groups to support");

unsigned int hfi1_max_mcast_qp_attached = 16;
module_param_named(max_mcast_qp_attached, hfi1_max_mcast_qp_attached,
		   uint, S_IRUGO);
MODULE_PARM_DESC(max_mcast_qp_attached,
		 "Maximum number of attached QPs to support");

unsigned int hfi1_max_srqs = 1024;
module_param_named(max_srqs, hfi1_max_srqs, uint, S_IRUGO);
MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");

unsigned int hfi1_max_srq_sges = 128;
module_param_named(max_srq_sges, hfi1_max_srq_sges, uint, S_IRUGO);
MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");

unsigned int hfi1_max_srq_wrs = 0x1FFFF;
module_param_named(max_srq_wrs, hfi1_max_srq_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");

unsigned short piothreshold = 256;
module_param(piothreshold, ushort, S_IRUGO);
MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio");

static unsigned int sge_copy_mode;
module_param(sge_copy_mode, uint, S_IRUGO);
MODULE_PARM_DESC(sge_copy_mode,
		 "Verbs copy mode: 0 use memcpy, 1 use cacheless copy, 2 adapt based on WSS");

Mike Marciniszyn's avatar
Mike Marciniszyn committed
137
138
static void verbs_sdma_complete(
	struct sdma_txreq *cookie,
139
	int status);
Mike Marciniszyn's avatar
Mike Marciniszyn committed
140

141
142
143
144
145
static int pio_wait(struct rvt_qp *qp,
		    struct send_context *sc,
		    struct hfi1_pkt_state *ps,
		    u32 flag);

146
147
148
/* Length of buffer to create verbs txreq cache name */
#define TXREQ_NAME_LEN 24

149
150
151
/* 16B trailing buffer */
static const u8 trail_buf[MAX_16B_PADDING];

152
/* Working-set-size (WSS) tuning knobs; both are read-only module parameters. */
static uint wss_threshold = 80;
module_param(wss_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
static uint wss_clean_period = 256;
module_param(wss_clean_period, uint, S_IRUGO);
MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned");

159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
/*
 * Work-completion opcode to report for each work-request opcode.
 * The table is indexed by ib_wr_opcode and yields the ib_wc_opcode.
 */
const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
	/* sends: every send flavour completes as a plain send */
	[IB_WR_SEND] = IB_WC_SEND,
	[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
	[IB_WR_SEND_WITH_INV] = IB_WC_SEND,
	/* RDMA writes and reads */
	[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
	[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
	[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
	/* atomics */
	[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
	[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
	/* memory-region operations */
	[IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
	[IB_WR_REG_MR] = IB_WC_REG_MR,
};

Mike Marciniszyn's avatar
Mike Marciniszyn committed
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
/*
 * Length of header by opcode, 0 --> not supported
 */
const u8 hdr_len_by_opcode[256] = {
	/* RC */
	[IB_OPCODE_RC_SEND_FIRST]                     = 12 + 8,
	[IB_OPCODE_RC_SEND_MIDDLE]                    = 12 + 8,
	[IB_OPCODE_RC_SEND_LAST]                      = 12 + 8,
	[IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE]       = 12 + 8 + 4,
	[IB_OPCODE_RC_SEND_ONLY]                      = 12 + 8,
	[IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE]       = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_WRITE_FIRST]               = 12 + 8 + 16,
	[IB_OPCODE_RC_RDMA_WRITE_MIDDLE]              = 12 + 8,
	[IB_OPCODE_RC_RDMA_WRITE_LAST]                = 12 + 8,
	[IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY]                = 12 + 8 + 16,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = 12 + 8 + 20,
	[IB_OPCODE_RC_RDMA_READ_REQUEST]              = 12 + 8 + 16,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST]       = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE]      = 12 + 8,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST]        = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY]        = 12 + 8 + 4,
	[IB_OPCODE_RC_ACKNOWLEDGE]                    = 12 + 8 + 4,
198
	[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE]             = 12 + 8 + 4 + 8,
Mike Marciniszyn's avatar
Mike Marciniszyn committed
199
200
	[IB_OPCODE_RC_COMPARE_SWAP]                   = 12 + 8 + 28,
	[IB_OPCODE_RC_FETCH_ADD]                      = 12 + 8 + 28,
201
202
	[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE]      = 12 + 8 + 4,
	[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE]      = 12 + 8 + 4,
Mike Marciniszyn's avatar
Mike Marciniszyn committed
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
	/* UC */
	[IB_OPCODE_UC_SEND_FIRST]                     = 12 + 8,
	[IB_OPCODE_UC_SEND_MIDDLE]                    = 12 + 8,
	[IB_OPCODE_UC_SEND_LAST]                      = 12 + 8,
	[IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE]       = 12 + 8 + 4,
	[IB_OPCODE_UC_SEND_ONLY]                      = 12 + 8,
	[IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE]       = 12 + 8 + 4,
	[IB_OPCODE_UC_RDMA_WRITE_FIRST]               = 12 + 8 + 16,
	[IB_OPCODE_UC_RDMA_WRITE_MIDDLE]              = 12 + 8,
	[IB_OPCODE_UC_RDMA_WRITE_LAST]                = 12 + 8,
	[IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY]                = 12 + 8 + 16,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = 12 + 8 + 20,
	/* UD */
	[IB_OPCODE_UD_SEND_ONLY]                      = 12 + 8 + 8,
	[IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE]       = 12 + 8 + 12
};

static const opcode_handler opcode_handler_tbl[256] = {
	/* RC */
	[IB_OPCODE_RC_SEND_FIRST]                     = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_MIDDLE]                    = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_LAST]                      = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE]       = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_ONLY]                      = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE]       = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_FIRST]               = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_MIDDLE]              = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_LAST]                = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY]                = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_REQUEST]              = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST]       = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE]      = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST]        = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY]        = &hfi1_rc_rcv,
	[IB_OPCODE_RC_ACKNOWLEDGE]                    = &hfi1_rc_rcv,
	[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE]             = &hfi1_rc_rcv,
	[IB_OPCODE_RC_COMPARE_SWAP]                   = &hfi1_rc_rcv,
	[IB_OPCODE_RC_FETCH_ADD]                      = &hfi1_rc_rcv,
244
245
	[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE]      = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE]      = &hfi1_rc_rcv,
Mike Marciniszyn's avatar
Mike Marciniszyn committed
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
	/* UC */
	[IB_OPCODE_UC_SEND_FIRST]                     = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_MIDDLE]                    = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_LAST]                      = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE]       = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_ONLY]                      = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE]       = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_FIRST]               = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_MIDDLE]              = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_LAST]                = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY]                = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	/* UD */
	[IB_OPCODE_UD_SEND_ONLY]                      = &hfi1_ud_rcv,
	[IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE]       = &hfi1_ud_rcv,
	/* CNP */
	[IB_OPCODE_CNP]				      = &hfi1_cnp_rcv
};

266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
/* Low five bits of an opcode: the operation within its transport class */
#define OPMASK 0x1f

/*
 * Per-transport-class opcode bitmaps.
 *
 * The table has BIT(3) == 8 entries and is indexed by the top three bits
 * of the opcode (opcode >> 5).  Within an entry, bit (opcode & OPMASK) is
 * set for each opcode listed below.  Classes without an initializer have
 * an all-zero mask.
 * NOTE(review): judging by the name, a set bit marks an opcode that may
 * be sent by PIO - confirm against the users of this table.
 */
static const u32 pio_opmask[BIT(3)] = {
	/* RC */
	[IB_OPCODE_RC >> 5] =
		BIT(RC_OP(SEND_ONLY) & OPMASK) |
		BIT(RC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) |
		BIT(RC_OP(RDMA_WRITE_ONLY) & OPMASK) |
		BIT(RC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK) |
		BIT(RC_OP(RDMA_READ_REQUEST) & OPMASK) |
		BIT(RC_OP(ACKNOWLEDGE) & OPMASK) |
		BIT(RC_OP(ATOMIC_ACKNOWLEDGE) & OPMASK) |
		BIT(RC_OP(COMPARE_SWAP) & OPMASK) |
		BIT(RC_OP(FETCH_ADD) & OPMASK),
	/* UC */
	[IB_OPCODE_UC >> 5] =
		BIT(UC_OP(SEND_ONLY) & OPMASK) |
		BIT(UC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) |
		BIT(UC_OP(RDMA_WRITE_ONLY) & OPMASK) |
		BIT(UC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK),
};

Mike Marciniszyn's avatar
Mike Marciniszyn committed
288
289
290
291
292
293
294
295
/*
 * System image GUID, stored big-endian (__be64).
 * It is not assigned anywhere in this part of the file.
 */
__be64 ib_hfi1_sys_image_guid;

/*
 * Make sure the QP is ready and able to accept the given opcode.
 */
296
static inline opcode_handler qp_ok(struct hfi1_packet *packet)
Mike Marciniszyn's avatar
Mike Marciniszyn committed
297
{
298
	if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
299
		return NULL;
300
301
302
303
	if (((packet->opcode & RVT_OPCODE_QP_MASK) ==
	     packet->qp->allowed_ops) ||
	    (packet->opcode == IB_OPCODE_CNP))
		return opcode_handler_tbl[packet->opcode];
304
305

	return NULL;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
306
307
}

308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
/*
 * Adjust a PBC so that the packet it describes is dropped by the receiver.
 *
 * Only has an effect when CONFIG_FAULT_INJECTION is enabled; otherwise the
 * PBC is returned untouched.  @qp is not used.
 */
static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
{
#ifdef CONFIG_FAULT_INJECTION
	if ((opcode & IB_OPCODE_MSP) != IB_OPCODE_MSP) {
		/*
		 * Regular verbs traffic: set the PbcTestEbp flag.  The
		 * packet is still delivered to the receiving node, but a
		 * 'late ebp error' is triggered and it is dropped.
		 */
		return pbc | PBC_TEST_EBP;
	}

	/*
	 * Non-IB traffic: set PbcInsertHrc to NONE (0x2).  The packet is
	 * still delivered to the receiving node, but a KHdrHCRCErr (KDETH
	 * packet with a bad HCRC) is triggered and the packet is not
	 * delivered to the correct context.
	 */
	pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT;
#endif
	return pbc;
}

337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
/*
 * Run the ingress pkey check for a bypass packet against packet->qp.
 *
 * Packets of any other receive type are not checked here and yield 0;
 * for bypass packets the result of ingress_pkey_check() is returned.
 */
static int hfi1_do_pkey_check(struct hfi1_packet *packet)
{
	struct hfi1_ctxtdata *ctxt = packet->rcd;
	struct hfi1_pportdata *port = ctxt->ppd;
	struct hfi1_16b_header *hdr16 = packet->hdr;

	if (packet->etype == RHF_RCV_TYPE_BYPASS) {
		/* the pkey comes out of the 16B header */
		u16 key = hfi1_16B_get_pkey(hdr16);

		return ingress_pkey_check(port, key, packet->sc,
					  packet->qp->s_pkey_index,
					  packet->slid, true);
	}

	return 0;
}

355
356
/*
 * Dispatch a received packet to the QP(s) it is addressed to.
 *
 * @packet: the received packet
 * @is_mcast: true when the destination LID is a multicast LID
 *
 * Multicast: the group is looked up by DGID/DLID and the packet is offered
 * to every QP on the group's list.  Unicast: the destination QP number is
 * taken from the 16B management header or the BTH and looked up under
 * rcu_read_lock().  In both cases the handler chosen by qp_ok() runs with
 * the QP's r_lock held; packets that cannot be delivered are counted in
 * ibp->rvp.n_pkt_drops.
 */
static inline void hfi1_handle_packet(struct hfi1_packet *packet,
				      bool is_mcast)
{
	u32 qp_num;
	struct hfi1_ctxtdata *rcd = packet->rcd;
	struct hfi1_pportdata *ppd = rcd->ppd;
	struct hfi1_ibport *ibp = rcd_to_iport(rcd);
	struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
	opcode_handler packet_handler;
	unsigned long flags;

	inc_opstats(packet->tlen, &rcd->opstats->stats[packet->opcode]);

	if (unlikely(is_mcast)) {
		struct rvt_mcast *mcast;
		struct rvt_mcast_qp *p;

		if (!packet->grh)
			goto drop;
		mcast = rvt_mcast_find(&ibp->rvp,
				       &packet->grh->dgid,
				       opa_get_lid(packet->dlid, 9B));
		if (!mcast)
			goto drop;
		list_for_each_entry_rcu(p, &mcast->qp_list, list) {
			packet->qp = p->qp;
			if (hfi1_do_pkey_check(packet)) {
				/*
				 * Stop delivering and count the drop, but
				 * fall through to the refcount release
				 * below: jumping straight to the drop label
				 * would leave the group reference taken by
				 * rvt_mcast_find() held forever.
				 */
				ibp->rvp.n_pkt_drops++;
				break;
			}
			spin_lock_irqsave(&packet->qp->r_lock, flags);
			packet_handler = qp_ok(packet);
			if (likely(packet_handler))
				packet_handler(packet);
			else
				ibp->rvp.n_pkt_drops++;
			spin_unlock_irqrestore(&packet->qp->r_lock, flags);
		}
		/*
		 * Notify rvt_multicast_detach() if it is waiting for us
		 * to finish.
		 */
		if (atomic_dec_return(&mcast->refcount) <= 1)
			wake_up(&mcast->wait);
	} else {
		/* Get the destination QP number. */
		if (packet->etype == RHF_RCV_TYPE_BYPASS &&
		    hfi1_16B_get_l4(packet->hdr) == OPA_16B_L4_FM)
			qp_num = hfi1_16B_get_dest_qpn(packet->mgmt);
		else
			qp_num = ib_bth_get_qpn(packet->ohdr);

		rcu_read_lock();
		packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
		if (!packet->qp)
			goto unlock_drop;

		if (hfi1_do_pkey_check(packet))
			goto unlock_drop;

		spin_lock_irqsave(&packet->qp->r_lock, flags);
		packet_handler = qp_ok(packet);
		if (likely(packet_handler))
			packet_handler(packet);
		else
			ibp->rvp.n_pkt_drops++;
		spin_unlock_irqrestore(&packet->qp->r_lock, flags);
		rcu_read_unlock();
	}
	return;
unlock_drop:
	rcu_read_unlock();
drop:
	ibp->rvp.n_pkt_drops++;
}

429
430
431
432
433
434
435
436
437
438
/**
 * hfi1_ib_rcv - process an incoming packet
 * @packet: data packet information
 *
 * This is called to process an incoming packet at interrupt level.
 */
void hfi1_ib_rcv(struct hfi1_packet *packet)
{
	struct hfi1_ctxtdata *rcd = packet->rcd;

439
440
441
442
443
444
445
	trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
	hfi1_handle_packet(packet, hfi1_check_mcast(packet->dlid));
}

void hfi1_16B_rcv(struct hfi1_packet *packet)
{
	struct hfi1_ctxtdata *rcd = packet->rcd;
446

447
448
	trace_input_ibhdr(rcd->dd, packet, false);
	hfi1_handle_packet(packet, hfi1_check_mcast(packet->dlid));
449
450
}

Mike Marciniszyn's avatar
Mike Marciniszyn committed
451
452
453
454
/*
 * This is called from a timer to check for QPs
 * which need kernel memory in order to send a packet.
 */
455
static void mem_timer(struct timer_list *t)
Mike Marciniszyn's avatar
Mike Marciniszyn committed
456
{
457
	struct hfi1_ibdev *dev = from_timer(dev, t, mem_timer);
Mike Marciniszyn's avatar
Mike Marciniszyn committed
458
	struct list_head *list = &dev->memwait;
459
	struct rvt_qp *qp = NULL;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
460
461
	struct iowait *wait;
	unsigned long flags;
462
	struct hfi1_qp_priv *priv;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
463
464
465
466

	write_seqlock_irqsave(&dev->iowait_lock, flags);
	if (!list_empty(list)) {
		wait = list_first_entry(list, struct iowait, list);
467
468
469
		qp = iowait_to_qp(wait);
		priv = qp->priv;
		list_del_init(&priv->s_iowait.list);
470
		priv->s_iowait.lock = NULL;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
471
472
473
474
475
476
477
		/* refcount held until actual wake up */
		if (!list_empty(list))
			mod_timer(&dev->mem_timer, jiffies + 1);
	}
	write_sequnlock_irqrestore(&dev->iowait_lock, flags);

	if (qp)
478
		hfi1_qp_wakeup(qp, RVT_S_WAIT_KMEM);
Mike Marciniszyn's avatar
Mike Marciniszyn committed
479
480
481
482
483
484
485
486
}

/*
 * This is called with progress side lock held.
 */
/* New API */
static void verbs_sdma_complete(
	struct sdma_txreq *cookie,
487
	int status)
Mike Marciniszyn's avatar
Mike Marciniszyn committed
488
489
490
{
	struct verbs_txreq *tx =
		container_of(cookie, struct verbs_txreq, txreq);
491
	struct rvt_qp *qp = tx->qp;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
492
493

	spin_lock(&qp->s_lock);
494
	if (tx->wqe) {
495
		rvt_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
496
	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
497
		struct hfi1_opa_header *hdr;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
498
499
500
501
502
503
504
505
506

		hdr = &tx->phdr.hdr;
		hfi1_rc_send_complete(qp, hdr);
	}
	spin_unlock(&qp->s_lock);

	hfi1_put_txreq(tx);
}

507
508
509
static int wait_kmem(struct hfi1_ibdev *dev,
		     struct rvt_qp *qp,
		     struct hfi1_pkt_state *ps)
Mike Marciniszyn's avatar
Mike Marciniszyn committed
510
{
511
	struct hfi1_qp_priv *priv = qp->priv;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
512
513
514
515
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&qp->s_lock, flags);
516
	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
Mike Marciniszyn's avatar
Mike Marciniszyn committed
517
		write_seqlock(&dev->iowait_lock);
518
		list_add_tail(&ps->s_txreq->txreq.list,
519
			      &ps->wait->tx_head);
520
		if (list_empty(&priv->s_iowait.list)) {
Mike Marciniszyn's avatar
Mike Marciniszyn committed
521
522
			if (list_empty(&dev->memwait))
				mod_timer(&dev->mem_timer, jiffies + 1);
523
			qp->s_flags |= RVT_S_WAIT_KMEM;
524
			list_add_tail(&priv->s_iowait.list, &dev->memwait);
525
			priv->s_iowait.lock = &dev->iowait_lock;
526
			trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
527
			rvt_get_qp(qp);
Mike Marciniszyn's avatar
Mike Marciniszyn committed
528
529
		}
		write_sequnlock(&dev->iowait_lock);
530
		hfi1_qp_unbusy(qp, ps->wait);
Mike Marciniszyn's avatar
Mike Marciniszyn committed
531
532
533
534
535
536
537
538
539
540
541
542
		ret = -EBUSY;
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);

	return ret;
}

/*
 * This routine calls txadds for each sg entry.
 *
 * Add failures will revert the sge cursor
 */
543
static noinline int build_verbs_ulp_payload(
Mike Marciniszyn's avatar
Mike Marciniszyn committed
544
545
546
547
	struct sdma_engine *sde,
	u32 length,
	struct verbs_txreq *tx)
{
548
	struct rvt_sge_state *ss = tx->ss;
549
550
	struct rvt_sge *sg_list = ss->sg_list;
	struct rvt_sge sge = ss->sge;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
	u8 num_sge = ss->num_sge;
	u32 len;
	int ret = 0;

	while (length) {
		len = ss->sge.length;
		if (len > length)
			len = length;
		if (len > ss->sge.sge_length)
			len = ss->sge.sge_length;
		WARN_ON_ONCE(len == 0);
		ret = sdma_txadd_kvaddr(
			sde->dd,
			&tx->txreq,
			ss->sge.vaddr,
			len);
		if (ret)
			goto bail_txadd;
569
		rvt_update_sge(ss, len, false);
Mike Marciniszyn's avatar
Mike Marciniszyn committed
570
571
572
573
574
575
576
577
578
579
580
		length -= len;
	}
	return ret;
bail_txadd:
	/* unwind cursor */
	ss->sge = sge;
	ss->num_sge = num_sge;
	ss->sg_list = sg_list;
	return ret;
}

581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
/**
 * update_tx_opstats - record stats by opcode
 * @qp: the qp
 * @ps: transmit packet state
 * @plen: the plen in dwords
 *
 * Records the tx opstats once a packet has been presented to the egress
 * mechanism.  The byte count (@plen * 4) is added to this CPU's entry for
 * the packet's opcode.  Compiles to nothing without CONFIG_DEBUG_FS.
 */
static void update_tx_opstats(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			      u32 plen)
{
#ifdef CONFIG_DEBUG_FS
	struct hfi1_devdata *devdata = dd_from_ibdev(qp->ibqp.device);
	struct hfi1_opcode_stats_perctx *cpu_stats;

	cpu_stats = get_cpu_ptr(devdata->tx_opstats);
	inc_opstats(plen * 4, &cpu_stats->stats[ps->opcode]);
	put_cpu_ptr(cpu_stats);
#endif
}

Mike Marciniszyn's avatar
Mike Marciniszyn committed
602
603
604
605
606
607
/*
 * Build the number of DMA descriptors needed to send length bytes of data.
 *
 * NOTE: DMA mapping is held in the tx until completed in the ring or
 *       the tx desc is freed without having been submitted to the ring
 *
608
 * This routine ensures all the helper routine calls succeed.
Mike Marciniszyn's avatar
Mike Marciniszyn committed
609
610
611
612
613
614
 */
/* New API */
static int build_verbs_tx_desc(
	struct sdma_engine *sde,
	u32 length,
	struct verbs_txreq *tx,
615
	struct hfi1_ahg_info *ahg_info,
Mike Marciniszyn's avatar
Mike Marciniszyn committed
616
617
618
	u64 pbc)
{
	int ret = 0;
619
	struct hfi1_sdma_header *phdr = &tx->phdr;
620
	u16 hdrbytes = (tx->hdr_dwords + sizeof(pbc) / 4) << 2;
621
	u8 extra_bytes = 0;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
622

623
624
625
626
627
628
629
630
	if (tx->phdr.hdr.hdr_type) {
		/*
		 * hdrbytes accounts for PBC. Need to subtract 8 bytes
		 * before calculating padding.
		 */
		extra_bytes = hfi1_get_16b_padding(hdrbytes - 8, length) +
			      (SIZE_OF_CRC << 2) + SIZE_OF_LT;
	}
631
	if (!ahg_info->ahgcount) {
Mike Marciniszyn's avatar
Mike Marciniszyn committed
632
633
		ret = sdma_txinit_ahg(
			&tx->txreq,
634
			ahg_info->tx_flags,
635
636
			hdrbytes + length +
			extra_bytes,
637
			ahg_info->ahgidx,
Mike Marciniszyn's avatar
Mike Marciniszyn committed
638
639
640
641
642
643
644
645
646
647
			0,
			NULL,
			0,
			verbs_sdma_complete);
		if (ret)
			goto bail_txadd;
		phdr->pbc = cpu_to_le64(pbc);
		ret = sdma_txadd_kvaddr(
			sde->dd,
			&tx->txreq,
648
649
			phdr,
			hdrbytes);
Mike Marciniszyn's avatar
Mike Marciniszyn committed
650
651
652
653
654
		if (ret)
			goto bail_txadd;
	} else {
		ret = sdma_txinit_ahg(
			&tx->txreq,
655
			ahg_info->tx_flags,
Mike Marciniszyn's avatar
Mike Marciniszyn committed
656
			length,
657
658
659
			ahg_info->ahgidx,
			ahg_info->ahgcount,
			ahg_info->ahgdesc,
Mike Marciniszyn's avatar
Mike Marciniszyn committed
660
661
662
663
664
			hdrbytes,
			verbs_sdma_complete);
		if (ret)
			goto bail_txadd;
	}
665
	/* add the ulp payload - if any. tx->ss can be NULL for acks */
666
	if (tx->ss) {
667
		ret = build_verbs_ulp_payload(sde, length, tx);
668
669
670
671
672
		if (ret)
			goto bail_txadd;
	}

	/* add icrc, lt byte, and padding to flit */
673
	if (extra_bytes)
674
		ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq,
675
					(void *)trail_buf, extra_bytes);
676

Mike Marciniszyn's avatar
Mike Marciniszyn committed
677
678
679
680
bail_txadd:
	return ret;
}

681
int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
682
			u64 pbc)
Mike Marciniszyn's avatar
Mike Marciniszyn committed
683
{
684
	struct hfi1_qp_priv *priv = qp->priv;
685
	struct hfi1_ahg_info *ahg_info = priv->s_ahg;
686
	u32 hdrwords = ps->s_txreq->hdr_dwords;
687
	u32 len = ps->s_txreq->s_cur_size;
688
	u32 plen;
689
690
	struct hfi1_ibdev *dev = ps->dev;
	struct hfi1_pportdata *ppd = ps->ppd;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
691
	struct verbs_txreq *tx;
692
	u8 sc5 = priv->s_sc;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
693
	int ret;
694
695
696
697
698
699
700
701
702
703
	u32 dwords;

	if (ps->s_txreq->phdr.hdr.hdr_type) {
		u8 extra_bytes = hfi1_get_16b_padding((hdrwords << 2), len);

		dwords = (len + extra_bytes + (SIZE_OF_CRC << 2) +
			  SIZE_OF_LT) >> 2;
	} else {
		dwords = (len + 3) >> 2;
	}
704
	plen = hdrwords + dwords + sizeof(pbc) / 4;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
705

706
	tx = ps->s_txreq;
707
708
709
	if (!sdma_txreq_built(&tx->txreq)) {
		if (likely(pbc == 0)) {
			u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
710

711
			/* No vl15 here */
712
713
714
715
716
717
			/* set PBC_DC_INFO bit (aka SC[4]) in pbc */
			if (ps->s_txreq->phdr.hdr.hdr_type)
				pbc |= PBC_PACKET_BYPASS |
				       PBC_INSERT_BYPASS_ICRC;
			else
				pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
718

719
			if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
720
				pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
721
			pbc = create_pbc(ppd,
722
					 pbc,
723
724
725
726
727
					 qp->srate_mbps,
					 vl,
					 plen);
		}
		tx->wqe = qp->s_wqe;
728
		ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc);
729
730
		if (unlikely(ret))
			goto bail_build;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
731
	}
732
	ret =  sdma_send_txreq(tx->sde, ps->wait, &tx->txreq, ps->pkts_sent);
733
734
735
736
737
	if (unlikely(ret < 0)) {
		if (ret == -ECOMM)
			goto bail_ecomm;
		return ret;
	}
738
739

	update_tx_opstats(qp, ps, plen);
740
	trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
741
				&ps->s_txreq->phdr.hdr, ib_is_sc5(sc5));
Mike Marciniszyn's avatar
Mike Marciniszyn committed
742
743
744
745
746
747
	return ret;

bail_ecomm:
	/* The current one got "sent" */
	return 0;
bail_build:
748
749
750
751
752
753
754
	ret = wait_kmem(dev, qp, ps);
	if (!ret) {
		/* free txreq - bad state */
		hfi1_put_txreq(ps->s_txreq);
		ps->s_txreq = NULL;
	}
	return ret;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
755
756
757
758
759
760
}

/*
 * If we are now in the error state, return zero to flush the
 * send work request.
 */
761
762
763
764
static int pio_wait(struct rvt_qp *qp,
		    struct send_context *sc,
		    struct hfi1_pkt_state *ps,
		    u32 flag)
Mike Marciniszyn's avatar
Mike Marciniszyn committed
765
{
766
	struct hfi1_qp_priv *priv = qp->priv;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
767
768
769
770
771
772
773
774
775
776
777
778
	struct hfi1_devdata *dd = sc->dd;
	struct hfi1_ibdev *dev = &dd->verbs_dev;
	unsigned long flags;
	int ret = 0;

	/*
	 * Note that as soon as want_buffer() is called and
	 * possibly before it returns, sc_piobufavail()
	 * could be called. Therefore, put QP on the I/O wait list before
	 * enabling the PIO avail interrupt.
	 */
	spin_lock_irqsave(&qp->s_lock, flags);
779
	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
Mike Marciniszyn's avatar
Mike Marciniszyn committed
780
		write_seqlock(&dev->iowait_lock);
781
		list_add_tail(&ps->s_txreq->txreq.list,
782
			      &ps->wait->tx_head);
783
		if (list_empty(&priv->s_iowait.list)) {
Mike Marciniszyn's avatar
Mike Marciniszyn committed
784
785
786
			struct hfi1_ibdev *dev = &dd->verbs_dev;
			int was_empty;

787
			dev->n_piowait += !!(flag & RVT_S_WAIT_PIO);
788
			dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN);
789
			qp->s_flags |= flag;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
790
			was_empty = list_empty(&sc->piowait);
791
792
			iowait_queue(ps->pkts_sent, &priv->s_iowait,
				     &sc->piowait);
793
			priv->s_iowait.lock = &dev->iowait_lock;
794
			trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
795
			rvt_get_qp(qp);
Mike Marciniszyn's avatar
Mike Marciniszyn committed
796
797
798
799
800
			/* counting: only call wantpiobuf_intr if first user */
			if (was_empty)
				hfi1_sc_wantpiobuf_intr(sc, 1);
		}
		write_sequnlock(&dev->iowait_lock);
801
		hfi1_qp_unbusy(qp, ps->wait);
Mike Marciniszyn's avatar
Mike Marciniszyn committed
802
803
804
805
806
807
		ret = -EBUSY;
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);
	return ret;
}

808
809
810
811
812
813
814
815
816
/*
 * verbs_pio_complete - PIO buffer release callback
 * @arg: the QP, as handed to sc_buffer_alloc() together with this callback
 * @code: release code; not examined here
 *
 * Drops one pending PIO count on the QP's iowait and, when
 * iowait_pio_dec() returns non-zero, issues the drain wakeup.
 */
static void verbs_pio_complete(void *arg, int code)
{
	struct rvt_qp *qp = arg;
	struct hfi1_qp_priv *qpriv = qp->priv;

	if (!iowait_pio_dec(&qpriv->s_iowait))
		return;

	iowait_drain_wakeup(&qpriv->s_iowait);
}

817
int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
818
			u64 pbc)
Mike Marciniszyn's avatar
Mike Marciniszyn committed
819
{
820
	struct hfi1_qp_priv *priv = qp->priv;
821
	u32 hdrwords = ps->s_txreq->hdr_dwords;
822
	struct rvt_sge_state *ss = ps->s_txreq->ss;
823
	u32 len = ps->s_txreq->s_cur_size;
824
825
	u32 dwords;
	u32 plen;
826
	struct hfi1_pportdata *ppd = ps->ppd;
827
	u32 *hdr;
828
	u8 sc5;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
829
830
831
832
	unsigned long flags = 0;
	struct send_context *sc;
	struct pio_buf *pbuf;
	int wc_status = IB_WC_SUCCESS;
833
	int ret = 0;
834
	pio_release_cb cb = NULL;
835
836
837
838
839
840
841
842
843
844
845
846
	u8 extra_bytes = 0;

	if (ps->s_txreq->phdr.hdr.hdr_type) {
		u8 pad_size = hfi1_get_16b_padding((hdrwords << 2), len);

		extra_bytes = pad_size + (SIZE_OF_CRC << 2) + SIZE_OF_LT;
		dwords = (len + extra_bytes) >> 2;
		hdr = (u32 *)&ps->s_txreq->phdr.hdr.opah;
	} else {
		dwords = (len + 3) >> 2;
		hdr = (u32 *)&ps->s_txreq->phdr.hdr.ibh;
	}
847
	plen = hdrwords + dwords + sizeof(pbc) / 4;
848
849
850
851
852
853
854
855
856
857

	/* only RC/UC use complete */
	switch (qp->ibqp.qp_type) {
	case IB_QPT_RC:
	case IB_QPT_UC:
		cb = verbs_pio_complete;
		break;
	default:
		break;
	}
Mike Marciniszyn's avatar
Mike Marciniszyn committed
858
859

	/* vl15 special case taken care of in ud.c */
860
	sc5 = priv->s_sc;
861
	sc = ps->s_txreq->psc;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
862
863

	if (likely(pbc == 0)) {
864
		u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
865

866
867
868
869
870
		/* set PBC_DC_INFO bit (aka SC[4]) in pbc */
		if (ps->s_txreq->phdr.hdr.hdr_type)
			pbc |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC;
		else
			pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
871
872

		if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
873
			pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
874
		pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
Mike Marciniszyn's avatar
Mike Marciniszyn committed
875
	}
876
877
878
	if (cb)
		iowait_pio_inc(&priv->s_iowait);
	pbuf = sc_buffer_alloc(sc, plen, cb, qp);
879
	if (unlikely(!pbuf)) {
880
881
		if (cb)
			verbs_pio_complete(qp, 0);
Mike Marciniszyn's avatar
Mike Marciniszyn committed
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
		if (ppd->host_link_state != HLS_UP_ACTIVE) {
			/*
			 * If we have filled the PIO buffers to capacity and are
			 * not in an active state this request is not going to
			 * go out to so just complete it with an error or else a
			 * ULP or the core may be stuck waiting.
			 */
			hfi1_cdbg(
				PIO,
				"alloc failed. state not active, completing");
			wc_status = IB_WC_GENERAL_ERR;
			goto pio_bail;
		} else {
			/*
			 * This is a normal occurrence. The PIO buffs are full
			 * up but we are still happily sending, well we could be
			 * so lets continue to queue the request.
			 */
			hfi1_cdbg(PIO, "alloc failed. state active, queuing");
901
			ret = pio_wait(qp, sc, ps, RVT_S_WAIT_PIO);
902
			if (!ret)
903
				/* txreq not queued - free */
904
905
906
				goto bail;
			/* tx consumed in wait */
			return ret;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
907
908
909
		}
	}

910
	if (dwords == 0) {
Mike Marciniszyn's avatar
Mike Marciniszyn committed
911
912
		pio_copy(ppd->dd, pbuf, pbc, hdr, hdrwords);
	} else {
913
914
		seg_pio_copy_start(pbuf, pbc,
				   hdr, hdrwords * 4);
Mike Marciniszyn's avatar
Mike Marciniszyn committed
915
916
917
918
919
920
921
		if (ss) {
			while (len) {
				void *addr = ss->sge.vaddr;
				u32 slen = ss->sge.length;

				if (slen > len)
					slen = len;
922
				rvt_update_sge(ss, slen, false);
Mike Marciniszyn's avatar
Mike Marciniszyn committed
923
924
925
926
				seg_pio_copy_mid(pbuf, addr, slen);
				len -= slen;
			}
		}
927
928
929
		/* add icrc, lt byte, and padding to flit */
		if (extra_bytes)
			seg_pio_copy_mid(pbuf, trail_buf, extra_bytes);
930
931

		seg_pio_copy_end(pbuf);
Mike Marciniszyn's avatar
Mike Marciniszyn committed
932
933
	}

934
	update_tx_opstats(qp, ps, plen);
935
	trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
936
			       &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5));
Mike Marciniszyn's avatar
Mike Marciniszyn committed
937
938
939
940

pio_bail:
	if (qp->s_wqe) {
		spin_lock_irqsave(&qp->s_lock, flags);
941
		rvt_send_complete(qp, qp->s_wqe, wc_status);
Mike Marciniszyn's avatar
Mike Marciniszyn committed
942
943
944
		spin_unlock_irqrestore(&qp->s_lock, flags);
	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
		spin_lock_irqsave(&qp->s_lock, flags);
945
		hfi1_rc_send_complete(qp, &ps->s_txreq->phdr.hdr);
Mike Marciniszyn's avatar
Mike Marciniszyn committed
946
947
		spin_unlock_irqrestore(&qp->s_lock, flags);
	}
948
949
950
951
952
953

	ret = 0;

bail:
	hfi1_put_txreq(ps->s_txreq);
	return ret;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
954
}
955

Mike Marciniszyn's avatar
Mike Marciniszyn committed
956
957
/*
 * egress_pkey_matches_entry - test a pkey against one partition table entry
 *
 * Returns 1 if @pkey matches @ent (an entry from the partition key
 * table) and 0 otherwise, using the matching criteria for egress
 * partition keys specified in the OPAv1 spec., section 9.1l.7.
 * NOTE(review): "9.1l.7" looks like a typo for a numeric section - confirm.
 */
static inline int egress_pkey_matches_entry(u16 pkey, u16 ent)
{
	/* the low 15 bits must agree before membership is considered */
	if ((pkey & PKEY_LOW_15_MASK) != (ent & PKEY_LOW_15_MASK))
		return 0;

	/* pkey[15] clear: no membership requirement on the table entry */
	if (!(pkey & PKEY_MEMBER_MASK))
		return 1;

	/* pkey[15] set: the table entry must have bit 15 set as well */
	return !!(ent & PKEY_MEMBER_MASK);
}

980
981
/**
 * egress_pkey_check - check P_KEY of a packet
982
983
984
985
 * @ppd:  Physical IB port data
 * @slid: SLID for packet
 * @bkey: PKEY for header
 * @sc5:  SC for packet
986
987
988
989
990
991
992
 * @s_pkey_index: It will be used for look up optimization for kernel contexts
 * only. If it is negative value, then it means user contexts is calling this
 * function.
 *
 * It checks if hdr's pkey is valid.
 *
 * Return: 0 on success, otherwise, 1
Mike Marciniszyn's avatar
Mike Marciniszyn committed
993
 */
994
int egress_pkey_check(struct hfi1_pportdata *ppd, u32 slid, u16 pkey,
995
		      u8 sc5, int8_t s_pkey_index)
Mike Marciniszyn's avatar
Mike Marciniszyn committed
996
997
{
	struct hfi1_devdata *dd;
998
999
	int i;
	int is_user_ctxt_mechanism = (s_pkey_index < 0);
Mike Marciniszyn's avatar
Mike Marciniszyn committed
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011

	if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT))
		return 0;

	/* If SC15, pkey[0:14] must be 0x7fff */
	if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK))
		goto bad;

	/* Is the pkey = 0x0, or 0x8000? */
	if ((pkey & PKEY_LOW_15_MASK) == 0)
		goto bad;

1012
1013
1014
1015
1016
1017
1018
	/*
	 * For the kernel contexts only, if a qp is passed into the function,
	 * the most likely matching pkey has index qp->s_pkey_index
	 */
	if (!is_user_ctxt_mechanism &&
	    egress_pkey_matches_entry(pkey, ppd->pkeys[s_pkey_index])) {
		return 0;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
1019
1020
	}

1021
1022
1023
1024
	for (i = 0; i < MAX_PKEY_VALUES; i++) {
		if (egress_pkey_matches_entry(pkey, ppd->pkeys[i]))
			return 0;
	}
Mike Marciniszyn's avatar
Mike Marciniszyn committed
1025
bad:
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
	/*
	 * For the user-context mechanism, the P_KEY check would only happen
	 * once per SDMA request, not once per packet.  Therefore, there's no
	 * need to increment the counter for the user-context mechanism.
	 */
	if (!is_user_ctxt_mechanism) {
		incr_cntr64(&ppd->port_xmit_constraint_errors);
		dd = ppd->dd;
		if (!(dd->err_info_xmit_constraint.status &
		      OPA_EI_STATUS_SMASK)) {
			dd->err_info_xmit_constraint.status |=
				OPA_EI_STATUS_SMASK;
			dd->err_info_xmit_constraint.slid = slid;
			dd->err_info_xmit_constraint.pkey = pkey;
		}
Mike Marciniszyn's avatar
Mike Marciniszyn committed
1041
1042
1043
1044
	}
	return 1;
}

1045
1046
1047
1048
1049
1050
1051
/**
 * get_send_routine - choose an egress routine
 *
 * Choose an egress routine based on QP type
 * and size
 */
static inline send_routine get_send_routine(struct rvt_qp *qp,
1052
					    struct hfi1_pkt_state *ps)
1053
1054
1055
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	struct hfi1_qp_priv *priv = qp->priv;
1056
	struct verbs_txreq *tx = ps->s_txreq;
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066

	if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA)))
		return dd->process_pio_send;
	switch (qp->ibqp.qp_type) {
	case IB_QPT_SMI:
		return dd->process_pio_send;
	case IB_QPT_GSI:
	case IB_QPT_UD:
		break;
	case IB_QPT_UC:
1067
	case IB_QPT_RC: {
1068
		if (piothreshold &&
1069
		    tx->s_cur_size <= min(piothreshold, qp->pmtu) &&
1070
		    (BIT(ps->opcode & OPMASK) & pio_opmask[ps->opcode >> 5]) &&
1071
1072
		    iowait_sdma_pending(&priv->s_iowait) == 0 &&
		    !sdma_txreq_built(&tx->txreq))
1073
1074
			return dd->process_pio_send;
		break;
1075
	}
1076
1077
1078
1079
1080
1081
	default:
		break;
	}
	return dd->process_dma_send;
}

Mike Marciniszyn's avatar
Mike Marciniszyn committed
1082
1083
1084
/**
 * hfi1_verbs_send - send a packet
 * @qp: the QP to send on
1085
 * @ps: the state of the packet to send
Mike Marciniszyn's avatar
Mike Marciniszyn committed
1086
1087
 *
 * Return zero if packet is sent or queued OK.
1088
 * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise.
Mike Marciniszyn's avatar
Mike Marciniszyn committed
1089
 */
1090
int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
Mike Marciniszyn's avatar
Mike Marciniszyn committed
1091
1092
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
1093
	struct hfi1_qp_priv *priv = qp->priv;
1094
	struct ib_other_headers *ohdr = NULL;
1095
	send_routine sr;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
1096
	int ret;
1097
1098
	u16 pkey;
	u32 slid;
1099
	u8 l4 = 0;
1100
1101

	/* locate the pkey within the headers */
1102
1103
1104
	if (ps->s_txreq->phdr.hdr.hdr_type) {
		struct hfi1_16b_header *hdr = &ps->s_txreq->phdr.hdr.opah;

1105
1106
		l4 = hfi1_16B_get_l4(hdr);
		if (l4 == OPA_16B_L4_IB_LOCAL)
1107
			ohdr = &hdr->u.oth;
1108
1109
1110
		else if (l4 == OPA_16B_L4_IB_GLOBAL)
			ohdr = &hdr->u.l.oth;

1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
		slid = hfi1_16B_get_slid(hdr);
		pkey = hfi1_16B_get_pkey(hdr);
	} else {
		struct ib_header *hdr = &ps->s_txreq->phdr.hdr.ibh;
		u8 lnh = ib_get_lnh(hdr);

		if (lnh == HFI1_LRH_GRH)
			ohdr = &hdr->u.l.oth;
		else
			ohdr = &hdr->u.oth;
		slid = ib_get_slid(hdr);
		pkey = ib_bth_get_pkey(ohdr);
	}

1125
1126
1127
1128
1129
	if (likely(l4 != OPA_16B_L4_FM))
		ps->opcode = ib_bth_get_opcode(ohdr);
	else
		ps->opcode = IB_OPCODE_UD_SEND_ONLY;

1130
1131
1132
	sr = get_send_routine(qp, ps);
	ret = egress_pkey_check(dd->pport, slid, pkey,
				priv->s_sc, qp->s_pkey_index);
Mike Marciniszyn's avatar
Mike Marciniszyn committed
1133
1134
1135
1136
1137
1138
1139
1140
1141
	if (unlikely(ret)) {
		/*
		 * The value we are returning here does not get propagated to
		 * the verbs caller. Thus we need to complete the request with
		 * error otherwise the caller could be sitting waiting on the
		 * completion event. Only do this for PIO. SDMA has its own
		 * mechanism for handling the errors. So for SDMA we can just
		 * return.
		 */
1142
1143
1144
		if (sr == dd->process_pio_send) {
			unsigned long flags;

Mike Marciniszyn's avatar
Mike Marciniszyn committed
1145
1146
1147
			hfi1_cdbg(PIO, "%s() Failed. Completing with err",
				  __func__);
			spin_lock_irqsave(&qp->s_lock, flags);
1148
			rvt_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
Mike Marciniszyn's avatar
Mike Marciniszyn committed
1149
1150
1151
1152
			spin_unlock_irqrestore(&qp->s_lock, flags);
		}
		return -EINVAL;
	}
1153
1154
1155
1156
	if (sr == dd->process_dma_send && iowait_pio_pending(&priv->s_iowait))
		return pio_wait(qp,
				ps->s_txreq->psc,
				ps,
1157
				HFI1_S_WAIT_PIO_DRAIN);
1158
	return sr(qp, ps, 0);
Mike Marciniszyn's avatar
Mike Marciniszyn committed
1159
1160
}

1161
1162
1163
1164
1165
/**
 * hfi1_fill_device_attr - Fill in rvt dev info device attributes.
 * @dd: the device data structure
 */
static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
Mike Marciniszyn's avatar
Mike Marciniszyn committed
1166
{
1167
	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
1168
	u32 ver = dd->dc8051_ver;
1169
1170
1171

	memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));

1172
1173
1174
1175
	rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 32) |
		((u64)(dc8051_ver_min(ver)) << 16) |
		(u64)dc8051_ver_patch(ver);

1176
1177
1178
	rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
			IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
			IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
1179
			IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE |
1180
1181
			IB_DEVICE_MEM_MGT_EXTENSIONS |
			IB_DEVICE_RDMA_NETDEV_OPA_VNIC;
1182
1183
1184
1185
1186
	rdi->dparms.props.page_size_cap = PAGE_SIZE;
	rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
	rdi->dparms.props.vendor_part_id = dd->pcidev->device;
	rdi->dparms.props.hw_ver = dd->minrev;
	rdi->dparms.props.sys_image_guid = ib_hfi1_sys_image_guid;
1187
1188
	rdi->dparms.props.max_mr_size = U64_MAX;
	rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX;
1189
1190
	rdi->dparms.props.max_qp = hfi1_max_qps;
	rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs;
1191
1192
	rdi->dparms.props.max_send_sge = hfi1_max_sges;
	rdi->dparms.props.max_recv_sge = hfi1_max_sges;
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
	rdi->dparms.props.max_sge_rd = hfi1_max_sges;
	rdi->dparms.props.max_cq = hfi1_max_cqs;
	rdi->dparms.props.max_ah = hfi1_max_ahs;
	rdi->dparms.props.max_cqe = hfi1_max_cqes;
	rdi->dparms.props.max_mr = rdi->lkey_table.max;
	rdi->dparms.props.max_fmr = rdi->lkey_table.max;
	rdi->dparms.props.max_map_per_fmr = 32767;
	rdi->dparms.props.max_pd = hfi1_max_pds;
	rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC;
	rdi->dparms.props.max_qp_init_rd_atom = 255;
	rdi->dparms.props.max_srq = hfi1_max_srqs;
	rdi->dparms.props.max_srq_wr = hfi1_max_srq_wrs;
	rdi->dparms.props.max_srq_sge = hfi1_max_srq_sges;
	rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB;
	rdi->dparms.props.max_pkeys = hfi1_get_npkeys(dd);
	rdi->dparms.props.max_mcast_grp = hfi1_max_mcast_grps;
	rdi->dparms.props.max_mcast_qp_attach = hfi1_max_mcast_qp_attached;
	rdi->dparms.props.max_total_mcast_qp_attach =
					rdi->dparms.props.max_mcast_qp_attach *
					rdi->dparms.props.max_mcast_grp;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
}

/*
 * Convert a mask of OPA link speeds to the corresponding IB speed mask:
 * OPA 25G maps to IB_SPEED_EDR and OPA 12.5G maps to IB_SPEED_FDR.
 * Any other bits in the input are ignored.
 */
static inline u16 opa_speed_to_ib(u16 in)
{
	u16 edr = (in & OPA_LINK_SPEED_25G) ? IB_SPEED_EDR : 0;
	u16 fdr = (in & OPA_LINK_SPEED_12_5G) ? IB_SPEED_FDR : 0;

	return edr | fdr;
}

/*
 * Convert a single OPA link width (no multiple flags) to an IB value.
 * A zero OPA link width means link down, which means the IB width value
 * is a don't care.
 */
static inline u16 opa_width_to_ib(u16 in)
{
	/* 2x and 3x don't exist in IB, so they are reported as 1x */
	if (in == OPA_LINK_WIDTH_1X ||
	    in == OPA_LINK_WIDTH_2X ||
	    in == OPA_LINK_WIDTH_3X)
		return IB_WIDTH_1X;

	/* 4x, link down or unknown: return our largest width */
	return IB_WIDTH_4X;
}

1246
static int query_port(struct rvt_dev_info *rdi, u8 port_num,
Mike Marciniszyn's avatar
Mike Marciniszyn committed
1247
1248
		      struct ib_port_attr *props)
{
1249
1250
1251
	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
	struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
1252
	u32 lid = ppd->lid;
Mike Marciniszyn's avatar
Mike Marciniszyn committed
1253

1254
	/* props being zeroed by the caller, avoid zeroing it here */
Mike Marciniszyn's avatar
Mike Marciniszyn committed
1255
1256
1257
1258
	props->lid = lid ? lid : 0;
	props->lmc = ppd->lmc;
	/* OPA logical states match IB logical states */
	props->state = driver_lstate(ppd);
1259
	props->phys_state = driver_pstate(ppd);
Mike Marciniszyn's avatar
Mike Marciniszyn committed
1260