/*
 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/inetdevice.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/module.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
#include <net/netevent.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <rdma/ib_addr.h>
#include <rdma/ib.h>
#include <rdma/rdma_netlink.h>
#include <net/netlink.h>

#include "core_priv.h"

struct addr_req {
	struct list_head list;
	struct sockaddr_storage src_addr;
	struct sockaddr_storage dst_addr;
	struct rdma_dev_addr *addr;
	struct rdma_addr_client *client;
	void *context;
	void (*callback)(int status, struct sockaddr *src_addr,
			 struct rdma_dev_addr *addr, void *context);
	unsigned long timeout;
	struct delayed_work work;
	int status;
	u32 seq;
};

static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0);

static void process_req(struct work_struct *work);

static DEFINE_MUTEX(lock);
static LIST_HEAD(req_list);
static DECLARE_DELAYED_WORK(work, process_req);
static struct workqueue_struct *addr_wq;

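/* Netlink attribute policy for LS (local service) IP resolution replies */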
static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
	[LS_NLA_TYPE_DGID] = {.type = NLA_BINARY,
		.len = sizeof(struct rdma_nla_ls_gid)},
};

static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
{
	struct nlattr *tb[LS_NLA_TYPE_MAX] = {};
	int ret;

	if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
		return false;

	ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
			nlmsg_len(nlh), ib_nl_addr_policy, NULL);
	if (ret)
		return false;

	return true;
}

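/* Pull the DGID attribute out of a good reply and complete the matching
 * request, identified by the netlink sequence number.
 */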
static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
{
	const struct nlattr *head, *curr;
	union ib_gid gid;
	struct addr_req *req;
	int len, rem;
	int found = 0;

	head = (const struct nlattr *)nlmsg_data(nlh);
	len = nlmsg_len(nlh);

	nla_for_each_attr(curr, head, len, rem) {
		if (curr->nla_type == LS_NLA_TYPE_DGID)
			memcpy(&gid, nla_data(curr), nla_len(curr));
	}

	mutex_lock(&lock);
	list_for_each_entry(req, &req_list, list) {
		if (nlh->nlmsg_seq != req->seq)
			continue;
		/* We set the DGID part, the rest was set earlier */
		rdma_addr_set_dgid(req->addr, &gid);
		req->status = 0;
		found = 1;
		break;
	}
	mutex_unlock(&lock);

	if (!found)
		pr_info("Couldn't find request waiting for DGID: %pI6\n",
			&gid);
}

int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
			     struct nlmsghdr *nlh,
			     struct netlink_ext_ack *extack)
{
	if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
	    !(NETLINK_CB(skb).sk))
		return -EPERM;

	if (ib_nl_is_good_ip_resp(nlh))
		ib_nl_process_good_ip_rsep(nlh);

	return skb->len;
}

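/* Multicast an IP resolution request to the LS netlink group so a
 * userspace listener can supply the DGID.
 */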
static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
			     const void *daddr,
			     u32 seq, u16 family)
{
	struct sk_buff *skb = NULL;
	struct nlmsghdr *nlh;
	struct rdma_ls_ip_resolve_header *header;
	void *data;
	size_t size;
	int attrtype;
	int len;

	if (family == AF_INET) {
		size = sizeof(struct in_addr);
		attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV4;
	} else {
		size = sizeof(struct in6_addr);
		attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6;
	}

	len = nla_total_size(sizeof(size));
	len += NLMSG_ALIGN(sizeof(*header));

	skb = nlmsg_new(len, GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	data = ibnl_put_msg(skb, &nlh, seq, 0, RDMA_NL_LS,
			    RDMA_NL_LS_OP_IP_RESOLVE, NLM_F_REQUEST);
	if (!data) {
		nlmsg_free(skb);
		return -ENODATA;
	}

	/* Construct the family header first */
	header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
	header->ifindex = dev_addr->bound_dev_if;
	nla_put(skb, attrtype, size, daddr);

	/* Repair the nlmsg header length */
	nlmsg_end(skb, nlh);
	rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, GFP_KERNEL);

	/* Return -ENODATA so the request is retried; by the time we get the
	 * response from userspace we will have something.
	 */
	return -ENODATA;
}

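/* Size of the sockaddr variant selected by sa_family, or 0 if the
 * family is unsupported.
 */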
int rdma_addr_size(struct sockaddr *addr)
{
	switch (addr->sa_family) {
	case AF_INET:
		return sizeof(struct sockaddr_in);
	case AF_INET6:
		return sizeof(struct sockaddr_in6);
	case AF_IB:
		return sizeof(struct sockaddr_ib);
	default:
		return 0;
	}
}
EXPORT_SYMBOL(rdma_addr_size);

int rdma_addr_size_in6(struct sockaddr_in6 *addr)
{
	int ret = rdma_addr_size((struct sockaddr *) addr);

	return ret <= sizeof(*addr) ? ret : 0;
}
EXPORT_SYMBOL(rdma_addr_size_in6);

int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr)
{
	int ret = rdma_addr_size((struct sockaddr *) addr);

	return ret <= sizeof(*addr) ? ret : 0;
}
EXPORT_SYMBOL(rdma_addr_size_kss);

static struct rdma_addr_client self;

void rdma_addr_register_client(struct rdma_addr_client *client)
{
	atomic_set(&client->refcount, 1);
	init_completion(&client->comp);
}
EXPORT_SYMBOL(rdma_addr_register_client);

static inline void put_client(struct rdma_addr_client *client)
{
	if (atomic_dec_and_test(&client->refcount))
		complete(&client->comp);
}

void rdma_addr_unregister_client(struct rdma_addr_client *client)
{
	put_client(client);
	wait_for_completion(&client->comp);
}
EXPORT_SYMBOL(rdma_addr_unregister_client);

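/* Copy the source, broadcast and (optionally) destination device
 * addresses plus the binding from @dev into @dev_addr.
 */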
void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
		    const struct net_device *dev,
		    const unsigned char *dst_dev_addr)
{
	dev_addr->dev_type = dev->type;
	memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
	memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
	if (dst_dev_addr)
		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
	dev_addr->bound_dev_if = dev->ifindex;
}
EXPORT_SYMBOL(rdma_copy_addr);

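/* Find the local net_device that owns @addr and fill @dev_addr from it */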
int rdma_translate_ip(const struct sockaddr *addr,
		      struct rdma_dev_addr *dev_addr)
{
	struct net_device *dev;

	if (dev_addr->bound_dev_if) {
		dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
		if (!dev)
			return -ENODEV;
		rdma_copy_addr(dev_addr, dev, NULL);
		dev_put(dev);
		return 0;
	}

	switch (addr->sa_family) {
	case AF_INET:
		dev = ip_dev_find(dev_addr->net,
			((const struct sockaddr_in *)addr)->sin_addr.s_addr);

		if (!dev)
			return -EADDRNOTAVAIL;

		rdma_copy_addr(dev_addr, dev, NULL);
		dev_put(dev);
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		rcu_read_lock();
		for_each_netdev_rcu(dev_addr->net, dev) {
			if (ipv6_chk_addr(dev_addr->net,
					  &((const struct sockaddr_in6 *)addr)->sin6_addr,
					  dev, 1)) {
				rdma_copy_addr(dev_addr, dev, NULL);
				break;
			}
		}
		rcu_read_unlock();
		break;
#endif
	}
	return 0;
}
EXPORT_SYMBOL(rdma_translate_ip);

static void set_timeout(struct delayed_work *delayed_work, unsigned long time)
{
	unsigned long delay;

	delay = time - jiffies;
	if ((long)delay < 0)
		delay = 0;

	mod_delayed_work(addr_wq, delayed_work, delay);
}

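/* Insert into req_list, which is kept sorted by timeout, and (re)arm the
 * delayed work.
 */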
static void queue_req(struct addr_req *req)
{
	struct addr_req *temp_req;

	mutex_lock(&lock);
	list_for_each_entry_reverse(temp_req, &req_list, list) {
		if (time_after_eq(req->timeout, temp_req->timeout))
			break;
	}

	list_add(&req->list, &temp_req->list);

	set_timeout(&req->work, req->timeout);
	mutex_unlock(&lock);
}

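/* Resolve the hardware address through userspace; requires a listener
 * on the LS netlink group (used for IB routers).
 */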
static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
			  const void *daddr, u32 seq, u16 family)
{
	if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
		return -EADDRNOTAVAIL;

	/* Fill in what we can; the netlink response will fill the rest */
	rdma_copy_addr(dev_addr, dst->dev, NULL);
	return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
}

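/* Resolve the hardware address via the kernel neighbour table, kicking
 * off neighbour discovery if the entry is not yet valid.
 */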
static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
			const void *daddr)
{
	struct neighbour *n;
	int ret = 0;

	n = dst_neigh_lookup(dst, daddr);

	rcu_read_lock();
	if (!n || !(n->nud_state & NUD_VALID)) {
		if (n)
			neigh_event_send(n, NULL);
		ret = -ENODATA;
	} else {
		rdma_copy_addr(dev_addr, dst->dev, n->ha);
	}
	rcu_read_unlock();

	if (n)
		neigh_release(n);

	return ret;
}

static bool has_gateway(struct dst_entry *dst, sa_family_t family)
{
	struct rtable *rt;
	struct rt6_info *rt6;

	if (family == AF_INET) {
		rt = container_of(dst, struct rtable, dst);
		return rt->rt_uses_gateway;
	}

	rt6 = container_of(dst, struct rt6_info, dst);
	return rt6->rt6i_flags & RTF_GATEWAY;
}

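/* A gateway on an ARPHRD_INFINIBAND device means an IB router, which is
 * resolved through netlink; everything else uses the neighbour table.
 */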
static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
		    const struct sockaddr *dst_in, u32 seq)
{
	const struct sockaddr_in *dst_in4 =
		(const struct sockaddr_in *)dst_in;
	const struct sockaddr_in6 *dst_in6 =
		(const struct sockaddr_in6 *)dst_in;
	const void *daddr = (dst_in->sa_family == AF_INET) ?
		(const void *)&dst_in4->sin_addr.s_addr :
		(const void *)&dst_in6->sin6_addr;
	sa_family_t family = dst_in->sa_family;

	/* Gateway + ARPHRD_INFINIBAND -> IB router */
	if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND)
		return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family);
	else
		return dst_fetch_ha(dst, dev_addr, daddr);
}

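/* Route an IPv4 destination and fill in the source address, network
 * type and hop limit.
 */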
static int addr4_resolve(struct sockaddr_in *src_in,
			 const struct sockaddr_in *dst_in,
			 struct rdma_dev_addr *addr,
			 struct rtable **prt)
{
	__be32 src_ip = src_in->sin_addr.s_addr;
	__be32 dst_ip = dst_in->sin_addr.s_addr;
	struct rtable *rt;
	struct flowi4 fl4;
	int ret;

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = dst_ip;
	fl4.saddr = src_ip;
	fl4.flowi4_oif = addr->bound_dev_if;
	rt = ip_route_output_key(addr->net, &fl4);
	ret = PTR_ERR_OR_ZERO(rt);
	if (ret)
		return ret;

	src_in->sin_family = AF_INET;
	src_in->sin_addr.s_addr = fl4.saddr;

	/* If there's a gateway and the device type is not ARPHRD_INFINIBAND,
	 * we're definitely in RoCE v2 (as RoCE v1 isn't routable), so set
	 * the network type accordingly.
	 */
	if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND)
		addr->network = RDMA_NETWORK_IPV4;

	addr->hoplimit = ip4_dst_hoplimit(&rt->dst);

	*prt = rt;
	return 0;
}

#if IS_ENABLED(CONFIG_IPV6)
static int addr6_resolve(struct sockaddr_in6 *src_in,
			 const struct sockaddr_in6 *dst_in,
			 struct rdma_dev_addr *addr,
			 struct dst_entry **pdst)
{
	struct flowi6 fl6;
	struct dst_entry *dst;
	struct rt6_info *rt;
	int ret;

	memset(&fl6, 0, sizeof fl6);
	fl6.daddr = dst_in->sin6_addr;
	fl6.saddr = src_in->sin6_addr;
	fl6.flowi6_oif = addr->bound_dev_if;

	ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6);
	if (ret < 0)
		return ret;

	rt = (struct rt6_info *)dst;
	if (ipv6_addr_any(&src_in->sin6_addr)) {
		src_in->sin6_family = AF_INET6;
		src_in->sin6_addr = fl6.saddr;
	}

	/* If there's a gateway and the device type is not ARPHRD_INFINIBAND,
	 * we're definitely in RoCE v2 (as RoCE v1 isn't routable), so set
	 * the network type accordingly.
	 */
	if (rt->rt6i_flags & RTF_GATEWAY &&
	    ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND)
		addr->network = RDMA_NETWORK_IPV6;

	addr->hoplimit = ip6_dst_hoplimit(dst);

	*pdst = dst;
	return 0;
}
#else
static int addr6_resolve(struct sockaddr_in6 *src_in,
			 const struct sockaddr_in6 *dst_in,
			 struct rdma_dev_addr *addr,
			 struct dst_entry **pdst)
{
	return -EADDRNOTAVAIL;
}
#endif

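/* Resolve the layer-2 destination address: loopback and NOARP devices
 * are handled directly, everything else via fetch_ha().
 */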
static int addr_resolve_neigh(struct dst_entry *dst,
			      const struct sockaddr *dst_in,
			      struct rdma_dev_addr *addr,
			      u32 seq)
{
	if (dst->dev->flags & IFF_LOOPBACK) {
		int ret;

		ret = rdma_translate_ip(dst_in, addr);
		if (!ret)
			memcpy(addr->dst_dev_addr, addr->src_dev_addr,
			       MAX_ADDR_LEN);

		return ret;
	}

	/* If the device uses ARP/ND, resolve through the neighbour layer */
	if (!(dst->dev->flags & IFF_NOARP))
		return fetch_ha(dst, addr, dst_in, seq);

	rdma_copy_addr(addr, dst->dev, NULL);

	return 0;
}

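/* Common IPv4/IPv6 entry point: route the destination, optionally
 * resolve the neighbour, and bind to the egress device.
 */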
static int addr_resolve(struct sockaddr *src_in,
			const struct sockaddr *dst_in,
			struct rdma_dev_addr *addr,
			bool resolve_neigh,
			u32 seq)
{
	struct net_device *ndev;
	struct dst_entry *dst;
	int ret;

	if (!addr->net) {
		pr_warn_ratelimited("%s: missing namespace\n", __func__);
		return -EINVAL;
	}

	if (src_in->sa_family == AF_INET) {
		struct rtable *rt = NULL;
		const struct sockaddr_in *dst_in4 =
			(const struct sockaddr_in *)dst_in;

		ret = addr4_resolve((struct sockaddr_in *)src_in,
				    dst_in4, addr, &rt);
		if (ret)
			return ret;

		if (resolve_neigh)
			ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq);

		if (addr->bound_dev_if) {
			ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
		} else {
			ndev = rt->dst.dev;
			dev_hold(ndev);
		}

		ip_rt_put(rt);
	} else {
		const struct sockaddr_in6 *dst_in6 =
			(const struct sockaddr_in6 *)dst_in;

		ret = addr6_resolve((struct sockaddr_in6 *)src_in,
				    dst_in6, addr,
				    &dst);
		if (ret)
			return ret;

		if (resolve_neigh)
			ret = addr_resolve_neigh(dst, dst_in, addr, seq);

		if (addr->bound_dev_if) {
			ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
		} else {
			ndev = dst->dev;
			dev_hold(ndev);
		}

		dst_release(dst);
	}

	if (ndev) {
		if (ndev->flags & IFF_LOOPBACK)
			ret = rdma_translate_ip(dst_in, addr);
		else
			addr->bound_dev_if = ndev->ifindex;
		dev_put(ndev);
	}

	return ret;
}

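/* Process a single queued request: retry while resolution returns
 * -ENODATA and the timeout has not expired, then complete it.
 */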
static void process_one_req(struct work_struct *_work)
{
	struct addr_req *req;
	struct sockaddr *src_in, *dst_in;

	mutex_lock(&lock);
	req = container_of(_work, struct addr_req, work.work);

	if (req->status == -ENODATA) {
		src_in = (struct sockaddr *)&req->src_addr;
		dst_in = (struct sockaddr *)&req->dst_addr;
		req->status = addr_resolve(src_in, dst_in, req->addr,
					   true, req->seq);
		if (req->status && time_after_eq(jiffies, req->timeout)) {
			req->status = -ETIMEDOUT;
		} else if (req->status == -ENODATA) {
			/* requeue the work for retrying again */
			set_timeout(&req->work, req->timeout);
			mutex_unlock(&lock);
			return;
		}
	}
	list_del(&req->list);
	mutex_unlock(&lock);

	/*
	 * Although the work will normally have been canceled by the
	 * workqueue, it can still be requeued as long as it is on the
	 * req_list, so it could have been requeued before we grabbed &lock.
	 * We need to cancel it after it is removed from req_list to really be
	 * sure it is safe to free.
	 */
	cancel_delayed_work(&req->work);

	req->callback(req->status, (struct sockaddr *)&req->src_addr,
		req->addr, req->context);
	put_client(req->client);
	kfree(req);
}

static void process_req(struct work_struct *work)
{
	struct addr_req *req, *temp_req;
	struct sockaddr *src_in, *dst_in;
	struct list_head done_list;

	INIT_LIST_HEAD(&done_list);

	mutex_lock(&lock);
	list_for_each_entry_safe(req, temp_req, &req_list, list) {
		if (req->status == -ENODATA) {
			src_in = (struct sockaddr *) &req->src_addr;
			dst_in = (struct sockaddr *) &req->dst_addr;
			req->status = addr_resolve(src_in, dst_in, req->addr,
						   true, req->seq);
			if (req->status && time_after_eq(jiffies, req->timeout))
				req->status = -ETIMEDOUT;
			else if (req->status == -ENODATA) {
				set_timeout(&req->work, req->timeout);
				continue;
			}
		}
		list_move_tail(&req->list, &done_list);
	}

	mutex_unlock(&lock);

	list_for_each_entry_safe(req, temp_req, &done_list, list) {
		list_del(&req->list);
		/* It is safe to cancel other work items from this work item
		 * because only one work item at a time can run on this
		 * single-threaded workqueue.
		 */
		cancel_delayed_work(&req->work);
		req->callback(req->status, (struct sockaddr *) &req->src_addr,
			req->addr, req->context);
		put_client(req->client);
		kfree(req);
	}
}

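/* Resolve @dst_addr asynchronously; @callback runs from the workqueue
 * once resolution succeeds, fails or times out after @timeout_ms.
 */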
int rdma_resolve_ip(struct rdma_addr_client *client,
		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
		    struct rdma_dev_addr *addr, int timeout_ms,
		    void (*callback)(int status, struct sockaddr *src_addr,
				     struct rdma_dev_addr *addr, void *context),
		    void *context)
{
	struct sockaddr *src_in, *dst_in;
	struct addr_req *req;
	int ret = 0;

	req = kzalloc(sizeof *req, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	src_in = (struct sockaddr *) &req->src_addr;
	dst_in = (struct sockaddr *) &req->dst_addr;

	if (src_addr) {
		if (src_addr->sa_family != dst_addr->sa_family) {
			ret = -EINVAL;
			goto err;
		}

		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
	} else {
		src_in->sa_family = dst_addr->sa_family;
	}

	memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
	req->addr = addr;
	req->callback = callback;
	req->context = context;
	req->client = client;
	atomic_inc(&client->refcount);
	INIT_DELAYED_WORK(&req->work, process_one_req);
	req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);

	req->status = addr_resolve(src_in, dst_in, addr, true, req->seq);
	switch (req->status) {
	case 0:
		req->timeout = jiffies;
		queue_req(req);
		break;
	case -ENODATA:
		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
		queue_req(req);
		break;
	default:
		ret = req->status;
		atomic_dec(&client->refcount);
		goto err;
	}
	return ret;
err:
	kfree(req);
	return ret;
}
EXPORT_SYMBOL(rdma_resolve_ip);

int rdma_resolve_ip_route(struct sockaddr *src_addr,
			  const struct sockaddr *dst_addr,
			  struct rdma_dev_addr *addr)
{
	struct sockaddr_storage ssrc_addr = {};
	struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;

	if (src_addr) {
		if (src_addr->sa_family != dst_addr->sa_family)
			return -EINVAL;

		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
	} else {
		src_in->sa_family = dst_addr->sa_family;
	}

	return addr_resolve(src_in, dst_addr, addr, false, 0);
}
EXPORT_SYMBOL(rdma_resolve_ip_route);

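/* Cancel an outstanding request by forcing it to complete with
 * -ECANCELED on the next workqueue pass.
 */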
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
	struct addr_req *req, *temp_req;

	mutex_lock(&lock);
	list_for_each_entry_safe(req, temp_req, &req_list, list) {
		if (req->addr == addr) {
			req->status = -ECANCELED;
			req->timeout = jiffies;
			list_move(&req->list, &req_list);
			set_timeout(&req->work, req->timeout);
			break;
		}
	}
	mutex_unlock(&lock);
}
EXPORT_SYMBOL(rdma_addr_cancel);

struct resolve_cb_context {
	struct completion comp;
	int status;
};

static void resolve_cb(int status, struct sockaddr *src_addr,
	     struct rdma_dev_addr *addr, void *context)
{
	((struct resolve_cb_context *)context)->status = status;
	complete(&((struct resolve_cb_context *)context)->comp);
}

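/* Synchronously map a GRH sgid/dgid pair to the destination MAC and hop
 * limit by resolving over @ndev.
 */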
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
				 const union ib_gid *dgid,
				 u8 *dmac, const struct net_device *ndev,
				 int *hoplimit)
{
	struct rdma_dev_addr dev_addr;
	struct resolve_cb_context ctx;
	union {
		struct sockaddr     _sockaddr;
		struct sockaddr_in  _sockaddr_in;
		struct sockaddr_in6 _sockaddr_in6;
	} sgid_addr, dgid_addr;
	int ret;

	rdma_gid2ip(&sgid_addr._sockaddr, sgid);
	rdma_gid2ip(&dgid_addr._sockaddr, dgid);

	memset(&dev_addr, 0, sizeof(dev_addr));
	dev_addr.bound_dev_if = ndev->ifindex;
	dev_addr.net = &init_net;

	init_completion(&ctx.comp);
	ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
			&dev_addr, 1000, resolve_cb, &ctx);
	if (ret)
		return ret;

	wait_for_completion(&ctx.comp);

	ret = ctx.status;
	if (ret)
		return ret;

	memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
	*hoplimit = dev_addr.hoplimit;
	return 0;
}

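/* Rerun the request list as soon as a neighbour entry becomes valid */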
static int netevent_callback(struct notifier_block *self, unsigned long event,
	void *ctx)
{
	if (event == NETEVENT_NEIGH_UPDATE) {
		struct neighbour *neigh = ctx;

		if (neigh->nud_state & NUD_VALID)
			set_timeout(&work, jiffies);
	}
	return 0;
}

static struct notifier_block nb = {
	.notifier_call = netevent_callback
};

int addr_init(void)
{
	addr_wq = alloc_ordered_workqueue("ib_addr", 0);
	if (!addr_wq)
		return -ENOMEM;

	register_netevent_notifier(&nb);
	rdma_addr_register_client(&self);

	return 0;
}

void addr_cleanup(void)
{
	rdma_addr_unregister_client(&self);
	unregister_netevent_notifier(&nb);
	destroy_workqueue(addr_wq);
}