/*
 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/inetdevice.h>
38
#include <linux/slab.h>
39
#include <linux/workqueue.h>
40
#include <linux/module.h>
41
42
43
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
44
#include <net/netevent.h>
45
46
#include <net/addrconf.h>
#include <net/ip6_route.h>
47
#include <rdma/ib_addr.h>
48
#include <rdma/ib.h>
49
50
51
52
#include <rdma/rdma_netlink.h>
#include <net/netlink.h>

#include "core_priv.h"
53
54
55

struct addr_req {
	struct list_head list;
56
57
	struct sockaddr_storage src_addr;
	struct sockaddr_storage dst_addr;
58
	struct rdma_dev_addr *addr;
59
	struct rdma_addr_client *client;
60
61
62
63
64
	void *context;
	void (*callback)(int status, struct sockaddr *src_addr,
			 struct rdma_dev_addr *addr, void *context);
	unsigned long timeout;
	int status;
65
	u32 seq;
66
67
};

68
69
/* Source of the per-request netlink sequence numbers (addr_req.seq). */
static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0);

/* Work handler; declared early because DECLARE_DELAYED_WORK needs it. */
static void process_req(struct work_struct *work);

/* 'lock' serializes all access to req_list. */
static DEFINE_MUTEX(lock);
static LIST_HEAD(req_list);

/* Delayed work running process_req(); (re)armed on addr_wq by set_timeout(). */
static DECLARE_DELAYED_WORK(work, process_req);
static struct workqueue_struct *addr_wq;

77
78
79
80
81
82
83
84
85
86
87
88
89
90
/*
 * Attribute policy for IP-resolve responses: the DGID attribute is a
 * binary blob whose policy length is sizeof(struct rdma_nla_ls_gid).
 */
static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
	[LS_NLA_TYPE_DGID] = {
		.type = NLA_BINARY,
		.len = sizeof(struct rdma_nla_ls_gid),
	},
};

static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
{
	struct nlattr *tb[LS_NLA_TYPE_MAX] = {};
	int ret;

	if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
		return false;

	ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
91
			nlmsg_len(nlh), ib_nl_addr_policy, NULL);
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
	if (ret)
		return false;

	return true;
}

static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
{
	const struct nlattr *head, *curr;
	union ib_gid gid;
	struct addr_req *req;
	int len, rem;
	int found = 0;

	head = (const struct nlattr *)nlmsg_data(nlh);
	len = nlmsg_len(nlh);

	nla_for_each_attr(curr, head, len, rem) {
		if (curr->nla_type == LS_NLA_TYPE_DGID)
			memcpy(&gid, nla_data(curr), nla_len(curr));
	}

	mutex_lock(&lock);
	list_for_each_entry(req, &req_list, list) {
		if (nlh->nlmsg_seq != req->seq)
			continue;
		/* We set the DGID part, the rest was set earlier */
		rdma_addr_set_dgid(req->addr, &gid);
		req->status = 0;
		found = 1;
		break;
	}
	mutex_unlock(&lock);

	if (!found)
		pr_info("Couldn't find request waiting for DGID: %pI6\n",
			&gid);
}

int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
			     struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;

	if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
137
	    !(NETLINK_CB(skb).sk))
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
		return -EPERM;

	if (ib_nl_is_good_ip_resp(nlh))
		ib_nl_process_good_ip_rsep(nlh);

	return skb->len;
}

/*
 * Multicast an IP-resolve request to the RDMA_NL_GROUP_LS netlink group.
 *
 * @dev_addr: supplies the interface index placed in the family header
 * @daddr:    destination IP address (struct in_addr or struct in6_addr)
 * @seq:      netlink sequence number used to match the reply
 * @family:   AF_INET selects an IPv4 attribute, anything else IPv6
 *
 * Returns -ENOMEM if the skb cannot be allocated and -ENODATA otherwise.
 * -ENODATA on the success path is deliberate: it keeps the request pending
 * until the response arrives.
 */
static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
			     const void *daddr,
			     u32 seq, u16 family)
{
	struct sk_buff *skb = NULL;
	struct nlmsghdr *nlh;
	struct rdma_ls_ip_resolve_header *header;
	void *data;
	size_t size;
	int attrtype;
	int len;

	if (family == AF_INET) {
		size = sizeof(struct in_addr);
		attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV4;
	} else {
		size = sizeof(struct in6_addr);
		attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6;
	}

	/*
	 * Room for the address attribute itself (not for a size_t) plus the
	 * family header.
	 */
	len = nla_total_size(size);
	len += NLMSG_ALIGN(sizeof(*header));

	skb = nlmsg_new(len, GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	data = ibnl_put_msg(skb, &nlh, seq, 0, RDMA_NL_LS,
			    RDMA_NL_LS_OP_IP_RESOLVE, NLM_F_REQUEST);
	if (!data) {
		nlmsg_free(skb);
		return -ENODATA;
	}

	/* Construct the family header first */
	header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
	header->ifindex = dev_addr->bound_dev_if;

	/* Do not send a request that lacks its mandatory address attribute. */
	if (nla_put(skb, attrtype, size, daddr)) {
		nlmsg_free(skb);
		return -ENODATA;
	}

	/* Repair the nlmsg header length */
	nlmsg_end(skb, nlh);
	rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, GFP_KERNEL);

	/* Make the request retry, so when we get the response from userspace
	 * we will have something.
	 */
	return -ENODATA;
}

195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
int rdma_addr_size(struct sockaddr *addr)
{
	switch (addr->sa_family) {
	case AF_INET:
		return sizeof(struct sockaddr_in);
	case AF_INET6:
		return sizeof(struct sockaddr_in6);
	case AF_IB:
		return sizeof(struct sockaddr_ib);
	default:
		return 0;
	}
}
EXPORT_SYMBOL(rdma_addr_size);

210
211
/*
 * Client used for this file's own lookups (rdma_addr_find_l2_eth_by_grh());
 * registered in addr_init() and unregistered in addr_cleanup().
 */
static struct rdma_addr_client self;

212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
/*
 * Prepare @client for use: its completion is initialized and its reference
 * count starts at 1 (the reference dropped by
 * rdma_addr_unregister_client()).
 */
void rdma_addr_register_client(struct rdma_addr_client *client)
{
	init_completion(&client->comp);
	atomic_set(&client->refcount, 1);
}
EXPORT_SYMBOL(rdma_addr_register_client);

/* Drop one reference on @client; the final put signals client->comp. */
static inline void put_client(struct rdma_addr_client *client)
{
	if (!atomic_dec_and_test(&client->refcount))
		return;

	complete(&client->comp);
}

/*
 * Drop the registration reference on @client and block until every other
 * reference (one per outstanding request) has been released as well.
 */
void rdma_addr_unregister_client(struct rdma_addr_client *client)
{
	put_client(client);

	/* Completed by whichever put_client() call drops the last reference. */
	wait_for_completion(&client->comp);
}
EXPORT_SYMBOL(rdma_addr_unregister_client);

Tom Tucker's avatar
Tom Tucker committed
232
233
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
		     const unsigned char *dst_dev_addr)
234
{
235
	dev_addr->dev_type = dev->type;
236
237
238
239
	memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
	memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
	if (dst_dev_addr)
		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
240
	dev_addr->bound_dev_if = dev->ifindex;
241
242
	return 0;
}
Tom Tucker's avatar
Tom Tucker committed
243
EXPORT_SYMBOL(rdma_copy_addr);
244

245
246
int rdma_translate_ip(const struct sockaddr *addr,
		      struct rdma_dev_addr *dev_addr,
247
		      u16 *vlan_id)
248
249
{
	struct net_device *dev;
250
	int ret = -EADDRNOTAVAIL;
251

252
	if (dev_addr->bound_dev_if) {
253
		dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
254
255
256
257
258
259
260
		if (!dev)
			return -ENODEV;
		ret = rdma_copy_addr(dev_addr, dev, NULL);
		dev_put(dev);
		return ret;
	}

261
262
	switch (addr->sa_family) {
	case AF_INET:
263
		dev = ip_dev_find(dev_addr->net,
264
			((const struct sockaddr_in *)addr)->sin_addr.s_addr);
265
266
267

		if (!dev)
			return ret;
268

269
		ret = rdma_copy_addr(dev_addr, dev, NULL);
270
		dev_addr->bound_dev_if = dev->ifindex;
271
272
		if (vlan_id)
			*vlan_id = rdma_vlan_dev_vlan_id(dev);
273
274
		dev_put(dev);
		break;
275
#if IS_ENABLED(CONFIG_IPV6)
276
	case AF_INET6:
277
		rcu_read_lock();
278
279
		for_each_netdev_rcu(dev_addr->net, dev) {
			if (ipv6_chk_addr(dev_addr->net,
280
					  &((const struct sockaddr_in6 *)addr)->sin6_addr,
281
282
					  dev, 1)) {
				ret = rdma_copy_addr(dev_addr, dev, NULL);
283
				dev_addr->bound_dev_if = dev->ifindex;
284
285
				if (vlan_id)
					*vlan_id = rdma_vlan_dev_vlan_id(dev);
286
287
288
				break;
			}
		}
289
		rcu_read_unlock();
290
		break;
291
#endif
292
	}
293
294
295
296
297
298
299
300
301
	return ret;
}
EXPORT_SYMBOL(rdma_translate_ip);

static void set_timeout(unsigned long time)
{
	unsigned long delay;

	delay = time - jiffies;
302
303
	if ((long)delay < 0)
		delay = 0;
304

305
	mod_delayed_work(addr_wq, &work, delay);
306
307
308
309
310
311
312
313
}

static void queue_req(struct addr_req *req)
{
	struct addr_req *temp_req;

	mutex_lock(&lock);
	list_for_each_entry_reverse(temp_req, &req_list, list) {
314
		if (time_after_eq(req->timeout, temp_req->timeout))
315
316
317
318
319
320
321
322
323
324
			break;
	}

	list_add(&req->list, &temp_req->list);

	if (req_list.next == &req->list)
		set_timeout(req->timeout);
	mutex_unlock(&lock);
}

325
326
327
/*
 * Resolve the destination through user space over netlink.
 *
 * A nonzero result from rdma_nl_chk_listeners() is treated as failure and
 * yields -EADDRNOTAVAIL (presumably no listener on the LS group; confirm
 * against rdma_nl_chk_listeners()).  Otherwise the locally known device
 * fields are filled in and the request is sent; see ib_nl_ip_send_msg()
 * for the return value.
 */
static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
			  const void *daddr, u32 seq, u16 family)
{
	int chk = rdma_nl_chk_listeners(RDMA_NL_GROUP_LS);

	if (chk)
		return -EADDRNOTAVAIL;

	/* We fill in what we can, the response will fill the rest */
	rdma_copy_addr(dev_addr, dst->dev, NULL);

	return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
}

336
337
static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
			const void *daddr)
338
339
340
341
{
	struct neighbour *n;
	int ret;

342
343
	n = dst_neigh_lookup(dst, daddr);

344
345
346
347
348
349
	rcu_read_lock();
	if (!n || !(n->nud_state & NUD_VALID)) {
		if (n)
			neigh_event_send(n, NULL);
		ret = -ENODATA;
	} else {
350
		ret = rdma_copy_addr(dev_addr, dst->dev, n->ha);
351
352
353
	}
	rcu_read_unlock();

354
355
356
	if (n)
		neigh_release(n);

357
358
359
	return ret;
}

360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
/*
 * Report whether the route behind @dst goes through a gateway.  AF_INET
 * routes are inspected as struct rtable, everything else as struct rt6_info.
 */
static bool has_gateway(struct dst_entry *dst, sa_family_t family)
{
	if (family == AF_INET)
		return container_of(dst, struct rtable, dst)->rt_uses_gateway;

	return container_of(dst, struct rt6_info, dst)->rt6i_flags &
	       RTF_GATEWAY;
}

/*
 * Fetch the destination hardware address for @dst_in.
 *
 * Routed destinations on an InfiniBand device are resolved via netlink
 * (ib_nl_fetch_ha()); all others use the neighbour table (dst_fetch_ha()).
 */
static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
		    const struct sockaddr *dst_in, u32 seq)
{
	sa_family_t family = dst_in->sa_family;
	const void *daddr;

	/* Point at the raw IP address inside the sockaddr. */
	if (family == AF_INET)
		daddr = &((const struct sockaddr_in *)dst_in)->sin_addr.s_addr;
	else
		daddr = &((const struct sockaddr_in6 *)dst_in)->sin6_addr;

	/* Gateway + ARPHRD_INFINIBAND -> IB router */
	if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND)
		return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family);

	return dst_fetch_ha(dst, dev_addr, daddr);
}

393
static int addr4_resolve(struct sockaddr_in *src_in,
394
395
396
			 const struct sockaddr_in *dst_in,
			 struct rdma_dev_addr *addr,
			 struct rtable **prt)
397
{
398
399
	__be32 src_ip = src_in->sin_addr.s_addr;
	__be32 dst_ip = dst_in->sin_addr.s_addr;
400
	struct rtable *rt;
401
	struct flowi4 fl4;
402
403
	int ret;

404
405
406
407
	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = dst_ip;
	fl4.saddr = src_ip;
	fl4.flowi4_oif = addr->bound_dev_if;
408
	rt = ip_route_output_key(addr->net, &fl4);
409
410
411
412
	ret = PTR_ERR_OR_ZERO(rt);
	if (ret)
		return ret;

413
	src_in->sin_family = AF_INET;
414
	src_in->sin_addr.s_addr = fl4.saddr;
415

416
417
418
	/* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
	 * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
	 * type accordingly.
419
	 */
420
	if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND)
421
422
		addr->network = RDMA_NETWORK_IPV4;

423
424
	addr->hoplimit = ip4_dst_hoplimit(&rt->dst);

425
426
	*prt = rt;
	return 0;
427
428
}

429
#if IS_ENABLED(CONFIG_IPV6)
Sean Hefty's avatar
Sean Hefty committed
430
static int addr6_resolve(struct sockaddr_in6 *src_in,
431
432
433
			 const struct sockaddr_in6 *dst_in,
			 struct rdma_dev_addr *addr,
			 struct dst_entry **pdst)
434
{
435
	struct flowi6 fl6;
436
	struct dst_entry *dst;
437
	struct rt6_info *rt;
Sean Hefty's avatar
Sean Hefty committed
438
	int ret;
439

440
	memset(&fl6, 0, sizeof fl6);
Alexey Dobriyan's avatar
Alexey Dobriyan committed
441
442
	fl6.daddr = dst_in->sin6_addr;
	fl6.saddr = src_in->sin6_addr;
443
	fl6.flowi6_oif = addr->bound_dev_if;
444

445
446
	ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6);
	if (ret < 0)
447
		return ret;
Sean Hefty's avatar
Sean Hefty committed
448

449
	rt = (struct rt6_info *)dst;
450
	if (ipv6_addr_any(&src_in->sin6_addr)) {
Sean Hefty's avatar
Sean Hefty committed
451
		src_in->sin6_family = AF_INET6;
Alexey Dobriyan's avatar
Alexey Dobriyan committed
452
		src_in->sin6_addr = fl6.saddr;
Sean Hefty's avatar
Sean Hefty committed
453
454
	}

455
456
457
	/* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
	 * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
	 * type accordingly.
458
	 */
459
460
	if (rt->rt6i_flags & RTF_GATEWAY &&
	    ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND)
461
462
		addr->network = RDMA_NETWORK_IPV6;

463
464
	addr->hoplimit = ip6_dst_hoplimit(dst);

465
466
	*pdst = dst;
	return 0;
467
}
468
#else
Sean Hefty's avatar
Sean Hefty committed
469
static int addr6_resolve(struct sockaddr_in6 *src_in,
470
471
472
			 const struct sockaddr_in6 *dst_in,
			 struct rdma_dev_addr *addr,
			 struct dst_entry **pdst)
473
474
475
476
{
	return -EADDRNOTAVAIL;
}
#endif
477

478
479
/*
 * Resolve the link-level destination for a route that was already looked up.
 *
 *  - loopback device: translate the destination IP to its local device and
 *    use that device's own hardware address as the destination address;
 *  - IFF_NOARP device: only the device's own information is copied;
 *  - otherwise: fetch the hardware address via fetch_ha().
 */
static int addr_resolve_neigh(struct dst_entry *dst,
			      const struct sockaddr *dst_in,
			      struct rdma_dev_addr *addr,
			      u32 seq)
{
	if (dst->dev->flags & IFF_LOOPBACK) {
		int ret = rdma_translate_ip(dst_in, addr, NULL);

		if (ret)
			return ret;

		memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
		return 0;
	}

	/* The device does no ARP, so there is no neighbour to look up. */
	if (dst->dev->flags & IFF_NOARP)
		return rdma_copy_addr(addr, dst->dev, NULL);

	return fetch_ha(dst, addr, dst_in, seq);
}

501
static int addr_resolve(struct sockaddr *src_in,
502
503
			const struct sockaddr *dst_in,
			struct rdma_dev_addr *addr,
504
505
			bool resolve_neigh,
			u32 seq)
506
{
507
508
509
510
	struct net_device *ndev;
	struct dst_entry *dst;
	int ret;

511
512
513
514
515
	if (!addr->net) {
		pr_warn_ratelimited("%s: missing namespace\n", __func__);
		return -EINVAL;
	}

516
	if (src_in->sa_family == AF_INET) {
517
518
519
520
521
522
523
524
525
526
		struct rtable *rt = NULL;
		const struct sockaddr_in *dst_in4 =
			(const struct sockaddr_in *)dst_in;

		ret = addr4_resolve((struct sockaddr_in *)src_in,
				    dst_in4, addr, &rt);
		if (ret)
			return ret;

		if (resolve_neigh)
527
			ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq);
528

529
530
531
532
533
534
		if (addr->bound_dev_if) {
			ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
		} else {
			ndev = rt->dst.dev;
			dev_hold(ndev);
		}
535
536
537
538
539
540
541
542
543
544
545
546
547

		ip_rt_put(rt);
	} else {
		const struct sockaddr_in6 *dst_in6 =
			(const struct sockaddr_in6 *)dst_in;

		ret = addr6_resolve((struct sockaddr_in6 *)src_in,
				    dst_in6, addr,
				    &dst);
		if (ret)
			return ret;

		if (resolve_neigh)
548
			ret = addr_resolve_neigh(dst, dst_in, addr, seq);
549

550
551
552
553
554
555
		if (addr->bound_dev_if) {
			ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
		} else {
			ndev = dst->dev;
			dev_hold(ndev);
		}
556
557
558
559

		dst_release(dst);
	}

560
561
562
563
564
565
566
567
568
569
570
	if (ndev->flags & IFF_LOOPBACK) {
		ret = rdma_translate_ip(dst_in, addr, NULL);
		/*
		 * Put the loopback device and get the translated
		 * device instead.
		 */
		dev_put(ndev);
		ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
	} else {
		addr->bound_dev_if = ndev->ifindex;
	}
571
572
573
	dev_put(ndev);

	return ret;
574
575
}

David Howells's avatar
David Howells committed
576
static void process_req(struct work_struct *work)
577
578
{
	struct addr_req *req, *temp_req;
579
	struct sockaddr *src_in, *dst_in;
580
581
582
583
584
585
	struct list_head done_list;

	INIT_LIST_HEAD(&done_list);

	mutex_lock(&lock);
	list_for_each_entry_safe(req, temp_req, &req_list, list) {
586
		if (req->status == -ENODATA) {
587
588
			src_in = (struct sockaddr *) &req->src_addr;
			dst_in = (struct sockaddr *) &req->dst_addr;
589
			req->status = addr_resolve(src_in, dst_in, req->addr,
590
						   true, req->seq);
591
592
593
594
			if (req->status && time_after_eq(jiffies, req->timeout))
				req->status = -ETIMEDOUT;
			else if (req->status == -ENODATA)
				continue;
595
		}
596
		list_move_tail(&req->list, &done_list);
597
598
599
600
601
602
603
604
605
606
	}

	if (!list_empty(&req_list)) {
		req = list_entry(req_list.next, struct addr_req, list);
		set_timeout(req->timeout);
	}
	mutex_unlock(&lock);

	list_for_each_entry_safe(req, temp_req, &done_list, list) {
		list_del(&req->list);
607
608
		req->callback(req->status, (struct sockaddr *) &req->src_addr,
			req->addr, req->context);
609
		put_client(req->client);
610
611
612
613
		kfree(req);
	}
}

614
615
int rdma_resolve_ip(struct rdma_addr_client *client,
		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
616
617
618
619
620
		    struct rdma_dev_addr *addr, int timeout_ms,
		    void (*callback)(int status, struct sockaddr *src_addr,
				     struct rdma_dev_addr *addr, void *context),
		    void *context)
{
621
	struct sockaddr *src_in, *dst_in;
622
623
624
	struct addr_req *req;
	int ret = 0;

625
	req = kzalloc(sizeof *req, GFP_KERNEL);
626
627
628
	if (!req)
		return -ENOMEM;

629
630
631
632
633
634
635
636
637
	src_in = (struct sockaddr *) &req->src_addr;
	dst_in = (struct sockaddr *) &req->dst_addr;

	if (src_addr) {
		if (src_addr->sa_family != dst_addr->sa_family) {
			ret = -EINVAL;
			goto err;
		}

638
		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
639
640
641
642
	} else {
		src_in->sa_family = dst_addr->sa_family;
	}

643
	memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
644
645
646
	req->addr = addr;
	req->callback = callback;
	req->context = context;
647
648
	req->client = client;
	atomic_inc(&client->refcount);
649
	req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);
650

651
	req->status = addr_resolve(src_in, dst_in, addr, true, req->seq);
652
653
654
655
656
657
658
659
660
661
662
	switch (req->status) {
	case 0:
		req->timeout = jiffies;
		queue_req(req);
		break;
	case -ENODATA:
		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
		queue_req(req);
		break;
	default:
		ret = req->status;
663
		atomic_dec(&client->refcount);
664
		goto err;
665
666
	}
	return ret;
667
668
669
err:
	kfree(req);
	return ret;
670
671
672
}
EXPORT_SYMBOL(rdma_resolve_ip);

673
674
675
676
677
678
679
int rdma_resolve_ip_route(struct sockaddr *src_addr,
			  const struct sockaddr *dst_addr,
			  struct rdma_dev_addr *addr)
{
	struct sockaddr_storage ssrc_addr = {};
	struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;

680
681
682
	if (src_addr) {
		if (src_addr->sa_family != dst_addr->sa_family)
			return -EINVAL;
683
684

		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
685
	} else {
686
		src_in->sa_family = dst_addr->sa_family;
687
	}
688

689
	return addr_resolve(src_in, dst_addr, addr, false, 0);
690
691
692
}
EXPORT_SYMBOL(rdma_resolve_ip_route);

693
694
695
696
697
698
699
700
701
/*
 * Cancel the pending request that targets @addr, if any.
 *
 * The request is marked -ECANCELED, moved to the front of req_list with an
 * immediate deadline, and the work item is kicked so that its callback
 * runs promptly.  Only the first matching request is affected.
 */
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
	struct addr_req *req, *next;

	mutex_lock(&lock);
	list_for_each_entry_safe(req, next, &req_list, list) {
		if (req->addr != addr)
			continue;

		req->status = -ECANCELED;
		req->timeout = jiffies;
		list_move(&req->list, &req_list);
		set_timeout(req->timeout);
		break;
	}
	mutex_unlock(&lock);
}
EXPORT_SYMBOL(rdma_addr_cancel);

711
712
713
/* State shared between a synchronous caller and resolve_cb(). */
struct resolve_cb_context {
	/* Where resolve_cb() copies the resolved address on success. */
	struct rdma_dev_addr *addr;
	/* Signalled by resolve_cb() once the result is available. */
	struct completion comp;
	/* Final resolution status reported to resolve_cb(). */
	int status;
};

static void resolve_cb(int status, struct sockaddr *src_addr,
	     struct rdma_dev_addr *addr, void *context)
{
720
721
722
723
	if (!status)
		memcpy(((struct resolve_cb_context *)context)->addr,
		       addr, sizeof(struct rdma_dev_addr));
	((struct resolve_cb_context *)context)->status = status;
724
725
726
	complete(&((struct resolve_cb_context *)context)->comp);
}

727
728
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
				 const union ib_gid *dgid,
729
730
				 u8 *dmac, u16 *vlan_id, int *if_index,
				 int *hoplimit)
731
732
733
734
735
736
737
738
739
740
741
742
743
{
	int ret = 0;
	struct rdma_dev_addr dev_addr;
	struct resolve_cb_context ctx;
	struct net_device *dev;

	union {
		struct sockaddr     _sockaddr;
		struct sockaddr_in  _sockaddr_in;
		struct sockaddr_in6 _sockaddr_in6;
	} sgid_addr, dgid_addr;


744
745
	rdma_gid2ip(&sgid_addr._sockaddr, sgid);
	rdma_gid2ip(&dgid_addr._sockaddr, dgid);
746
747

	memset(&dev_addr, 0, sizeof(dev_addr));
748
749
	if (if_index)
		dev_addr.bound_dev_if = *if_index;
750
	dev_addr.net = &init_net;
751
752
753
754
755
756
757
758
759
760

	ctx.addr = &dev_addr;
	init_completion(&ctx.comp);
	ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
			&dev_addr, 1000, resolve_cb, &ctx);
	if (ret)
		return ret;

	wait_for_completion(&ctx.comp);

761
762
763
764
	ret = ctx.status;
	if (ret)
		return ret;

765
766
767
768
	memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
	dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
	if (!dev)
		return -ENODEV;
769
770
	if (if_index)
		*if_index = dev_addr.bound_dev_if;
771
772
	if (vlan_id)
		*vlan_id = rdma_vlan_dev_vlan_id(dev);
773
774
	if (hoplimit)
		*hoplimit = dev_addr.hoplimit;
775
776
777
	dev_put(dev);
	return ret;
}
778
EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
779
780
781
782
783
784
785
786
787
788
789

/*
 * Find the source MAC address (and optionally the VLAN id) of the local
 * device that owns the IP address encoded in @sgid.  The lookup is done in
 * init_net.
 *
 * Returns 0 with @smac filled in (ETH_ALEN bytes), or the error from
 * rdma_translate_ip().
 */
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
{
	struct rdma_dev_addr dev_addr;
	union {
		struct sockaddr     _sockaddr;
		struct sockaddr_in  _sockaddr_in;
		struct sockaddr_in6 _sockaddr_in6;
	} gid_addr;
	int err;

	rdma_gid2ip(&gid_addr._sockaddr, sgid);

	memset(&dev_addr, 0, sizeof(dev_addr));
	dev_addr.net = &init_net;

	err = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
	if (err)
		return err;

	memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
	return 0;
}
EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);

Roland Dreier's avatar
Roland Dreier committed
803
static int netevent_callback(struct notifier_block *self, unsigned long event,
804
	void *ctx)
805
{
Roland Dreier's avatar
Roland Dreier committed
806
	if (event == NETEVENT_NEIGH_UPDATE) {
807
		struct neighbour *neigh = ctx;
808

809
		if (neigh->nud_state & NUD_VALID) {
810
811
812
			set_timeout(jiffies);
		}
	}
813
814
815
	return 0;
}

816
817
/* Notifier block hooking netevent_callback() into netevent notifications. */
static struct notifier_block nb = {
	.notifier_call = netevent_callback,
};

820
int addr_init(void)
821
{
822
	addr_wq = alloc_workqueue("ib_addr", WQ_MEM_RECLAIM, 0);
823
824
825
	if (!addr_wq)
		return -ENOMEM;

826
	register_netevent_notifier(&nb);
827
	rdma_addr_register_client(&self);
828

829
830
831
	return 0;
}

832
void addr_cleanup(void)
833
{
834
	rdma_addr_unregister_client(&self);
835
	unregister_netevent_notifier(&nb);
836
837
	destroy_workqueue(addr_wq);
}