/*
 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/inetdevice.h>
38
#include <linux/slab.h>
39
#include <linux/workqueue.h>
40
#include <linux/module.h>
41
42
43
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
44
#include <net/netevent.h>
45
46
#include <net/addrconf.h>
#include <net/ip6_route.h>
47
#include <rdma/ib_addr.h>
48
#include <rdma/ib.h>
49
50
51

struct addr_req {
	struct list_head list;
52
53
	struct sockaddr_storage src_addr;
	struct sockaddr_storage dst_addr;
54
	struct rdma_dev_addr *addr;
55
	struct rdma_addr_client *client;
56
57
58
59
60
61
62
	void *context;
	void (*callback)(int status, struct sockaddr *src_addr,
			 struct rdma_dev_addr *addr, void *context);
	unsigned long timeout;
	int status;
};

David Howells's avatar
David Howells committed
63
/* Work handler; declared early because DECLARE_DELAYED_WORK() below needs it. */
static void process_req(struct work_struct *work);

/* Taken around every req_list access in queue_req(), process_req() and rdma_addr_cancel(). */
static DEFINE_MUTEX(lock);
/* Pending requests; queue_req() inserts them in timeout order. */
static LIST_HEAD(req_list);
/* Delayed work item that runs process_req(); (re)armed by set_timeout(). */
static DECLARE_DELAYED_WORK(work, process_req);
/* Workqueue the delayed work runs on; created in addr_init(). */
static struct workqueue_struct *addr_wq;

70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
/*
 * rdma_addr_size - size of the concrete socket address behind @addr
 * @addr: address whose sa_family selects the structure type
 *
 * Returns the size of struct sockaddr_in, sockaddr_in6 or sockaddr_ib for
 * AF_INET, AF_INET6 and AF_IB respectively, and 0 for any other family.
 */
int rdma_addr_size(struct sockaddr *addr)
{
	if (addr->sa_family == AF_INET)
		return sizeof(struct sockaddr_in);

	if (addr->sa_family == AF_INET6)
		return sizeof(struct sockaddr_in6);

	if (addr->sa_family == AF_IB)
		return sizeof(struct sockaddr_ib);

	/* Unknown address family. */
	return 0;
}
EXPORT_SYMBOL(rdma_addr_size);

85
86
/*
 * Client used for this file's own resolution requests (see
 * rdma_addr_find_l2_eth_by_grh()); registered in addr_init() and
 * unregistered in addr_cleanup().
 */
static struct rdma_addr_client self;

87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
/*
 * rdma_addr_register_client - prepare a client for issuing requests
 * @client: client structure to initialise
 *
 * Sets up the completion and gives the client its initial reference, which
 * rdma_addr_unregister_client() later drops.
 */
void rdma_addr_register_client(struct rdma_addr_client *client)
{
	init_completion(&client->comp);
	atomic_set(&client->refcount, 1);
}
EXPORT_SYMBOL(rdma_addr_register_client);

/*
 * Drop one reference on @client; dropping the last one signals
 * client->comp.
 */
static inline void put_client(struct rdma_addr_client *client)
{
	if (!atomic_dec_and_test(&client->refcount))
		return;

	complete(&client->comp);
}

/*
 * rdma_addr_unregister_client - release a client registered earlier
 * @client: client to tear down
 *
 * Drops one reference on @client and then blocks until client->comp is
 * signalled, which put_client() does when the refcount reaches zero.
 */
void rdma_addr_unregister_client(struct rdma_addr_client *client)
{
	put_client(client);
	wait_for_completion(&client->comp);
}
EXPORT_SYMBOL(rdma_addr_unregister_client);

Tom Tucker's avatar
Tom Tucker committed
107
108
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
		     const unsigned char *dst_dev_addr)
109
{
110
	dev_addr->dev_type = dev->type;
111
112
113
114
	memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
	memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
	if (dst_dev_addr)
		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
115
	dev_addr->bound_dev_if = dev->ifindex;
116
117
	return 0;
}
Tom Tucker's avatar
Tom Tucker committed
118
EXPORT_SYMBOL(rdma_copy_addr);
119

120
121
int rdma_translate_ip(const struct sockaddr *addr,
		      struct rdma_dev_addr *dev_addr,
122
		      u16 *vlan_id)
123
124
{
	struct net_device *dev;
125
	int ret = -EADDRNOTAVAIL;
126

127
	if (dev_addr->bound_dev_if) {
128
		dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
129
130
131
132
133
134
135
		if (!dev)
			return -ENODEV;
		ret = rdma_copy_addr(dev_addr, dev, NULL);
		dev_put(dev);
		return ret;
	}

136
137
	switch (addr->sa_family) {
	case AF_INET:
138
		dev = ip_dev_find(dev_addr->net,
139
			((const struct sockaddr_in *)addr)->sin_addr.s_addr);
140
141
142

		if (!dev)
			return ret;
143

144
		ret = rdma_copy_addr(dev_addr, dev, NULL);
145
146
		if (vlan_id)
			*vlan_id = rdma_vlan_dev_vlan_id(dev);
147
148
		dev_put(dev);
		break;
149
#if IS_ENABLED(CONFIG_IPV6)
150
	case AF_INET6:
151
		rcu_read_lock();
152
153
		for_each_netdev_rcu(dev_addr->net, dev) {
			if (ipv6_chk_addr(dev_addr->net,
154
					  &((const struct sockaddr_in6 *)addr)->sin6_addr,
155
156
					  dev, 1)) {
				ret = rdma_copy_addr(dev_addr, dev, NULL);
157
158
				if (vlan_id)
					*vlan_id = rdma_vlan_dev_vlan_id(dev);
159
160
161
				break;
			}
		}
162
		rcu_read_unlock();
163
		break;
164
#endif
165
	}
166
167
168
169
170
171
172
173
174
	return ret;
}
EXPORT_SYMBOL(rdma_translate_ip);

static void set_timeout(unsigned long time)
{
	unsigned long delay;

	delay = time - jiffies;
175
176
	if ((long)delay < 0)
		delay = 0;
177

178
	mod_delayed_work(addr_wq, &work, delay);
179
180
181
182
183
184
185
186
}

/*
 * Insert @req into req_list, keeping the list sorted by timeout, and re-arm
 * the delayed work when @req became the head of the list.
 */
static void queue_req(struct addr_req *req)
{
	struct addr_req *pos;

	mutex_lock(&lock);

	/*
	 * Scan from the tail for the last entry that does not expire after
	 * @req.  If none is found the cursor ends up on the list head itself,
	 * so the list_add() below places @req first.
	 */
	list_for_each_entry_reverse(pos, &req_list, list) {
		if (time_after_eq(req->timeout, pos->timeout))
			break;
	}
	list_add(&req->list, &pos->list);

	/* @req is now the earliest deadline: reschedule the work for it. */
	if (req->list.prev == &req_list)
		set_timeout(req->timeout);

	mutex_unlock(&lock);
}

198
199
static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
			const void *daddr)
200
201
202
203
{
	struct neighbour *n;
	int ret;

204
205
	n = dst_neigh_lookup(dst, daddr);

206
207
208
209
210
211
	rcu_read_lock();
	if (!n || !(n->nud_state & NUD_VALID)) {
		if (n)
			neigh_event_send(n, NULL);
		ret = -ENODATA;
	} else {
212
		ret = rdma_copy_addr(dev_addr, dst->dev, n->ha);
213
214
215
	}
	rcu_read_unlock();

216
217
218
	if (n)
		neigh_release(n);

219
220
221
	return ret;
}

222
static int addr4_resolve(struct sockaddr_in *src_in,
223
224
225
			 const struct sockaddr_in *dst_in,
			 struct rdma_dev_addr *addr,
			 struct rtable **prt)
226
{
227
228
	__be32 src_ip = src_in->sin_addr.s_addr;
	__be32 dst_ip = dst_in->sin_addr.s_addr;
229
	struct rtable *rt;
230
	struct flowi4 fl4;
231
232
	int ret;

233
234
235
236
	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = dst_ip;
	fl4.saddr = src_ip;
	fl4.flowi4_oif = addr->bound_dev_if;
237
	rt = ip_route_output_key(addr->net, &fl4);
238
239
	if (IS_ERR(rt)) {
		ret = PTR_ERR(rt);
240
		goto out;
241
	}
242
	src_in->sin_family = AF_INET;
243
	src_in->sin_addr.s_addr = fl4.saddr;
244

245
246
247
248
249
250
	/* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
	 * routable) and we could set the network type accordingly.
	 */
	if (rt->rt_uses_gateway)
		addr->network = RDMA_NETWORK_IPV4;

251
252
	addr->hoplimit = ip4_dst_hoplimit(&rt->dst);

253
254
	*prt = rt;
	return 0;
255
256
257
258
out:
	return ret;
}

259
#if IS_ENABLED(CONFIG_IPV6)
Sean Hefty's avatar
Sean Hefty committed
260
static int addr6_resolve(struct sockaddr_in6 *src_in,
261
262
263
			 const struct sockaddr_in6 *dst_in,
			 struct rdma_dev_addr *addr,
			 struct dst_entry **pdst)
264
{
265
	struct flowi6 fl6;
266
	struct dst_entry *dst;
267
	struct rt6_info *rt;
Sean Hefty's avatar
Sean Hefty committed
268
	int ret;
269

270
	memset(&fl6, 0, sizeof fl6);
Alexey Dobriyan's avatar
Alexey Dobriyan committed
271
272
	fl6.daddr = dst_in->sin6_addr;
	fl6.saddr = src_in->sin6_addr;
273
	fl6.flowi6_oif = addr->bound_dev_if;
274

275
	dst = ip6_route_output(addr->net, NULL, &fl6);
Sean Hefty's avatar
Sean Hefty committed
276
277
278
	if ((ret = dst->error))
		goto put;

279
	rt = (struct rt6_info *)dst;
280
	if (ipv6_addr_any(&fl6.saddr)) {
281
		ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
282
					 &fl6.daddr, 0, &fl6.saddr);
Sean Hefty's avatar
Sean Hefty committed
283
284
		if (ret)
			goto put;
285

Sean Hefty's avatar
Sean Hefty committed
286
		src_in->sin6_family = AF_INET6;
Alexey Dobriyan's avatar
Alexey Dobriyan committed
287
		src_in->sin6_addr = fl6.saddr;
Sean Hefty's avatar
Sean Hefty committed
288
289
	}

290
291
292
293
294
295
	/* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
	 * routable) and we could set the network type accordingly.
	 */
	if (rt->rt6i_flags & RTF_GATEWAY)
		addr->network = RDMA_NETWORK_IPV6;

296
297
	addr->hoplimit = ip6_dst_hoplimit(dst);

298
299
	*pdst = dst;
	return 0;
Sean Hefty's avatar
Sean Hefty committed
300
put:
301
302
303
	dst_release(dst);
	return ret;
}
304
#else
Sean Hefty's avatar
Sean Hefty committed
305
static int addr6_resolve(struct sockaddr_in6 *src_in,
306
307
308
			 const struct sockaddr_in6 *dst_in,
			 struct rdma_dev_addr *addr,
			 struct dst_entry **pdst)
309
310
311
312
{
	return -EADDRNOTAVAIL;
}
#endif
313

314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
/*
 * Fill in the link-layer part of @addr for the route @dst towards @dst_in.
 *
 * Three cases, decided by the flags of the route's device:
 *  - loopback: translate @dst_in as a local address and use the source
 *    hardware address as the destination as well;
 *  - IFF_NOARP set: copy the device information without a destination
 *    hardware address;
 *  - otherwise: take the destination hardware address from the neighbour
 *    entry via dst_fetch_ha().
 *
 * Returns 0 on success or the error from the helper that was used.
 */
static int addr_resolve_neigh(struct dst_entry *dst,
			      const struct sockaddr *dst_in,
			      struct rdma_dev_addr *addr)
{
	const void *daddr;
	int ret;

	if (dst->dev->flags & IFF_LOOPBACK) {
		ret = rdma_translate_ip(dst_in, addr, NULL);
		if (ret)
			return ret;

		memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
		return 0;
	}

	/* No neighbour lookup for devices flagged IFF_NOARP. */
	if (dst->dev->flags & IFF_NOARP)
		return rdma_copy_addr(addr, dst->dev, NULL);

	/* Anything that is not AF_INET is treated as an IPv6 address. */
	if (dst_in->sa_family == AF_INET)
		daddr = &((const struct sockaddr_in *)dst_in)->sin_addr.s_addr;
	else
		daddr = &((const struct sockaddr_in6 *)dst_in)->sin6_addr;

	return dst_fetch_ha(dst, addr, daddr);
}

345
/*
 * Resolve the route (and optionally the neighbour) from @src_in to @dst_in.
 *
 * @src_in: source address; its family picks the IPv4 path (AF_INET) or the
 *          IPv6 path (anything else), and it may be rewritten by the route
 *          lookup
 * @dst_in: destination address
 * @addr: receives the resolution results
 * @resolve_neigh: also resolve link-layer addressing via
 *                 addr_resolve_neigh()
 *
 * Once the route lookup has succeeded, addr->bound_dev_if and addr->net are
 * set from the route's device even if neighbour resolution then fails.
 *
 * Returns the route lookup error, otherwise the neighbour resolution result
 * (0 when @resolve_neigh is false).
 */
static int addr_resolve(struct sockaddr *src_in,
			const struct sockaddr *dst_in,
			struct rdma_dev_addr *addr,
			bool resolve_neigh)
{
	struct rtable *rt = NULL;	/* non-NULL only on the IPv4 path */
	struct net_device *ndev;
	struct dst_entry *dst;
	int ret;

	if (src_in->sa_family == AF_INET) {
		ret = addr4_resolve((struct sockaddr_in *)src_in,
				    (const struct sockaddr_in *)dst_in,
				    addr, &rt);
		if (ret)
			return ret;

		dst = &rt->dst;
	} else {
		ret = addr6_resolve((struct sockaddr_in6 *)src_in,
				    (const struct sockaddr_in6 *)dst_in,
				    addr, &dst);
		if (ret)
			return ret;
	}

	if (resolve_neigh)
		ret = addr_resolve_neigh(dst, dst_in, addr);

	/* Pin the device before the route that references it is released. */
	ndev = dst->dev;
	dev_hold(ndev);

	if (rt)
		ip_rt_put(rt);
	else
		dst_release(dst);

	addr->bound_dev_if = ndev->ifindex;
	addr->net = dev_net(ndev);
	dev_put(ndev);

	return ret;
}

David Howells's avatar
David Howells committed
397
static void process_req(struct work_struct *work)
398
399
{
	struct addr_req *req, *temp_req;
400
	struct sockaddr *src_in, *dst_in;
401
402
403
404
405
406
	struct list_head done_list;

	INIT_LIST_HEAD(&done_list);

	mutex_lock(&lock);
	list_for_each_entry_safe(req, temp_req, &req_list, list) {
407
		if (req->status == -ENODATA) {
408
409
			src_in = (struct sockaddr *) &req->src_addr;
			dst_in = (struct sockaddr *) &req->dst_addr;
410
411
			req->status = addr_resolve(src_in, dst_in, req->addr,
						   true);
412
413
414
415
			if (req->status && time_after_eq(jiffies, req->timeout))
				req->status = -ETIMEDOUT;
			else if (req->status == -ENODATA)
				continue;
416
		}
417
		list_move_tail(&req->list, &done_list);
418
419
420
421
422
423
424
425
426
427
	}

	if (!list_empty(&req_list)) {
		req = list_entry(req_list.next, struct addr_req, list);
		set_timeout(req->timeout);
	}
	mutex_unlock(&lock);

	list_for_each_entry_safe(req, temp_req, &done_list, list) {
		list_del(&req->list);
428
429
		req->callback(req->status, (struct sockaddr *) &req->src_addr,
			req->addr, req->context);
430
		put_client(req->client);
431
432
433
434
		kfree(req);
	}
}

435
436
int rdma_resolve_ip(struct rdma_addr_client *client,
		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
437
438
439
440
441
		    struct rdma_dev_addr *addr, int timeout_ms,
		    void (*callback)(int status, struct sockaddr *src_addr,
				     struct rdma_dev_addr *addr, void *context),
		    void *context)
{
442
	struct sockaddr *src_in, *dst_in;
443
444
445
	struct addr_req *req;
	int ret = 0;

446
	req = kzalloc(sizeof *req, GFP_KERNEL);
447
448
449
	if (!req)
		return -ENOMEM;

450
451
452
453
454
455
456
457
458
	src_in = (struct sockaddr *) &req->src_addr;
	dst_in = (struct sockaddr *) &req->dst_addr;

	if (src_addr) {
		if (src_addr->sa_family != dst_addr->sa_family) {
			ret = -EINVAL;
			goto err;
		}

459
		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
460
461
462
463
	} else {
		src_in->sa_family = dst_addr->sa_family;
	}

464
	memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
465
466
467
	req->addr = addr;
	req->callback = callback;
	req->context = context;
468
469
	req->client = client;
	atomic_inc(&client->refcount);
470

471
	req->status = addr_resolve(src_in, dst_in, addr, true);
472
473
474
475
476
477
478
479
480
481
482
	switch (req->status) {
	case 0:
		req->timeout = jiffies;
		queue_req(req);
		break;
	case -ENODATA:
		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
		queue_req(req);
		break;
	default:
		ret = req->status;
483
		atomic_dec(&client->refcount);
484
		goto err;
485
486
	}
	return ret;
487
488
489
err:
	kfree(req);
	return ret;
490
491
492
}
EXPORT_SYMBOL(rdma_resolve_ip);

493
494
495
496
497
498
499
int rdma_resolve_ip_route(struct sockaddr *src_addr,
			  const struct sockaddr *dst_addr,
			  struct rdma_dev_addr *addr)
{
	struct sockaddr_storage ssrc_addr = {};
	struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;

500
501
502
	if (src_addr) {
		if (src_addr->sa_family != dst_addr->sa_family)
			return -EINVAL;
503
504

		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
505
	} else {
506
		src_in->sa_family = dst_addr->sa_family;
507
	}
508
509
510
511
512

	return addr_resolve(src_in, dst_addr, addr, false);
}
EXPORT_SYMBOL(rdma_resolve_ip_route);

513
514
515
516
517
518
519
520
521
/*
 * rdma_addr_cancel - cancel the pending request that targets @addr
 * @addr: the rdma_dev_addr originally passed to rdma_resolve_ip()
 *
 * The first matching request is marked -ECANCELED, moved to the head of
 * req_list with an immediate deadline, and the work is re-armed.  The
 * callback still runs, from process_req(), with the -ECANCELED status.
 * Nothing happens if no queued request matches.
 */
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
	struct addr_req *req, *next;

	mutex_lock(&lock);
	list_for_each_entry_safe(req, next, &req_list, list) {
		if (req->addr != addr)
			continue;

		req->status = -ECANCELED;
		req->timeout = jiffies;
		list_move(&req->list, &req_list);
		set_timeout(req->timeout);
		break;
	}
	mutex_unlock(&lock);
}
EXPORT_SYMBOL(rdma_addr_cancel);

531
532
533
534
535
536
537
538
539
540
541
542
543
struct resolve_cb_context {
	struct rdma_dev_addr *addr;
	struct completion comp;
};

static void resolve_cb(int status, struct sockaddr *src_addr,
	     struct rdma_dev_addr *addr, void *context)
{
	memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
				rdma_dev_addr));
	complete(&((struct resolve_cb_context *)context)->comp);
}

544
545
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
				 const union ib_gid *dgid,
546
547
				 u8 *dmac, u16 *vlan_id, int *if_index,
				 int *hoplimit)
548
549
550
551
552
553
554
555
556
557
558
559
560
{
	int ret = 0;
	struct rdma_dev_addr dev_addr;
	struct resolve_cb_context ctx;
	struct net_device *dev;

	union {
		struct sockaddr     _sockaddr;
		struct sockaddr_in  _sockaddr_in;
		struct sockaddr_in6 _sockaddr_in6;
	} sgid_addr, dgid_addr;


561
562
	rdma_gid2ip(&sgid_addr._sockaddr, sgid);
	rdma_gid2ip(&dgid_addr._sockaddr, dgid);
563
564

	memset(&dev_addr, 0, sizeof(dev_addr));
565
566
	if (if_index)
		dev_addr.bound_dev_if = *if_index;
567
	dev_addr.net = &init_net;
568
569
570
571
572
573
574
575
576
577
578
579
580
581

	ctx.addr = &dev_addr;
	init_completion(&ctx.comp);
	ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
			&dev_addr, 1000, resolve_cb, &ctx);
	if (ret)
		return ret;

	wait_for_completion(&ctx.comp);

	memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
	dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
	if (!dev)
		return -ENODEV;
582
583
	if (if_index)
		*if_index = dev_addr.bound_dev_if;
584
585
	if (vlan_id)
		*vlan_id = rdma_vlan_dev_vlan_id(dev);
586
587
	if (hoplimit)
		*hoplimit = dev_addr.hoplimit;
588
589
590
	dev_put(dev);
	return ret;
}
591
EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
592
593
594
595
596
597
598
599
600
601
602

/*
 * rdma_addr_find_smac_by_sgid - look up the source MAC for a local GID
 * @sgid: source GID, converted to an IP address with rdma_gid2ip()
 * @smac: receives ETH_ALEN bytes of source hardware address on success
 * @vlan_id: optional; passed through to rdma_translate_ip()
 *
 * The lookup is done in init_net.
 *
 * Returns 0 on success or the error from rdma_translate_ip(), in which case
 * @smac is left untouched.
 */
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
{
	union {
		struct sockaddr     _sockaddr;
		struct sockaddr_in  _sockaddr_in;
		struct sockaddr_in6 _sockaddr_in6;
	} gid_addr;
	struct rdma_dev_addr dev_addr;
	int ret;

	rdma_gid2ip(&gid_addr._sockaddr, sgid);

	memset(&dev_addr, 0, sizeof(dev_addr));
	dev_addr.net = &init_net;

	ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
	if (!ret)
		memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);

	return ret;
}
EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);

Roland Dreier's avatar
Roland Dreier committed
616
static int netevent_callback(struct notifier_block *self, unsigned long event,
617
	void *ctx)
618
{
Roland Dreier's avatar
Roland Dreier committed
619
	if (event == NETEVENT_NEIGH_UPDATE) {
620
		struct neighbour *neigh = ctx;
621

622
		if (neigh->nud_state & NUD_VALID) {
623
624
625
			set_timeout(jiffies);
		}
	}
626
627
628
	return 0;
}

629
630
/* Notifier block that routes netevents to netevent_callback(). */
static struct notifier_block nb = {
	.notifier_call = netevent_callback,
};

633
int addr_init(void)
634
{
635
	addr_wq = create_singlethread_workqueue("ib_addr");
636
637
638
	if (!addr_wq)
		return -ENOMEM;

639
	register_netevent_notifier(&nb);
640
	rdma_addr_register_client(&self);
641
642
643
	return 0;
}

644
void addr_cleanup(void)
645
{
646
	rdma_addr_unregister_client(&self);
647
	unregister_netevent_notifier(&nb);
648
649
	destroy_workqueue(addr_wq);
}