/*
 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/inetdevice.h>
38
#include <linux/slab.h>
39
#include <linux/workqueue.h>
40
#include <linux/module.h>
41
42
43
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
44
#include <net/netevent.h>
45
46
#include <net/addrconf.h>
#include <net/ip6_route.h>
47
#include <rdma/ib_addr.h>
48
#include <rdma/ib.h>
49
50
51
52
53
54
55

/* Module metadata: author, one-line description and licence string. */
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("IB Address Translation");
MODULE_LICENSE("Dual BSD/GPL");

struct addr_req {
	struct list_head list;
56
57
	struct sockaddr_storage src_addr;
	struct sockaddr_storage dst_addr;
58
	struct rdma_dev_addr *addr;
59
	struct rdma_addr_client *client;
60
61
62
63
64
65
66
	void *context;
	void (*callback)(int status, struct sockaddr *src_addr,
			 struct rdma_dev_addr *addr, void *context);
	unsigned long timeout;
	int status;
};

David Howells's avatar
David Howells committed
67
/* Work handler, defined further down; needed here by DECLARE_DELAYED_WORK. */
static void process_req(struct work_struct *work);

/* Serialises every access to req_list. */
static DEFINE_MUTEX(lock);
/* Pending requests; queue_req() keeps them ordered by timeout. */
static LIST_HEAD(req_list);
/* Delayed work item that runs process_req() on addr_wq. */
static DECLARE_DELAYED_WORK(work, process_req);
/* Workqueue created in addr_init() and destroyed in addr_cleanup(). */
static struct workqueue_struct *addr_wq;

74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
/*
 * Return the size in bytes of the concrete sockaddr structure that
 * matches addr->sa_family (AF_INET, AF_INET6 or AF_IB), or 0 for any
 * other family.
 */
int rdma_addr_size(struct sockaddr *addr)
{
	int size = 0;

	if (addr->sa_family == AF_INET)
		size = sizeof(struct sockaddr_in);
	else if (addr->sa_family == AF_INET6)
		size = sizeof(struct sockaddr_in6);
	else if (addr->sa_family == AF_IB)
		size = sizeof(struct sockaddr_ib);

	return size;
}
EXPORT_SYMBOL(rdma_addr_size);

89
90
/*
 * Client used for requests issued by this file itself (see
 * rdma_addr_find_l2_eth_by_grh()); registered in addr_init() and
 * unregistered in addr_cleanup().
 */
static struct rdma_addr_client self;

91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
/*
 * Prepare @client for use with rdma_resolve_ip(): its completion is
 * initialised and its reference count starts at 1 (the reference that
 * rdma_addr_unregister_client() later drops).
 */
void rdma_addr_register_client(struct rdma_addr_client *client)
{
	init_completion(&client->comp);
	atomic_set(&client->refcount, 1);
}
EXPORT_SYMBOL(rdma_addr_register_client);

/*
 * Drop one reference on @client; when the count reaches zero, signal
 * client->comp so a waiter in rdma_addr_unregister_client() can proceed.
 */
static inline void put_client(struct rdma_addr_client *client)
{
	if (!atomic_dec_and_test(&client->refcount))
		return;

	complete(&client->comp);
}

/*
 * Release the registration reference on @client and block until every
 * other reference (one per outstanding request) has been dropped too.
 */
void rdma_addr_unregister_client(struct rdma_addr_client *client)
{
	/* Drop the reference taken by rdma_addr_register_client(). */
	put_client(client);

	/* Completed by whichever put_client() call brings the count to 0. */
	wait_for_completion(&client->comp);
}
EXPORT_SYMBOL(rdma_addr_unregister_client);

Tom Tucker's avatar
Tom Tucker committed
111
112
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
		     const unsigned char *dst_dev_addr)
113
{
114
	dev_addr->dev_type = dev->type;
115
116
117
118
	memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
	memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
	if (dst_dev_addr)
		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
119
	dev_addr->bound_dev_if = dev->ifindex;
120
121
	return 0;
}
Tom Tucker's avatar
Tom Tucker committed
122
EXPORT_SYMBOL(rdma_copy_addr);
123

124
125
int rdma_translate_ip(const struct sockaddr *addr,
		      struct rdma_dev_addr *dev_addr,
126
		      u16 *vlan_id)
127
128
{
	struct net_device *dev;
129
	int ret = -EADDRNOTAVAIL;
130

131
	if (dev_addr->bound_dev_if) {
132
		dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
133
134
135
136
137
138
139
		if (!dev)
			return -ENODEV;
		ret = rdma_copy_addr(dev_addr, dev, NULL);
		dev_put(dev);
		return ret;
	}

140
141
	switch (addr->sa_family) {
	case AF_INET:
142
		dev = ip_dev_find(dev_addr->net,
143
			((const struct sockaddr_in *)addr)->sin_addr.s_addr);
144
145
146

		if (!dev)
			return ret;
147

148
		ret = rdma_copy_addr(dev_addr, dev, NULL);
149
150
		if (vlan_id)
			*vlan_id = rdma_vlan_dev_vlan_id(dev);
151
152
		dev_put(dev);
		break;
153
#if IS_ENABLED(CONFIG_IPV6)
154
	case AF_INET6:
155
		rcu_read_lock();
156
157
		for_each_netdev_rcu(dev_addr->net, dev) {
			if (ipv6_chk_addr(dev_addr->net,
158
					  &((const struct sockaddr_in6 *)addr)->sin6_addr,
159
160
					  dev, 1)) {
				ret = rdma_copy_addr(dev_addr, dev, NULL);
161
162
				if (vlan_id)
					*vlan_id = rdma_vlan_dev_vlan_id(dev);
163
164
165
				break;
			}
		}
166
		rcu_read_unlock();
167
		break;
168
#endif
169
	}
170
171
172
173
174
175
176
177
178
	return ret;
}
EXPORT_SYMBOL(rdma_translate_ip);

static void set_timeout(unsigned long time)
{
	unsigned long delay;

	delay = time - jiffies;
179
180
	if ((long)delay < 0)
		delay = 0;
181

182
	mod_delayed_work(addr_wq, &work, delay);
183
184
185
186
187
188
189
190
}

/*
 * Insert @req into req_list, keeping the list sorted by ascending
 * timeout, and re-arm the delayed work if @req became the list head.
 *
 * Takes and releases the file-level mutex.
 */
static void queue_req(struct addr_req *req)
{
	struct addr_req *pos;

	mutex_lock(&lock);

	/*
	 * Scan from the tail for the last entry that does not expire after
	 * @req. If the scan runs off the front, &pos->list is the list head
	 * itself, so the list_add() below places @req first.
	 */
	list_for_each_entry_reverse(pos, &req_list, list) {
		if (time_after_eq(req->timeout, pos->timeout))
			break;
	}
	list_add(&req->list, &pos->list);

	/* New earliest deadline: reschedule the worker for it. */
	if (&req->list == req_list.next)
		set_timeout(req->timeout);

	mutex_unlock(&lock);
}

202
203
static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
			const void *daddr)
204
205
206
207
{
	struct neighbour *n;
	int ret;

208
209
	n = dst_neigh_lookup(dst, daddr);

210
211
212
213
214
215
	rcu_read_lock();
	if (!n || !(n->nud_state & NUD_VALID)) {
		if (n)
			neigh_event_send(n, NULL);
		ret = -ENODATA;
	} else {
216
		ret = rdma_copy_addr(dev_addr, dst->dev, n->ha);
217
218
219
	}
	rcu_read_unlock();

220
221
222
	if (n)
		neigh_release(n);

223
224
225
	return ret;
}

226
static int addr4_resolve(struct sockaddr_in *src_in,
227
228
229
			 const struct sockaddr_in *dst_in,
			 struct rdma_dev_addr *addr,
			 struct rtable **prt)
230
{
231
232
	__be32 src_ip = src_in->sin_addr.s_addr;
	__be32 dst_ip = dst_in->sin_addr.s_addr;
233
	struct rtable *rt;
234
	struct flowi4 fl4;
235
236
	int ret;

237
238
239
240
	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = dst_ip;
	fl4.saddr = src_ip;
	fl4.flowi4_oif = addr->bound_dev_if;
241
	rt = ip_route_output_key(addr->net, &fl4);
242
243
	if (IS_ERR(rt)) {
		ret = PTR_ERR(rt);
244
		goto out;
245
	}
246
	src_in->sin_family = AF_INET;
247
	src_in->sin_addr.s_addr = fl4.saddr;
248

249
250
251
252
253
254
	/* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
	 * routable) and we could set the network type accordingly.
	 */
	if (rt->rt_uses_gateway)
		addr->network = RDMA_NETWORK_IPV4;

255
256
	*prt = rt;
	return 0;
257
258
259
260
out:
	return ret;
}

261
#if IS_ENABLED(CONFIG_IPV6)
Sean Hefty's avatar
Sean Hefty committed
262
static int addr6_resolve(struct sockaddr_in6 *src_in,
263
264
265
			 const struct sockaddr_in6 *dst_in,
			 struct rdma_dev_addr *addr,
			 struct dst_entry **pdst)
266
{
267
	struct flowi6 fl6;
268
	struct dst_entry *dst;
269
	struct rt6_info *rt;
Sean Hefty's avatar
Sean Hefty committed
270
	int ret;
271

272
	memset(&fl6, 0, sizeof fl6);
Alexey Dobriyan's avatar
Alexey Dobriyan committed
273
274
	fl6.daddr = dst_in->sin6_addr;
	fl6.saddr = src_in->sin6_addr;
275
	fl6.flowi6_oif = addr->bound_dev_if;
276

277
	dst = ip6_route_output(addr->net, NULL, &fl6);
Sean Hefty's avatar
Sean Hefty committed
278
279
280
	if ((ret = dst->error))
		goto put;

281
	rt = (struct rt6_info *)dst;
282
	if (ipv6_addr_any(&fl6.saddr)) {
283
		ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
284
					 &fl6.daddr, 0, &fl6.saddr);
Sean Hefty's avatar
Sean Hefty committed
285
286
		if (ret)
			goto put;
287

Sean Hefty's avatar
Sean Hefty committed
288
		src_in->sin6_family = AF_INET6;
Alexey Dobriyan's avatar
Alexey Dobriyan committed
289
		src_in->sin6_addr = fl6.saddr;
Sean Hefty's avatar
Sean Hefty committed
290
291
	}

292
293
294
295
296
297
	/* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
	 * routable) and we could set the network type accordingly.
	 */
	if (rt->rt6i_flags & RTF_GATEWAY)
		addr->network = RDMA_NETWORK_IPV6;

298
299
	*pdst = dst;
	return 0;
Sean Hefty's avatar
Sean Hefty committed
300
put:
301
302
303
	dst_release(dst);
	return ret;
}
304
#else
Sean Hefty's avatar
Sean Hefty committed
305
static int addr6_resolve(struct sockaddr_in6 *src_in,
306
307
308
			 const struct sockaddr_in6 *dst_in,
			 struct rdma_dev_addr *addr,
			 struct dst_entry **pdst)
309
310
311
312
{
	return -EADDRNOTAVAIL;
}
#endif
313

314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
/*
 * Resolve the link-layer destination for @dst_in on route @dst and
 * store the result in @addr.
 *
 *  - Loopback device: the local device owning @dst_in is looked up with
 *    rdma_translate_ip() and its own hardware address is used as the
 *    destination address too.
 *  - Device with IFF_NOARP set: only the device details are copied; no
 *    destination hardware address is written.
 *  - Any other device: the neighbour table is consulted through
 *    dst_fetch_ha().
 *
 * Returns 0 on success or the error from the helper that was used.
 */
static int addr_resolve_neigh(struct dst_entry *dst,
			      const struct sockaddr *dst_in,
			      struct rdma_dev_addr *addr)
{
	const void *daddr;
	int ret;

	if (dst->dev->flags & IFF_LOOPBACK) {
		ret = rdma_translate_ip(dst_in, addr, NULL);
		if (ret)
			return ret;

		memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
		return 0;
	}

	/* No neighbour resolution on this device: device info only. */
	if (dst->dev->flags & IFF_NOARP)
		return rdma_copy_addr(addr, dst->dev, NULL);

	/* Anything that is not AF_INET is treated as an IPv6 address. */
	if (dst_in->sa_family == AF_INET)
		daddr = &((const struct sockaddr_in *)dst_in)->sin_addr.s_addr;
	else
		daddr = &((const struct sockaddr_in6 *)dst_in)->sin6_addr;

	return dst_fetch_ha(dst, addr, daddr);
}

345
/*
 * Resolve the route from @src_in to @dst_in and, when @resolve_neigh is
 * true, the link-layer destination address as well.
 *
 * src_in->sa_family selects the path: AF_INET uses addr4_resolve(),
 * every other value is handled as IPv6 via addr6_resolve(). A failed
 * route lookup returns immediately and leaves @addr's interface and
 * namespace untouched. Once the route lookup has succeeded,
 * addr->bound_dev_if and addr->net are updated from the route's output
 * device even if the neighbour step failed.
 *
 * Returns 0 on success, the route-lookup error, or the error from
 * addr_resolve_neigh() (e.g. -ENODATA while the neighbour entry is not
 * yet valid).
 */
static int addr_resolve(struct sockaddr *src_in,
			const struct sockaddr *dst_in,
			struct rdma_dev_addr *addr,
			bool resolve_neigh)
{
	struct net_device *ndev;
	int ret;

	if (src_in->sa_family == AF_INET) {
		struct rtable *rt = NULL;

		ret = addr4_resolve((struct sockaddr_in *)src_in,
				    (const struct sockaddr_in *)dst_in,
				    addr, &rt);
		if (ret)
			return ret;

		if (resolve_neigh)
			ret = addr_resolve_neigh(&rt->dst, dst_in, addr);

		/* Pin the device before the route reference goes away. */
		ndev = rt->dst.dev;
		dev_hold(ndev);

		ip_rt_put(rt);
	} else {
		struct dst_entry *dst;

		ret = addr6_resolve((struct sockaddr_in6 *)src_in,
				    (const struct sockaddr_in6 *)dst_in,
				    addr, &dst);
		if (ret)
			return ret;

		if (resolve_neigh)
			ret = addr_resolve_neigh(dst, dst_in, addr);

		/* Pin the device before the route reference goes away. */
		ndev = dst->dev;
		dev_hold(ndev);

		dst_release(dst);
	}

	addr->bound_dev_if = ndev->ifindex;
	addr->net = dev_net(ndev);
	dev_put(ndev);

	return ret;
}

David Howells's avatar
David Howells committed
397
static void process_req(struct work_struct *work)
398
399
{
	struct addr_req *req, *temp_req;
400
	struct sockaddr *src_in, *dst_in;
401
402
403
404
405
406
	struct list_head done_list;

	INIT_LIST_HEAD(&done_list);

	mutex_lock(&lock);
	list_for_each_entry_safe(req, temp_req, &req_list, list) {
407
		if (req->status == -ENODATA) {
408
409
			src_in = (struct sockaddr *) &req->src_addr;
			dst_in = (struct sockaddr *) &req->dst_addr;
410
411
			req->status = addr_resolve(src_in, dst_in, req->addr,
						   true);
412
413
414
415
			if (req->status && time_after_eq(jiffies, req->timeout))
				req->status = -ETIMEDOUT;
			else if (req->status == -ENODATA)
				continue;
416
		}
417
		list_move_tail(&req->list, &done_list);
418
419
420
421
422
423
424
425
426
427
	}

	if (!list_empty(&req_list)) {
		req = list_entry(req_list.next, struct addr_req, list);
		set_timeout(req->timeout);
	}
	mutex_unlock(&lock);

	list_for_each_entry_safe(req, temp_req, &done_list, list) {
		list_del(&req->list);
428
429
		req->callback(req->status, (struct sockaddr *) &req->src_addr,
			req->addr, req->context);
430
		put_client(req->client);
431
432
433
434
		kfree(req);
	}
}

435
436
int rdma_resolve_ip(struct rdma_addr_client *client,
		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
437
438
439
440
441
		    struct rdma_dev_addr *addr, int timeout_ms,
		    void (*callback)(int status, struct sockaddr *src_addr,
				     struct rdma_dev_addr *addr, void *context),
		    void *context)
{
442
	struct sockaddr *src_in, *dst_in;
443
444
445
	struct addr_req *req;
	int ret = 0;

446
	req = kzalloc(sizeof *req, GFP_KERNEL);
447
448
449
	if (!req)
		return -ENOMEM;

450
451
452
453
454
455
456
457
458
	src_in = (struct sockaddr *) &req->src_addr;
	dst_in = (struct sockaddr *) &req->dst_addr;

	if (src_addr) {
		if (src_addr->sa_family != dst_addr->sa_family) {
			ret = -EINVAL;
			goto err;
		}

459
		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
460
461
462
463
	} else {
		src_in->sa_family = dst_addr->sa_family;
	}

464
	memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
465
466
467
	req->addr = addr;
	req->callback = callback;
	req->context = context;
468
469
	req->client = client;
	atomic_inc(&client->refcount);
470

471
	req->status = addr_resolve(src_in, dst_in, addr, true);
472
473
474
475
476
477
478
479
480
481
482
	switch (req->status) {
	case 0:
		req->timeout = jiffies;
		queue_req(req);
		break;
	case -ENODATA:
		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
		queue_req(req);
		break;
	default:
		ret = req->status;
483
		atomic_dec(&client->refcount);
484
		goto err;
485
486
	}
	return ret;
487
488
489
err:
	kfree(req);
	return ret;
490
491
492
}
EXPORT_SYMBOL(rdma_resolve_ip);

493
494
495
496
497
498
499
int rdma_resolve_ip_route(struct sockaddr *src_addr,
			  const struct sockaddr *dst_addr,
			  struct rdma_dev_addr *addr)
{
	struct sockaddr_storage ssrc_addr = {};
	struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;

500
501
502
	if (src_addr) {
		if (src_addr->sa_family != dst_addr->sa_family)
			return -EINVAL;
503
504

		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
505
	} else {
506
		src_in->sa_family = dst_addr->sa_family;
507
	}
508
509
510
511
512

	return addr_resolve(src_in, dst_addr, addr, false);
}
EXPORT_SYMBOL(rdma_resolve_ip_route);

513
514
515
516
517
518
519
520
521
/*
 * Cancel the first pending request whose result structure is @addr.
 *
 * The request is marked -ECANCELED, given an immediate deadline, moved
 * to the front of req_list, and the work is re-armed so process_req()
 * delivers the callback. Nothing happens if no request matches.
 */
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
	struct addr_req *req, *next;

	mutex_lock(&lock);
	list_for_each_entry_safe(req, next, &req_list, list) {
		if (req->addr != addr)
			continue;

		req->status = -ECANCELED;
		req->timeout = jiffies;
		list_move(&req->list, &req_list);
		set_timeout(req->timeout);
		break;
	}
	mutex_unlock(&lock);
}
EXPORT_SYMBOL(rdma_addr_cancel);

531
532
533
534
535
536
537
538
539
540
541
542
543
struct resolve_cb_context {
	struct rdma_dev_addr *addr;
	struct completion comp;
};

static void resolve_cb(int status, struct sockaddr *src_addr,
	     struct rdma_dev_addr *addr, void *context)
{
	memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
				rdma_dev_addr));
	complete(&((struct resolve_cb_context *)context)->comp);
}

544
545
546
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
				 const union ib_gid *dgid,
				 u8 *dmac, u16 *vlan_id, int *if_index)
547
548
549
550
551
552
553
554
555
556
557
558
559
{
	int ret = 0;
	struct rdma_dev_addr dev_addr;
	struct resolve_cb_context ctx;
	struct net_device *dev;

	union {
		struct sockaddr     _sockaddr;
		struct sockaddr_in  _sockaddr_in;
		struct sockaddr_in6 _sockaddr_in6;
	} sgid_addr, dgid_addr;


560
561
	rdma_gid2ip(&sgid_addr._sockaddr, sgid);
	rdma_gid2ip(&dgid_addr._sockaddr, dgid);
562
563

	memset(&dev_addr, 0, sizeof(dev_addr));
564
565
	if (if_index)
		dev_addr.bound_dev_if = *if_index;
566
	dev_addr.net = &init_net;
567
568
569
570
571
572
573
574
575
576
577
578
579
580

	ctx.addr = &dev_addr;
	init_completion(&ctx.comp);
	ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
			&dev_addr, 1000, resolve_cb, &ctx);
	if (ret)
		return ret;

	wait_for_completion(&ctx.comp);

	memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
	dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
	if (!dev)
		return -ENODEV;
581
582
	if (if_index)
		*if_index = dev_addr.bound_dev_if;
583
584
585
586
587
	if (vlan_id)
		*vlan_id = rdma_vlan_dev_vlan_id(dev);
	dev_put(dev);
	return ret;
}
588
EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
589
590
591
592
593
594
595
596
597
598
599

/*
 * Find the source hardware address for the local device that owns the
 * IP address encoded in @sgid.
 *
 * @sgid:    GID, converted to an IP address with rdma_gid2ip().
 * @smac:    receives ETH_ALEN bytes of the device's hardware address.
 * @vlan_id: optional; passed straight through to rdma_translate_ip().
 *
 * The lookup is done in the init_net namespace. Returns 0 on success or
 * the error from rdma_translate_ip(), in which case @smac is untouched.
 */
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
{
	struct rdma_dev_addr dev_addr;
	union {
		struct sockaddr     _sockaddr;
		struct sockaddr_in  _sockaddr_in;
		struct sockaddr_in6 _sockaddr_in6;
	} gid_addr;
	int ret;

	rdma_gid2ip(&gid_addr._sockaddr, sgid);

	memset(&dev_addr, 0, sizeof(dev_addr));
	dev_addr.net = &init_net;

	ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
	if (ret == 0)
		memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);

	return ret;
}
EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);

Roland Dreier's avatar
Roland Dreier committed
613
static int netevent_callback(struct notifier_block *self, unsigned long event,
614
	void *ctx)
615
{
Roland Dreier's avatar
Roland Dreier committed
616
	if (event == NETEVENT_NEIGH_UPDATE) {
617
		struct neighbour *neigh = ctx;
618

619
		if (neigh->nud_state & NUD_VALID) {
620
621
622
			set_timeout(jiffies);
		}
	}
623
624
625
	return 0;
}

626
627
/* Notifier block registered for network events in addr_init(). */
static struct notifier_block nb = {
	.notifier_call = netevent_callback,
};

630
/*
 * Module init: create the "ib_addr" single-threaded workqueue, register
 * the netevent notifier and register the file's own client.
 *
 * Returns 0 on success or -ENOMEM if the workqueue cannot be created.
 */
static int __init addr_init(void)
{
	addr_wq = create_singlethread_workqueue("ib_addr");
	if (addr_wq == NULL)
		return -ENOMEM;

	register_netevent_notifier(&nb);
	rdma_addr_register_client(&self);

	return 0;
}

641
/*
 * Module exit: undo addr_init() in reverse order. Unregistering the
 * client first waits for all of its outstanding requests to finish.
 */
static void __exit addr_cleanup(void)
{
	rdma_addr_unregister_client(&self);
	unregister_netevent_notifier(&nb);
	destroy_workqueue(addr_wq);
}

/* Register the module's load and unload entry points. */
module_init(addr_init);
module_exit(addr_cleanup);