neighbour.c 68.9 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

18
#include <linux/slab.h>
Linus Torvalds's avatar
Linus Torvalds committed
19
20
21
22
23
24
25
26
27
28
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
29
#include <net/net_namespace.h>
Linus Torvalds's avatar
Linus Torvalds committed
30
31
32
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
33
#include <net/netevent.h>
34
#include <net/netlink.h>
Linus Torvalds's avatar
Linus Torvalds committed
35
36
#include <linux/rtnetlink.h>
#include <linux/random.h>
37
#include <linux/string.h>
38
#include <linux/log2.h>
Linus Torvalds's avatar
Linus Torvalds committed
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59

#define NEIGH_DEBUG 1

#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)
#define NEIGH_PRINTK0 NEIGH_PRINTK
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#define NEIGH_PRINTK2 NEIGH_NOPRINTK

#if NEIGH_DEBUG >= 1
#undef NEIGH_PRINTK1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#endif
#if NEIGH_DEBUG >= 2
#undef NEIGH_PRINTK2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#endif

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(unsigned long arg);
Thomas Graf's avatar
Thomas Graf committed
60
61
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_update_notify(struct neighbour *neigh);
Linus Torvalds's avatar
Linus Torvalds committed
62
63
64
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);

static struct neigh_table *neigh_tables;
65
#ifdef CONFIG_PROC_FS
66
static const struct file_operations neigh_stat_seq_fops;
67
#endif
Linus Torvalds's avatar
Linus Torvalds committed
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be made under this lock: no callbacks
     to protocol backends, no attempts to send something to network.
     It will result in deadlocks, if backend/driver wants to use neighbour
     cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   Reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is also used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be made under neigh->lock,
   the most complicated procedure, which we allow is dev->hard_header.
   It is supposed, that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.

   The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
   the list of neighbour tables. This list is used only in process context.
 */

static DEFINE_RWLOCK(neigh_tbl_lock);

/*
 * Output hook that refuses to transmit: the buffer is freed and
 * -ENETDOWN is returned.  This file installs it as n->output in
 * neigh_alloc() and neigh_flush_dev(), and as hh->hh_output in
 * neigh_destroy().
 */
static int neigh_blackhole(struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

108
109
110
111
112
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

Thomas Graf's avatar
Thomas Graf committed
113
	__neigh_notify(neigh, RTM_DELNEIGH, 0);
114
115
116
	neigh_release(neigh);
}

Linus Torvalds's avatar
Linus Torvalds committed
117
118
119
120
121
122
123
124
/*
 * Pick a random reachable time in the interval (1/2)*base...(3/2)*base.
 * This corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 *
 * Returns 0 when @base is 0 (which also avoids a modulo by zero).
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return (net_random() % base) + (base >> 1);
}
127
EXPORT_SYMBOL(neigh_rand_reach_time);
Linus Torvalds's avatar
Linus Torvalds committed
128
129
130
131
132
133


static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
134
	struct neigh_hash_table *nht;
Linus Torvalds's avatar
Linus Torvalds committed
135
136
137
138

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
139
140
141
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i <= nht->hash_mask; i++) {
142
143
		struct neighbour *n;
		struct neighbour __rcu **np;
Linus Torvalds's avatar
Linus Torvalds committed
144

145
		np = &nht->hash_buckets[i];
146
147
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
Linus Torvalds's avatar
Linus Torvalds committed
148
149
150
151
152
153
154
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
155
156
157
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
Linus Torvalds's avatar
Linus Torvalds committed
158
159
160
				n->dead = 1;
				shrunk	= 1;
				write_unlock(&n->lock);
161
				neigh_cleanup_and_release(n);
Linus Torvalds's avatar
Linus Torvalds committed
162
163
164
165
166
167
168
169
170
171
172
173
174
175
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}

176
177
178
179
180
181
182
183
184
185
/*
 * Arm the entry's timer to fire at @when, taking a reference on the
 * entry first.  mod_timer() returning non-zero means the timer was
 * already pending; that is reported as a bug together with a stack dump.
 */
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (!unlikely(mod_timer(&n->timer, when)))
		return;

	printk("NEIGH: BUG, double timer add, state is %x\n",
	       n->nud_state);
	dump_stack();
}

Linus Torvalds's avatar
Linus Torvalds committed
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
/*
 * Cancel the entry's timer if its state says one may be running.
 *
 * Returns 1 when a pending timer was actually removed (one reference on
 * the entry is dropped in that case), 0 otherwise.
 */
static int neigh_del_timer(struct neighbour *n)
{
	/* Not in a timer-driven state: del_timer() is not attempted. */
	if (!(n->nud_state & NUD_IN_TIMER))
		return 0;

	if (!del_timer(&n->timer))
		return 0;

	neigh_release(n);
	return 1;
}

/*
 * Drain @list: for every queued buffer, drop the device reference
 * (dev_put on skb->dev) and free the buffer.
 */
static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	for (skb = skb_dequeue(list); skb != NULL; skb = skb_dequeue(list)) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

206
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
Linus Torvalds's avatar
Linus Torvalds committed
207
208
{
	int i;
209
	struct neigh_hash_table *nht;
Linus Torvalds's avatar
Linus Torvalds committed
210

211
212
213
214
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i <= nht->hash_mask; i++) {
215
216
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];
Linus Torvalds's avatar
Linus Torvalds committed
217

218
219
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
Linus Torvalds's avatar
Linus Torvalds committed
220
221
222
223
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
224
225
226
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
Linus Torvalds's avatar
Linus Torvalds committed
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				skb_queue_purge(&n->arp_queue);
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				NEIGH_PRINTK2("neigh %p is stray.\n", n);
			}
			write_unlock(&n->lock);
250
			neigh_cleanup_and_release(n);
Linus Torvalds's avatar
Linus Torvalds committed
251
252
		}
	}
253
}
Linus Torvalds's avatar
Linus Torvalds committed
254

255
256
257
258
259
260
/*
 * Flush the entries of @dev from @tbl (see neigh_flush_dev()), holding
 * tbl->lock for writing with bottom halves disabled for the duration.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
261
EXPORT_SYMBOL(neigh_changeaddr);
262
263
264
265
266

/*
 * Remove everything @tbl holds for @dev: regular entries and proxy
 * entries are dropped under tbl->lock; afterwards the proxy timer is
 * stopped synchronously and the proxy queue is purged.
 *
 * Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	/* Done outside tbl->lock. */
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);

	return 0;
}
274
EXPORT_SYMBOL(neigh_ifdown);
Linus Torvalds's avatar
Linus Torvalds committed
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290

static struct neighbour *neigh_alloc(struct neigh_table *tbl)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

291
	n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
Linus Torvalds's avatar
Linus Torvalds committed
292
293
294
295
296
	if (!n)
		goto out_entries;

	skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
297
	seqlock_init(&n->ha_lock);
Linus Torvalds's avatar
Linus Torvalds committed
298
299
300
301
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	n->parms	  = neigh_parms_clone(&tbl->parms);
302
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
Linus Torvalds's avatar
Linus Torvalds committed
303
304
305
306
307
308
309
310
311
312
313
314
315

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}

316
static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
Linus Torvalds's avatar
Linus Torvalds committed
317
{
318
319
320
	size_t size = entries * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour **buckets;
Linus Torvalds's avatar
Linus Torvalds committed
321

322
323
324
325
326
327
328
329
330
331
332
333
	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
Linus Torvalds's avatar
Linus Torvalds committed
334
	}
335
	rcu_assign_pointer(ret->hash_buckets, buckets);
336
337
	ret->hash_mask = entries - 1;
	get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
Linus Torvalds's avatar
Linus Torvalds committed
338
339
340
	return ret;
}

341
static void neigh_hash_free_rcu(struct rcu_head *head)
Linus Torvalds's avatar
Linus Torvalds committed
342
{
343
344
345
346
347
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
	struct neighbour **buckets = nht->hash_buckets;
Linus Torvalds's avatar
Linus Torvalds committed
348
349

	if (size <= PAGE_SIZE)
350
		kfree(buckets);
Linus Torvalds's avatar
Linus Torvalds committed
351
	else
352
353
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
Linus Torvalds's avatar
Linus Torvalds committed
354
355
}

356
357
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_entries)
Linus Torvalds's avatar
Linus Torvalds committed
358
{
359
360
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;
Linus Torvalds's avatar
Linus Torvalds committed
361
362
363

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

364
	BUG_ON(!is_power_of_2(new_entries));
365
366
367
368
369
	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_entries);
	if (!new_nht)
		return old_nht;
Linus Torvalds's avatar
Linus Torvalds committed
370

371
	for (i = 0; i <= old_nht->hash_mask; i++) {
Linus Torvalds's avatar
Linus Torvalds committed
372
373
		struct neighbour *n, *next;

374
375
		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
376
377
378
379
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);
Linus Torvalds's avatar
Linus Torvalds committed
380

381
			hash &= new_nht->hash_mask;
382
383
384
385
386
387
388
389
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
Linus Torvalds's avatar
Linus Torvalds committed
390
391
392
		}
	}

393
394
395
	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
Linus Torvalds's avatar
Linus Torvalds committed
396
397
398
399
400
401
402
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
403
	u32 hash_val;
404
	struct neigh_hash_table *nht;
405

Linus Torvalds's avatar
Linus Torvalds committed
406
407
	NEIGH_CACHE_STAT_INC(tbl, lookups);

408
409
410
	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
411
412
413
414

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
Linus Torvalds's avatar
Linus Torvalds committed
415
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
416
417
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
418
419
420
421
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}
422

423
	rcu_read_unlock_bh();
Linus Torvalds's avatar
Linus Torvalds committed
424
425
	return n;
}
426
EXPORT_SYMBOL(neigh_lookup);
Linus Torvalds's avatar
Linus Torvalds committed
427

428
429
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
Linus Torvalds's avatar
Linus Torvalds committed
430
431
432
{
	struct neighbour *n;
	int key_len = tbl->key_len;
433
	u32 hash_val;
434
	struct neigh_hash_table *nht;
Linus Torvalds's avatar
Linus Torvalds committed
435
436
437

	NEIGH_CACHE_STAT_INC(tbl, lookups);

438
439
440
	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask;
441
442
443
444

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
445
		if (!memcmp(n->primary_key, pkey, key_len) &&
446
		    net_eq(dev_net(n->dev), net)) {
447
448
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
449
450
451
452
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}
453

454
	rcu_read_unlock_bh();
Linus Torvalds's avatar
Linus Torvalds committed
455
456
	return n;
}
457
EXPORT_SYMBOL(neigh_lookup_nodev);
Linus Torvalds's avatar
Linus Torvalds committed
458
459
460
461
462
463
464
465

struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
466
	struct neigh_hash_table *nht;
Linus Torvalds's avatar
Linus Torvalds committed
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);
493
494
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
Linus Torvalds's avatar
Linus Torvalds committed
495

496
497
	if (atomic_read(&tbl->entries) > (nht->hash_mask + 1))
		nht = neigh_hash_grow(tbl, (nht->hash_mask + 1) << 1);
Linus Torvalds's avatar
Linus Torvalds committed
498

499
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
Linus Torvalds's avatar
Linus Torvalds committed
500
501
502
503
504
505

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

506
507
508
509
510
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
Linus Torvalds's avatar
Linus Torvalds committed
511
512
513
514
515
516
517
518
519
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	neigh_hold(n);
520
521
522
523
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
Linus Torvalds's avatar
Linus Torvalds committed
524
525
526
527
528
529
530
531
532
533
534
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK2("neigh %p is created.\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
535
EXPORT_SYMBOL(neigh_create);
Linus Torvalds's avatar
Linus Torvalds committed
536

537
static u32 pneigh_hash(const void *pkey, int key_len)
538
539
540
541
542
543
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
544
545
	return hash_val;
}
546

547
548
549
550
551
552
553
static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      int key_len,
					      struct net_device *dev)
{
	while (n) {
554
		if (!memcmp(n->key, pkey, key_len) &&
555
		    net_eq(pneigh_net(n), net) &&
556
		    (n->dev == dev || !n->dev))
557
558
			return n;
		n = n->next;
559
	}
560
561
	return NULL;
}
562

563
564
565
566
567
568
569
570
/*
 * Proxy-entry lookup that takes no lock itself: hashes @pkey and
 * searches the matching bucket with __pneigh_lookup_1().
 * Returns the entry or NULL.
 */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 bucket = pneigh_hash(pkey, key_len);
	struct pneigh_entry *head = tbl->phash_buckets[bucket];

	return __pneigh_lookup_1(head, net, pkey, key_len, dev);
}
572
EXPORT_SYMBOL_GPL(__pneigh_lookup);
573

574
575
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
Linus Torvalds's avatar
Linus Torvalds committed
576
577
578
579
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
580
	u32 hash_val = pneigh_hash(pkey, key_len);
Linus Torvalds's avatar
Linus Torvalds committed
581
582

	read_lock_bh(&tbl->lock);
583
584
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
Linus Torvalds's avatar
Linus Torvalds committed
585
	read_unlock_bh(&tbl->lock);
586
587

	if (n || !creat)
Linus Torvalds's avatar
Linus Torvalds committed
588
589
		goto out;

590
591
	ASSERT_RTNL();

Linus Torvalds's avatar
Linus Torvalds committed
592
593
594
595
	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

Eric Dumazet's avatar
Eric Dumazet committed
596
	write_pnet(&n->net, hold_net(net));
Linus Torvalds's avatar
Linus Torvalds committed
597
598
599
600
601
602
603
604
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
605
		release_net(net);
Linus Torvalds's avatar
Linus Torvalds committed
606
607
608
609
610
611
612
613
614
615
616
617
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
618
EXPORT_SYMBOL(pneigh_lookup);
Linus Torvalds's avatar
Linus Torvalds committed
619
620


621
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
Linus Torvalds's avatar
Linus Torvalds committed
622
623
624
625
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
626
	u32 hash_val = pneigh_hash(pkey, key_len);
Linus Torvalds's avatar
Linus Torvalds committed
627
628
629
630

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
631
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
632
		    net_eq(pneigh_net(n), net)) {
Linus Torvalds's avatar
Linus Torvalds committed
633
634
635
636
637
638
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
639
			release_net(pneigh_net(n));
Linus Torvalds's avatar
Linus Torvalds committed
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

/*
 * Drop every proxy entry bound to @dev, or every proxy entry at all
 * when @dev is NULL.  For each removed entry the optional pdestructor
 * is called, the device and namespace references are released and the
 * entry is freed.
 *
 * Takes no lock itself; neigh_ifdown() calls it with tbl->lock held for
 * writing.
 *
 * Note: the return value is -ENOENT unconditionally, whether or not
 * anything was removed.
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		for (;;) {
			n = *np;
			if (n == NULL)
				break;

			if (dev && n->dev != dev) {
				/* Not ours: keep it and move on. */
				np = &n->next;
				continue;
			}

			*np = n->next;
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
		}
	}
	return -ENOENT;
}

672
673
674
675
676
677
678
static void neigh_parms_destroy(struct neigh_parms *parms);

/*
 * Drop one reference on @parms; the holder of the last reference
 * destroys it via neigh_parms_destroy().
 */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
Linus Torvalds's avatar
Linus Torvalds committed
679

680
681
682
683
684
685
/*
 * RCU callback queued by neigh_destroy(): returns the entry's memory to
 * the kmem cache of the table it belongs to.
 */
static void neigh_destroy_rcu(struct rcu_head *head)
{
	struct neighbour *neigh = container_of(head, struct neighbour, rcu);

	kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
}
Linus Torvalds's avatar
Linus Torvalds committed
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
/*
 *	neighbour must already be out of the table;
 *
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct hh_cache *hh;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		printk(KERN_WARNING
		       "Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		printk(KERN_WARNING "Impossible event.\n");

	while ((hh = neigh->hh) != NULL) {
		neigh->hh = hh->hh_next;
		hh->hh_next = NULL;
709
710

		write_seqlock_bh(&hh->hh_lock);
Linus Torvalds's avatar
Linus Torvalds committed
711
		hh->hh_output = neigh_blackhole;
712
		write_sequnlock_bh(&hh->hh_lock);
Eric Dumazet's avatar
Eric Dumazet committed
713
		hh_cache_put(hh);
Linus Torvalds's avatar
Linus Torvalds committed
714
715
716
717
718
719
720
721
722
723
	}

	skb_queue_purge(&neigh->arp_queue);

	dev_put(neigh->dev);
	neigh_parms_put(neigh->parms);

	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

	atomic_dec(&neigh->tbl->entries);
724
	call_rcu(&neigh->rcu, neigh_destroy_rcu);
Linus Torvalds's avatar
Linus Torvalds committed
725
}
726
EXPORT_SYMBOL(neigh_destroy);
Linus Torvalds's avatar
Linus Torvalds committed
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761

/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	struct hh_cache *hh;

	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

	neigh->output = neigh->ops->output;

	for (hh = neigh->hh; hh; hh = hh->hh_next)
		hh->hh_output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	struct hh_cache *hh;

	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

	neigh->output = neigh->ops->connected_output;

	for (hh = neigh->hh; hh; hh = hh->hh_next)
		hh->hh_output = neigh->ops->hh_output;
}

762
static void neigh_periodic_work(struct work_struct *work)
Linus Torvalds's avatar
Linus Torvalds committed
763
{
764
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
765
766
	struct neighbour *n;
	struct neighbour __rcu **np;
767
	unsigned int i;
768
	struct neigh_hash_table *nht;
Linus Torvalds's avatar
Linus Torvalds committed
769
770
771

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

772
	write_lock_bh(&tbl->lock);
773
774
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
Linus Torvalds's avatar
Linus Torvalds committed
775
776
777
778
779

	/*
	 *	periodically recompute ReachableTime from random function
	 */

780
	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
Linus Torvalds's avatar
Linus Torvalds committed
781
		struct neigh_parms *p;
782
		tbl->last_rand = jiffies;
Linus Torvalds's avatar
Linus Torvalds committed
783
784
785
786
787
		for (p = &tbl->parms; p; p = p->next)
			p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
	}

788
789
	for (i = 0 ; i <= nht->hash_mask; i++) {
		np = &nht->hash_buckets[i];
Linus Torvalds's avatar
Linus Torvalds committed
790

791
792
		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
793
			unsigned int state;
Linus Torvalds's avatar
Linus Torvalds committed
794

795
			write_lock(&n->lock);
Linus Torvalds's avatar
Linus Torvalds committed
796

797
798
799
800
801
			state = n->nud_state;
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}
Linus Torvalds's avatar
Linus Torvalds committed
802

803
804
			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;
Linus Torvalds's avatar
Linus Torvalds committed
805

806
807
808
809
810
811
812
813
814
			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
Linus Torvalds's avatar
Linus Torvalds committed
815
816
817
			write_unlock(&n->lock);

next_elt:
818
819
820
821
822
823
824
825
826
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
Linus Torvalds's avatar
Linus Torvalds committed
827
	}
828
829
830
	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
	 * base_reachable_time.
Linus Torvalds's avatar
Linus Torvalds committed
831
	 */
832
833
834
	schedule_delayed_work(&tbl->gc_work,
			      tbl->parms.base_reachable_time >> 1);
	write_unlock_bh(&tbl->lock);
Linus Torvalds's avatar
Linus Torvalds committed
835
836
837
838
839
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
Eric Dumazet's avatar
Eric Dumazet committed
840
	return (n->nud_state & NUD_PROBE) ?
Linus Torvalds's avatar
Linus Torvalds committed
841
		p->ucast_probes :
Eric Dumazet's avatar
Eric Dumazet committed
842
		p->ucast_probes + p->app_probes + p->mcast_probes;
Linus Torvalds's avatar
Linus Torvalds committed
843
844
}

845
static void neigh_invalidate(struct neighbour *neigh)
846
847
	__releases(neigh->lock)
	__acquires(neigh->lock)
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	skb_queue_purge(&neigh->arp_queue);
}

Linus Torvalds's avatar
Linus Torvalds committed
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER)) {
#ifndef CONFIG_SMP
		printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
#endif
		goto out;
	}

	if (state & NUD_REACHABLE) {
892
		if (time_before_eq(now,
Linus Torvalds's avatar
Linus Torvalds committed
893
894
895
896
897
898
899
				   neigh->confirmed + neigh->parms->reachable_time)) {
			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
			neigh->nud_state = NUD_DELAY;
900
			neigh->updated = jiffies;
Linus Torvalds's avatar
Linus Torvalds committed
901
902
903
904
905
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
			neigh->nud_state = NUD_STALE;
906
			neigh->updated = jiffies;
Linus Torvalds's avatar
Linus Torvalds committed
907
			neigh_suspect(neigh);
908
			notify = 1;
Linus Torvalds's avatar
Linus Torvalds committed
909
910
		}
	} else if (state & NUD_DELAY) {
911
		if (time_before_eq(now,
Linus Torvalds's avatar
Linus Torvalds committed
912
913
914
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
915
			neigh->updated = jiffies;
Linus Torvalds's avatar
Linus Torvalds committed
916
			neigh_connect(neigh);
917
			notify = 1;
Linus Torvalds's avatar
Linus Torvalds committed
918
919
920
921
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
			neigh->nud_state = NUD_PROBE;
922
			neigh->updated = jiffies;
Linus Torvalds's avatar
Linus Torvalds committed
923
924
925
926
927
928
929
930
931
932
933
934
			atomic_set(&neigh->probes, 0);
			next = now + neigh->parms->retrans_time;
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + neigh->parms->retrans_time;
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
935
		neigh_invalidate(neigh);
Linus Torvalds's avatar
Linus Torvalds committed
936
937
938
939
940
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
941
942
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
Linus Torvalds's avatar
Linus Torvalds committed
943
944
945
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		struct sk_buff *skb = skb_peek(&neigh->arp_queue);
946
947
		/* keep skb alive even if arp_queue overflows */
		if (skb)
948
			skb = skb_copy(skb, GFP_ATOMIC);
949
		write_unlock(&neigh->lock);
Linus Torvalds's avatar
Linus Torvalds committed
950
951
		neigh->ops->solicit(neigh, skb);
		atomic_inc(&neigh->probes);
952
		kfree_skb(skb);
953
	} else {
954
out:
955
956
		write_unlock(&neigh->lock);
	}
Thomas Graf's avatar
Thomas Graf committed
957

958
	if (notify)
Thomas Graf's avatar
Thomas Graf committed
959
		neigh_update_notify(neigh);
Linus Torvalds's avatar
Linus Torvalds committed
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975

	neigh_release(neigh);
}

int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	unsigned long now;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	now = jiffies;
976

Linus Torvalds's avatar
Linus Torvalds committed
977
978
979
980
	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
			neigh->nud_state     = NUD_INCOMPLETE;
981
			neigh->updated = jiffies;
982
			neigh_add_timer(neigh, now + 1);
Linus Torvalds's avatar
Linus Torvalds committed
983
984
		} else {
			neigh->nud_state = NUD_FAILED;
985
			neigh->updated = jiffies;
Linus Torvalds's avatar
Linus Torvalds committed
986
987
			write_unlock_bh(&neigh->lock);

988
			kfree_skb(skb);
Linus Torvalds's avatar
Linus Torvalds committed
989
990
991
992
993
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
		neigh->nud_state = NUD_DELAY;
994
		neigh->updated = jiffies;
995
996
		neigh_add_timer(neigh,
				jiffies + neigh->parms->delay_probe_time);
Linus Torvalds's avatar
Linus Torvalds committed
997
998
999
1000
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {