/*
 * 	NET3	Protocol independent device support routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *	Derived from the non IP parts of dev.c 1.0.19
 * 		Authors:	Ross Biro
 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *	Additional Authors:
 *		Florian la Roche <rzsfl@rz.uni-sb.de>
 *		Alan Cox <gw4pts@gw4pts.ampr.org>
 *		David Hinds <dahinds@users.sourceforge.net>
 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *		Adam Sulmicki <adam@cfar.umd.edu>
 *              Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 *	Changes:
 *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
 *              			to 2 if register_netdev gets called
 *              			before net_dev_init & also removed a
 *              			few lines of code in the process.
 *		Alan Cox	:	device private ioctl copies fields back.
 *		Alan Cox	:	Transmit queue code does relevant
 *					stunts to keep the queue safe.
 *		Alan Cox	:	Fixed double lock.
 *		Alan Cox	:	Fixed promisc NULL pointer trap
 *		????????	:	Support the full private ioctl range
 *		Alan Cox	:	Moved ioctl permission check into
 *					drivers
 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
 *		Alan Cox	:	100 backlog just doesn't cut it when
 *					you start doing multicast video 8)
 *		Alan Cox	:	Rewrote net_bh and list manager.
 *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
 *		Alan Cox	:	Took out transmit every packet pass
 *					Saved a few bytes in the ioctl handler
 *		Alan Cox	:	Network driver sets packet type before
 *					calling netif_rx. Saves a function
 *					call a packet.
 *		Alan Cox	:	Hashed net_bh()
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
 *		Alan Cox	:	Device lock protection.
 *		Alan Cox	: 	Fixed nasty side effect of device close
 *					changes.
 *		Rudi Cilibrasi	:	Pass the right thing to
 *					set_mac_address()
 *		Dave Miller	:	32bit quantity for the device lock to
 *					make it work out on a Sparc.
 *		Bjorn Ekwall	:	Added KERNELD hack.
 *		Alan Cox	:	Cleaned up the backlog initialise.
 *		Craig Metz	:	SIOCGIFCONF fix if space for under
 *					1 device.
 *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
 *					is no device open function.
 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
 *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
 *		Cyrus Durgin	:	Cleaned for KMOD
 *		Adam Sulmicki   :	Bug Fix : Network Device Unload
 *					A network device unload needs to purge
 *					the backlog queue.
 *	Paul Rusty Russell	:	SIOCSIFNAME
 *              Pekka Riikonen  :	Netdev boot-time settings code
 *              Andrew Morton   :       Make unregister_netdevice wait
 *              			indefinitely on dev->refcnt
 * 		J Hadi Salim	:	- Backlog queue sampling
 *				        - netif_rx() feedback
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
78
#include <linux/capability.h>
Linus Torvalds's avatar
Linus Torvalds committed
79
80
81
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
82
#include <linux/hash.h>
Linus Torvalds's avatar
Linus Torvalds committed
83
#include <linux/sched.h>
Arjan van de Ven's avatar
Arjan van de Ven committed
84
#include <linux/mutex.h>
Linus Torvalds's avatar
Linus Torvalds committed
85
86
87
88
89
90
91
92
93
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
94
#include <linux/ethtool.h>
Linus Torvalds's avatar
Linus Torvalds committed
95
96
#include <linux/notifier.h>
#include <linux/skbuff.h>
97
#include <net/net_namespace.h>
Linus Torvalds's avatar
Linus Torvalds committed
98
99
100
101
102
103
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>
Patrick McHardy's avatar
Patrick McHardy committed
104
#include <linux/if_macvlan.h>
Linus Torvalds's avatar
Linus Torvalds committed
105
106
107
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
108
#include <net/xfrm.h>
Linus Torvalds's avatar
Linus Torvalds committed
109
110
111
112
113
114
115
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
116
#include <net/wext.h>
Linus Torvalds's avatar
Linus Torvalds committed
117
118
#include <net/iw_handler.h>
#include <asm/current.h>
Steve Grubb's avatar
Steve Grubb committed
119
#include <linux/audit.h>
120
#include <linux/dmaengine.h>
121
#include <linux/err.h>
122
#include <linux/ctype.h>
123
#include <linux/if_arp.h>
124
#include <linux/if_vlan.h>
125
#include <linux/ip.h>
126
#include <net/ip.h>
127
128
#include <linux/ipv6.h>
#include <linux/in.h>
129
130
#include <linux/jhash.h>
#include <linux/random.h>
131
#include <trace/events/napi.h>
Linus Torvalds's avatar
Linus Torvalds committed
132

133
134
#include "net-sysfs.h"

135
136
137
/*
 * Upper bound used for GRO skbs.  Instead of increasing this, you should
 * create a hash table.
 */
#define MAX_GRO_SKBS 8

/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)

Linus Torvalds's avatar
Linus Torvalds committed
141
142
143
144
145
146
147
148
149
150
151
/*
 *	The list of packet types we will receive (as opposed to discard)
 *	and the routines to invoke.
 *
 *	Why 16. Because with 16 the only overlap we get on a hash of the
 *	low nibble of the protocol value is RARP/SNAP/X.25.
 *
 *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 *             sure which should go first, but I bet it won't make much
 *             difference if we are running VLANs.  The good news is that
 *             this protocol won't be in the list unless compiled in, so
Stephen Hemminger's avatar
Stephen Hemminger committed
152
 *             the average user (w/out VLANs) will not be adversely affected.
Linus Torvalds's avatar
Linus Torvalds committed
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
 *             --BLG
 *
 *		0800	IP
 *		8100    802.1Q VLAN
 *		0001	802.3
 *		0002	AX.25
 *		0004	802.2
 *		8035	RARP
 *		0005	SNAP
 *		0805	X.25
 *		0806	ARP
 *		8137	IPX
 *		0009	Localtalk
 *		86DD	IPv6
 */

169
170
171
#define PTYPE_HASH_SIZE	(16)
#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)

Linus Torvalds's avatar
Linus Torvalds committed
172
static DEFINE_SPINLOCK(ptype_lock);
173
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
174
static struct list_head ptype_all __read_mostly;	/* Taps */
Linus Torvalds's avatar
Linus Torvalds committed
175
176

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);

198
static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
Linus Torvalds's avatar
Linus Torvalds committed
199
200
{
	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
201
	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
Linus Torvalds's avatar
Linus Torvalds committed
202
203
}

204
static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
Linus Torvalds's avatar
Linus Torvalds committed
205
{
206
	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
Linus Torvalds's avatar
Linus Torvalds committed
207
208
}

209
210
211
/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
212
	struct net *net = dev_net(dev);
213
214
215
216

	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
217
	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
218
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
219
220
	hlist_add_head_rcu(&dev->index_hlist,
			   dev_index_hash(net, dev->ifindex));
221
222
223
224
	write_unlock_bh(&dev_base_lock);
	return 0;
}

225
226
227
/* Device list removal
 * caller must respect a RCU grace period before freeing/reusing dev
 */
228
229
230
231
232
233
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
234
	list_del_rcu(&dev->dev_list);
235
	hlist_del_rcu(&dev->name_hlist);
236
	hlist_del_rcu(&dev->index_hlist);
237
238
239
	write_unlock_bh(&dev_base_lock);
}

Linus Torvalds's avatar
Linus Torvalds committed
240
241
242
243
/*
 *	Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);
Linus Torvalds's avatar
Linus Torvalds committed
245
246
247
248
249

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */
250
251

DEFINE_PER_CPU(struct softnet_data, softnet_data);
Eric Dumazet's avatar
Eric Dumazet committed
252
EXPORT_PER_CPU_SYMBOL(softnet_data);
Linus Torvalds's avatar
Linus Torvalds committed
253

254
#ifdef CONFIG_LOCKDEP
255
/*
 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 * according to dev->type
 *
 * Entry i of this table corresponds to entry i of netdev_lock_name[] and
 * of the lock_class_key arrays; the tables must be kept the same length
 * and in the same order.  The final entry doubles as the default used by
 * netdev_lock_pos() for types not listed here.
 */
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
	 ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154,
	 ARPHRD_VOID, ARPHRD_NONE};
276

277
/*
 * Lockdep class names, one per entry of netdev_lock_type[] and in the
 * same order (the two tables are indexed with the same position).
 */
static const char *const netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
	 "_xmit_PHONET_PIPE", "_xmit_IEEE802154",
	 "_xmit_VOID", "_xmit_NONE"};
294
295

/* One lockdep key per netdev_lock_type[] entry, for xmit and address-list locks. */
static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
297
298
299
300
301
302
303
304
305
306
307
308

/*
 * Map a device type to its position in netdev_lock_type[].
 * Types that are not listed resolve to the last position.
 */
static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
	/* the last key is used by default */
	int pos = ARRAY_SIZE(netdev_lock_type) - 1;
	int idx = 0;

	while (idx < ARRAY_SIZE(netdev_lock_type)) {
		if (netdev_lock_type[idx] == dev_type) {
			pos = idx;
			break;
		}
		idx++;
	}
	return pos;
}

309
310
/*
 * Give @lock the lockdep class and name that correspond to @dev_type,
 * using the position returned by netdev_lock_pos().
 */
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}
318
319
320
321
322
323
324
325
326
327

/*
 * Give @dev's addr_list_lock the lockdep class and name that correspond
 * to dev->type.
 */
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
	const int pos = netdev_lock_pos(dev->type);

	lockdep_set_class_and_name(&dev->addr_list_lock,
				   &netdev_addr_lock_key[pos],
				   netdev_lock_name[pos]);
}
328
#else
329
330
331
332
333
/* Without CONFIG_LOCKDEP there is nothing to annotate: both helpers are no-ops. */
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif
Linus Torvalds's avatar
Linus Torvalds committed
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367

/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers, mangling input packets,
 *	MUST BE last in hash buckets and checking protocol handlers
 *	MUST start from promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 *	Explanation follows: if protocol handler, mangling packet, will
 *	be the first on list, it is not able to sense, that packet
 *	is cloned and should be copied-on-write, so that it will
 *	change it and subsequent readers will get broken packet.
 *							--ANK (980803)
 */

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
368
 *	This call does not sleep therefore it can not
Linus Torvalds's avatar
Linus Torvalds committed
369
370
371
372
373
374
375
376
377
 *	guarantee all CPU's that are in middle of receiving packets
 *	will see the new packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
	int hash;

	spin_lock_bh(&ptype_lock);
378
	if (pt->type == htons(ETH_P_ALL))
Linus Torvalds's avatar
Linus Torvalds committed
379
		list_add_rcu(&pt->list, &ptype_all);
380
	else {
381
		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
Linus Torvalds's avatar
Linus Torvalds committed
382
383
384
385
		list_add_rcu(&pt->list, &ptype_base[hash]);
	}
	spin_unlock_bh(&ptype_lock);
}
Eric Dumazet's avatar
Eric Dumazet committed
386
EXPORT_SYMBOL(dev_add_pack);
Linus Torvalds's avatar
Linus Torvalds committed
387
388
389
390
391
392
393
394

/**
 *	__dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
395
 *	returns.
Linus Torvalds's avatar
Linus Torvalds committed
396
397
398
399
400
401
402
403
404
405
406
407
 *
 *      The packet type might still be in use by receivers
 *	and must not be freed until after all the CPU's have gone
 *	through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head;
	struct packet_type *pt1;

	spin_lock_bh(&ptype_lock);

408
	if (pt->type == htons(ETH_P_ALL))
Linus Torvalds's avatar
Linus Torvalds committed
409
		head = &ptype_all;
410
	else
411
		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
Linus Torvalds's avatar
Linus Torvalds committed
412
413
414
415
416
417
418
419
420
421
422
423

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
	spin_unlock_bh(&ptype_lock);
}
Eric Dumazet's avatar
Eric Dumazet committed
424
425
EXPORT_SYMBOL(__dev_remove_pack);

Linus Torvalds's avatar
Linus Torvalds committed
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
/**
 *	dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	/* Wait for the networking stack before letting the caller reuse @pt. */
	synchronize_net();
}
EXPORT_SYMBOL(dev_remove_pack);
Linus Torvalds's avatar
Linus Torvalds committed
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472

/******************************************************************************

		      Device Boot-time Settings Routines

*******************************************************************************/

/*
 * Boot time configuration table.
 *
 * Holds NETDEV_BOOT_SETUP_MAX entries.  netdev_boot_setup_add() treats an
 * entry whose name starts with '\0' or ' ' as free.
 */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 *	netdev_boot_setup_add	- add new setup entry
 *	@name: name of the device
 *	@map: configured settings for the device
 *
 *	Adds new setup entry to the dev_boot_setup list.  The function
 *	returns 0 on error and 1 on success.  This is a generic routine to
 *	all netdevices.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
473
			strlcpy(s[i].name, name, IFNAMSIZ);
Linus Torvalds's avatar
Linus Torvalds committed
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 *	netdev_boot_setup_check	- check boot time settings
 *	@dev: the netdevice
 *
 * 	Check boot time settings for the device.
 *	The found settings (irq, base_addr, mem_start, mem_end) are copied
 *	into @dev to be used later in the device probing.
 *	Returns 0 if no settings found, 1 if they are.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		/* Skip free slots, then require an exact name match. */
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strcmp(dev->name, s[i].name)) {
			dev->irq 	= s[i].map.irq;
			dev->base_addr 	= s[i].map.base_addr;
			dev->mem_start 	= s[i].map.mem_start;
			dev->mem_end 	= s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}
EXPORT_SYMBOL(netdev_boot_setup_check);
Linus Torvalds's avatar
Linus Torvalds committed
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532


/**
 *	netdev_boot_base	- get address from boot time settings
 *	@prefix: prefix for network device
 *	@unit: id for network device
 *
 * 	Check boot time settings for the base address of device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings found.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
	const struct netdev_boot_setup *s = dev_boot_setup;
	char name[IFNAMSIZ];
	int i;

	sprintf(name, "%s%d", prefix, unit);

	/*
	 * If device already registered then return base of 1
	 * to indicate not to probe for this interface
	 */
533
	if (__dev_get_by_name(&init_net, name))
Linus Torvalds's avatar
Linus Torvalds committed
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
		return 1;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
		if (!strcmp(name, s[i].name))
			return s[i].map.base_addr;
	return 0;
}

/*
 * Handler for the "netdev=" boot parameter: stores the settings given on
 * the command line for a netdevice.
 *
 * Up to four leading integers are taken as irq, base_addr, mem_start and
 * mem_end (in that order); the text that follows them is used as the
 * device name.  Returns 0 when no name is left after the integers,
 * otherwise the result of netdev_boot_setup_add().
 */
int __init netdev_boot_setup(char *str)
{
	struct ifmap map;
	int ints[5];
	int count;

	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (str == NULL || *str == '\0')
		return 0;

	/* ints[0] decides how many of the following values are applied. */
	count = ints[0];

	/* Fields that were not supplied stay zero. */
	memset(&map, 0, sizeof(map));
	if (count >= 1)
		map.irq = ints[1];
	if (count >= 2)
		map.base_addr = ints[2];
	if (count >= 3)
		map.mem_start = ints[3];
	if (count >= 4)
		map.mem_end = ints[4];

	/* Record the entry in the boot setup table. */
	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);

/*******************************************************************************

			    Device Interface Subroutines

*******************************************************************************/

/**
 *	__dev_get_by_name	- find a device by its name
579
 *	@net: the applicable net namespace
Linus Torvalds's avatar
Linus Torvalds committed
580
581
582
583
584
585
586
587
588
 *	@name: name to find
 *
 *	Find an interface by name. Must be called under RTNL semaphore
 *	or @dev_base_lock. If the name is found a pointer to the device
 *	is returned. If the name is not found then %NULL is returned. The
 *	reference counters are not incremented so the caller must be
 *	careful with locks.
 */

589
struct net_device *__dev_get_by_name(struct net *net, const char *name)
Linus Torvalds's avatar
Linus Torvalds committed
590
591
{
	struct hlist_node *p;
Eric Dumazet's avatar
Eric Dumazet committed
592
593
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);
Linus Torvalds's avatar
Linus Torvalds committed
594

Eric Dumazet's avatar
Eric Dumazet committed
595
	hlist_for_each_entry(dev, p, head, name_hlist)
Linus Torvalds's avatar
Linus Torvalds committed
596
597
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;
Eric Dumazet's avatar
Eric Dumazet committed
598

Linus Torvalds's avatar
Linus Torvalds committed
599
600
	return NULL;
}
Eric Dumazet's avatar
Eric Dumazet committed
601
EXPORT_SYMBOL(__dev_get_by_name);
Linus Torvalds's avatar
Linus Torvalds committed
602

603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
/**
 *	dev_get_by_name_rcu	- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name.
 *	If the name is found a pointer to the device is returned.
 * 	If the name is not found then %NULL is returned.
 *	The reference counters are not incremented so the caller must be
 *	careful with locks. The caller must hold RCU lock.
 */

struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_name_rcu);

Linus Torvalds's avatar
Linus Torvalds committed
629
630
/**
 *	dev_get_by_name		- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any
 *	context and does its own locking. The returned handle has
 *	the usage count incremented and the caller must use dev_put() to
 *	release it when it is no longer needed. %NULL is returned if no
 *	matching device is found.
 */

struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	/* Take the reference before leaving the RCU read-side section. */
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_name);
Linus Torvalds's avatar
Linus Torvalds committed
653
654
655

/**
 *	__dev_get_by_index - find a device by its ifindex
656
 *	@net: the applicable net namespace
Linus Torvalds's avatar
Linus Torvalds committed
657
658
659
660
661
662
663
664
665
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold either the RTNL semaphore
 *	or @dev_base_lock.
 */

666
struct net_device *__dev_get_by_index(struct net *net, int ifindex)
Linus Torvalds's avatar
Linus Torvalds committed
667
668
{
	struct hlist_node *p;
Eric Dumazet's avatar
Eric Dumazet committed
669
670
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);
Linus Torvalds's avatar
Linus Torvalds committed
671

Eric Dumazet's avatar
Eric Dumazet committed
672
	hlist_for_each_entry(dev, p, head, index_hlist)
Linus Torvalds's avatar
Linus Torvalds committed
673
674
		if (dev->ifindex == ifindex)
			return dev;
Eric Dumazet's avatar
Eric Dumazet committed
675

Linus Torvalds's avatar
Linus Torvalds committed
676
677
	return NULL;
}
Eric Dumazet's avatar
Eric Dumazet committed
678
EXPORT_SYMBOL(__dev_get_by_index);
Linus Torvalds's avatar
Linus Torvalds committed
679

680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
/**
 *	dev_get_by_index_rcu - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold RCU lock.
 */

struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);

	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_index_rcu);

Linus Torvalds's avatar
Linus Torvalds committed
705
706
707

/**
 *	dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns NULL if the device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	/* Take the reference before leaving the RCU read-side section. */
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_index);
Linus Torvalds's avatar
Linus Torvalds committed
729
730
731

/**
 *	dev_getbyhwaddr - find a device by its hardware address
732
 *	@net: the applicable net namespace
Linus Torvalds's avatar
Linus Torvalds committed
733
734
735
736
737
738
739
740
741
742
743
744
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns NULL if the device
 *	is not found or a pointer to the device. The caller must hold the
 *	rtnl semaphore. The returned device has not had its ref count increased
 *	and the caller must therefore be careful about locking
 *
 *	BUGS:
 *	If the API was consistent this would be __dev_get_by_hwaddr
 */

745
struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
Linus Torvalds's avatar
Linus Torvalds committed
746
747
748
749
750
{
	struct net_device *dev;

	ASSERT_RTNL();

751
	for_each_netdev(net, dev)
Linus Torvalds's avatar
Linus Torvalds committed
752
753
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
754
755
756
			return dev;

	return NULL;
Linus Torvalds's avatar
Linus Torvalds committed
757
}
758
759
EXPORT_SYMBOL(dev_getbyhwaddr);

760
struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
Linus Torvalds's avatar
Linus Torvalds committed
761
762
763
{
	struct net_device *dev;

764
	ASSERT_RTNL();
765
	for_each_netdev(net, dev)
766
		if (dev->type == type)
767
768
769
			return dev;

	return NULL;
770
771
772
}
EXPORT_SYMBOL(__dev_getfirstbyhwtype);

773
/*
 * Locked variant of __dev_getfirstbyhwtype(): takes RTNL around the
 * lookup and adds a reference to the device that is returned (if any).
 * Returns NULL when no device of @type exists in @net.
 */
struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	rtnl_lock();
	dev = __dev_getfirstbyhwtype(net, type);
	/* Pin the device before RTNL is dropped. */
	if (dev)
		dev_hold(dev);
	rtnl_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *	dev_get_by_flags - find any device with given flags
788
 *	@net: the applicable net namespace
Linus Torvalds's avatar
Linus Torvalds committed
789
790
791
792
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags. Returns NULL if a device
793
 *	is not found or a pointer to the device. The device returned has
Linus Torvalds's avatar
Linus Torvalds committed
794
795
796
797
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

Eric Dumazet's avatar
Eric Dumazet committed
798
799
struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags,
				    unsigned short mask)
Linus Torvalds's avatar
Linus Torvalds committed
800
{
801
	struct net_device *dev, *ret;
Linus Torvalds's avatar
Linus Torvalds committed
802

803
	ret = NULL;
804
805
	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
Linus Torvalds's avatar
Linus Torvalds committed
806
807
		if (((dev->flags ^ if_flags) & mask) == 0) {
			dev_hold(dev);
808
			ret = dev;
Linus Torvalds's avatar
Linus Torvalds committed
809
810
811
			break;
		}
	}
812
	rcu_read_unlock();
813
	return ret;
Linus Torvalds's avatar
Linus Torvalds committed
814
}
Eric Dumazet's avatar
Eric Dumazet committed
815
EXPORT_SYMBOL(dev_get_by_flags);
Linus Torvalds's avatar
Linus Torvalds committed
816
817
818
819
820
821

/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names
 *	to allow sysfs to work.  We also disallow any kind of
 *	whitespace.
 *
 *	Returns 1 if @name is acceptable and 0 otherwise.  A name is
 *	rejected when it is empty, does not fit in IFNAMSIZ bytes including
 *	the terminating NUL, is "." or "..", or contains '/' or whitespace.
 */
int dev_valid_name(const char *name)
{
	if (*name == '\0')
		return 0;
	if (strlen(name) >= IFNAMSIZ)
		return 0;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return 0;

	while (*name) {
		if (*name == '/' || isspace(*name))
			return 0;
		name++;
	}
	return 1;
}
Eric Dumazet's avatar
Eric Dumazet committed
841
EXPORT_SYMBOL(dev_valid_name);
Linus Torvalds's avatar
Linus Torvalds committed
842
843

/**
844
845
 *	__dev_alloc_name - allocate a name for a device
 *	@net: network namespace to allocate the device name in
Linus Torvalds's avatar
Linus Torvalds committed
846
 *	@name: name format string
847
 *	@buf:  scratch buffer and result name string
Linus Torvalds's avatar
Linus Torvalds committed
848
849
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
Stephen Hemminger's avatar
Stephen Hemminger committed
850
851
852
853
854
855
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
Linus Torvalds's avatar
Linus Torvalds committed
856
857
 */

858
static int __dev_alloc_name(struct net *net, const char *name, char *buf)
Linus Torvalds's avatar
Linus Torvalds committed
859
860
861
862
{
	int i = 0;
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
863
	unsigned long *inuse;
Linus Torvalds's avatar
Linus Torvalds committed
864
865
866
867
868
869
870
871
872
873
874
875
876
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be either one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
877
		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
Linus Torvalds's avatar
Linus Torvalds committed
878
879
880
		if (!inuse)
			return -ENOMEM;

881
		for_each_netdev(net, d) {
Linus Torvalds's avatar
Linus Torvalds committed
882
883
884
885
886
887
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/*  avoid cases where sscanf is not exact inverse of printf */
888
			snprintf(buf, IFNAMSIZ, name, i);
Linus Torvalds's avatar
Linus Torvalds committed
889
890
891
892
893
894
895
896
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

897
898
	if (buf != name)
		snprintf(buf, IFNAMSIZ, name, i);
899
	if (!__dev_get_by_name(net, buf))
Linus Torvalds's avatar
Linus Torvalds committed
900
901
902
903
904
905
906
907
908
		return i;

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}

909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string
 *
 *	Resolve a format string such as "lt%d" to a free name in the
 *	device's network namespace via __dev_alloc_name() and, on success,
 *	store the result in @dev->name. The caller must hold the dev_base
 *	or rtnl lock while allocating the name and adding the device in
 *	order to avoid duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */
int dev_alloc_name(struct net_device *dev, const char *name)
{
	char candidate[IFNAMSIZ];
	int unit;

	BUG_ON(!dev_net(dev));

	unit = __dev_alloc_name(dev_net(dev), name, candidate);
	if (unit < 0)
		return unit;

	strlcpy(dev->name, candidate, IFNAMSIZ);
	return unit;
}
EXPORT_SYMBOL(dev_alloc_name);
937

938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
/*
 * Validate @name and place the resulting device name in @buf.
 *
 * When @fmt is true and @name contains a '%', the name is treated as a
 * format string and the result of __dev_alloc_name() is returned.
 * Otherwise @name is taken literally: -EEXIST is returned if a device of
 * that name already exists in @net, else the name is copied to @buf
 * (unless @buf and @name are the same pointer) and 0 is returned.
 * Names rejected by dev_valid_name() yield -EINVAL.
 */
static int dev_get_valid_name(struct net *net, const char *name, char *buf,
			      bool fmt)
{
	if (!dev_valid_name(name))
		return -EINVAL;

	if (fmt && strchr(name, '%'))
		return __dev_alloc_name(net, name, buf);

	if (__dev_get_by_name(net, name))
		return -EEXIST;

	if (buf != name)
		strlcpy(buf, name, IFNAMSIZ);

	return 0;
}
Linus Torvalds's avatar
Linus Torvalds committed
953
954
955
956
957
958
959
960
961

/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string) must be at least IFNAMSIZ
 *
 *	Change name of a device, can pass format strings "eth%d".
 *	for wildcarding.
 */
962
int dev_change_name(struct net_device *dev, const char *newname)
Linus Torvalds's avatar
Linus Torvalds committed
963
{
964
	char oldname[IFNAMSIZ];
Linus Torvalds's avatar
Linus Torvalds committed
965
	int err = 0;
966
	int ret;
967
	struct net *net;
Linus Torvalds's avatar
Linus Torvalds committed
968
969

	ASSERT_RTNL();
970
	BUG_ON(!dev_net(dev));
Linus Torvalds's avatar
Linus Torvalds committed
971

972
	net = dev_net(dev);
Linus Torvalds's avatar
Linus Torvalds committed
973
974
975
	if (dev->flags & IFF_UP)
		return -EBUSY;

976
977
978
	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
		return 0;

979
980
	memcpy(oldname, dev->name, IFNAMSIZ);

981
982
983
	err = dev_get_valid_name(net, newname, dev->name, 1);
	if (err < 0)
		return err;
Linus Torvalds's avatar
Linus Torvalds committed
984

985
rollback:
986
987
988
	/* For now only devices in the initial network namespace
	 * are in sysfs.
	 */
989
	if (net_eq(net, &init_net)) {
990
991
992
993
994
		ret = device_rename(&dev->dev, dev->name);
		if (ret) {
			memcpy(dev->name, oldname, IFNAMSIZ);
			return ret;
		}
995
	}
996
997

	write_lock_bh(&dev_base_lock);
998
	hlist_del(&dev->name_hlist);
999
1000
1001
1002
1003
1004
	write_unlock_bh(&dev_base_lock);

	synchronize_rcu();

	write_lock_bh(&dev_base_lock);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
1005
1006
	write_unlock_bh(&dev_base_lock);

1007
	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1008
1009
1010
	ret = notifier_to_errno(ret);

	if (ret) {
1011
1012
		/* err >= 0 after dev_alloc_name() or stores the first errno */
		if (err >= 0) {
1013
1014
1015
			err = ret;
			memcpy(dev->name, oldname, IFNAMSIZ);
			goto rollback;
1016
1017
1018
1019
		} else {
			printk(KERN_ERR
			       "%s: name change rollback failed: %d.\n",
			       dev->name, ret);
1020
1021
		}
	}
Linus Torvalds's avatar
Linus Torvalds committed
1022
1023
1024
1025

	return err;
}

1026
1027
1028
1029
/**
 *	dev_set_alias - change ifalias of a device
 *	@dev: device
 *	@alias: name up to IFALIASZ
 *	@len: limit of bytes to copy from @alias
 *
 *	Set ifalias for a device. Must be called with the RTNL held.
 *	A @len of zero frees the current alias.
 *
 *	Returns @len on success (0 when the alias was cleared), -EINVAL if
 *	@len is IFALIASZ or more, or -ENOMEM if the buffer cannot be
 *	resized. On -ENOMEM the previous alias is left in place.
 */
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
	char *new_ifalias;

	ASSERT_RTNL();

	if (len >= IFALIASZ)
		return -EINVAL;

	if (!len) {
		/* kfree() accepts NULL, so no guard is needed. */
		kfree(dev->ifalias);
		dev->ifalias = NULL;
		return 0;
	}

	/*
	 * Resize through a temporary: assigning the krealloc() result
	 * directly to dev->ifalias would drop the only pointer to the
	 * old buffer when the allocation fails.
	 */
	new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
	if (!new_ifalias)
		return -ENOMEM;
	dev->ifalias = new_ifalias;

	strlcpy(dev->ifalias, alias, len+1);
	return len;
}


1058
/**
Stephen Hemminger's avatar
Stephen Hemminger committed
1059
 *	netdev_features_change - device changes features
1060
1061
1062
1063
1064
1065
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
1066
	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1067
1068
1069
}
EXPORT_SYMBOL(netdev_features_change);

Linus Torvalds's avatar
Linus Torvalds committed
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
/**
 *	netdev_state_change - device changes state
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed state. This function calls
 *	the notifier chains for netdev_chain and sends a NEWLINK message
 *	to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
1081
		call_netdevice_notifiers(NETDEV_CHANGE, dev);
Linus Torvalds's avatar
Linus Torvalds committed
1082
1083
1084
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}
Eric Dumazet's avatar
Eric Dumazet committed
1085
EXPORT_SYMBOL(netdev_state_change);
Linus Torvalds's avatar
Linus Torvalds committed
1086

1087
/**
 *	netdev_bonding_change - pass an event to the netdev notifiers
 *	@dev: device the event refers to
 *	@event: notifier event value, passed on unmodified
 *
 *	Sends @event for @dev down the netdev notifier chain.
 */
void netdev_bonding_change(struct net_device *dev, unsigned long event)
{
	call_netdevice_notifiers(event, dev);
}
EXPORT_SYMBOL(netdev_bonding_change);

Linus Torvalds's avatar
Linus Torvalds committed
1093
1094
/**
 *	dev_load 	- load a network module
1095
 *	@net: the applicable net namespace
Linus Torvalds's avatar
Linus Torvalds committed
1096
1097
1098
1099
1100
1101
1102
 *	@name: name of interface
 *
 *	If a network interface is not present and the process has suitable
 *	privileges this function loads the module. If module loading is not
 *	available in this kernel then it becomes a nop.
 */

1103
void dev_load(struct net *net, const char *name)
Linus Torvalds's avatar
Linus Torvalds committed
1104
{
1105
	struct net_device *dev;
Linus Torvalds's avatar
Linus Torvalds committed
1106

1107
1108
1109
	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	rcu_read_unlock();
Linus Torvalds's avatar
Linus Torvalds committed
1110

1111
	if (!dev && capable(CAP_NET_ADMIN))
Linus Torvalds's avatar
Linus Torvalds committed
1112
1113
		request_module("%s", name);
}
Eric Dumazet's avatar
Eric Dumazet committed
1114
EXPORT_SYMBOL(dev_load);
Linus Torvalds's avatar
Linus Torvalds committed
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129

/**
 *	dev_open	- prepare an interface for use.
 *	@dev:	device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
1130
	const struct net_device_ops *ops = dev->netdev_ops;
1131
	int ret;
Linus Torvalds's avatar
Linus Torvalds committed
1132

1133
1134
	ASSERT_RTNL();

Linus Torvalds's avatar
Linus Torvalds committed
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
	/*
	 *	Is it already up?
	 */

	if (dev->flags & IFF_UP)
		return 0;

	/*
	 *	Is it even present?
	 */
	if (!netif_device_present(dev))
		return -ENODEV;

1148
1149
1150
1151
1152
	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
	ret = notifier_to_errno(ret);
	if (ret)
		return ret;

Linus Torvalds's avatar
Linus Torvalds committed
1153
1154
1155
1156
	/*
	 *	Call device private open method
	 */
	set_bit(__LINK_STATE_START, &dev->state);
1157

1158
1159
	if (ops->ndo_validate_addr)
		ret = ops->ndo_validate_addr(dev);
1160

1161
1162
	if (!ret && ops->ndo_open)
		ret = ops->ndo_open(dev);
Linus Torvalds's avatar
Linus Torvalds committed
1163

1164
	/*
Linus Torvalds's avatar
Linus Torvalds committed
1165
1166
1167
	 *	If it went open OK then:
	 */

1168
1169
1170
	if (ret)
		clear_bit(__LINK_STATE_START, &dev->state);
	else {
Linus Torvalds's avatar
Linus Torvalds committed
1171
1172
1173
1174
1175
		/*
		 *	Set the flags.
		 */
		dev->flags |= IFF_UP;

1176
1177
1178
		/*
		 *	Enable NET_DMA
		 */
1179
		net_dmaengine_get();
1180

Linus Torvalds's avatar
Linus Torvalds committed
1181
1182
1183
		/*
		 *	Initialize multicasting status
		 */
1184
		dev_set_rx_mode(dev);
Linus Torvalds's avatar
Linus Torvalds committed
1185
1186
1187
1188
1189
1190
1191
1192
1193

		/*
		 *	Wakeup transmit queue engine
		 */
		dev_activate(dev);

		/*
		 *	... and announce new interface.
		 */
1194
		call_netdevice_notifiers(NETDEV_UP, dev);
Linus Torvalds's avatar
Linus Torvalds committed
1195
	}
1196

Linus Torvalds's avatar
Linus Torvalds committed
1197
1198
	return ret;
}
Eric Dumazet's avatar
Eric Dumazet committed
1199
EXPORT_SYMBOL(dev_open);
Linus Torvalds's avatar
Linus Torvalds committed
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211

/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 */
int dev_close(struct net_device *dev)
{
1212
	const struct net_device_ops *ops = dev->netdev_ops;
1213
1214
	ASSERT_RTNL();

1215
1216
	might_sleep();

Linus Torvalds's avatar
Linus Torvalds committed
1217
1218
1219
1220
1221
1222
1223
	if (!(dev->flags & IFF_UP))
		return 0;

	/*
	 *	Tell people we are going down, so that they can
	 *	prepare to death, when device is still operating.
	 */
1224
	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
Linus Torvalds's avatar
Linus Torvalds committed
1225
1226
1227
1228

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch poll list,
1229
1230
1231
1232
1233
	 * it can be even on different cpu. So just clear netif_running().
	 *
	 * dev->stop() will invoke napi_disable() on all of it's
	 * napi_struct instances on this device.
	 */
Linus Torvalds's avatar
Linus Torvalds committed
1234
1235
	smp_mb__after_clear_bit(); /* Commit netif_running(). */

1236
1237
	dev_deactivate(dev);

Linus Torvalds's avatar
Linus Torvalds committed
1238
1239
1240
1241
1242
1243
1244
	/*
	 *	Call the device specific close. This cannot fail.
	 *	Only if device is UP
	 *
	 *	We allow it to be called even after a DETACH hot-plug
	 *	event.
	 */
1245
1246
	if (ops->ndo_stop)
		ops->ndo_stop(dev);
Linus Torvalds's avatar
Linus Torvalds committed
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256

	/*
	 *	Device is now down.
	 */

	dev->flags &= ~IFF_UP;

	/*
	 * Tell people we are down
	 */
1257
	call_netdevice_notifiers(NETDEV_DOWN, dev);
Linus Torvalds's avatar
Linus Torvalds committed
1258

1259
1260
1261
	/*
	 *	Shutdown NET_DMA
	 */
1262
	net_dmaengine_put();
1263

Linus Torvalds's avatar
Linus Torvalds committed
1264
1265
	return 0;
}
Eric Dumazet's avatar
Eric Dumazet committed
1266
EXPORT_SYMBOL(dev_close);
Linus Torvalds's avatar
Linus Torvalds committed
1267
1268


1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
/**
 *	dev_disable_lro - disable Large Receive Offload on a device
 *	@dev: device
 *
 *	Disable Large Receive Offload (LRO) on a net device.  Must be
 *	called under RTNL.  This is needed if received packets may be
 *	forwarded to another interface.
 *
 *	The flag is cleared through the device's ethtool get_flags and
 *	set_flags operations when both are provided. A warning is raised
 *	if NETIF_F_LRO is still set in the device features afterwards.
 */
void dev_disable_lro(struct net_device *dev)
{
	u32 flags;

	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
	    dev->ethtool_ops->set_flags) {
		flags = dev->ethtool_ops->get_flags(dev);
		/* Only write the flags back if LRO was actually enabled. */
		if (flags & ETH_FLAG_LRO)
			dev->ethtool_ops->set_flags(dev,
						    flags & ~ETH_FLAG_LRO);
	}
	WARN_ON(dev->features & NETIF_F_LRO);
}
EXPORT_SYMBOL(dev_disable_lro);


1292
1293
/*
 * Starts out non-zero. While it is set, register_netdevice_notifier()
 * adds the notifier to the chain but does not replay events for the
 * devices that already exist.
 * NOTE(review): presumably cleared once boot-time initialisation is
 * complete; the place that clears it is not visible here - confirm.
 */
static int dev_boot_phase = 1;

Linus Torvalds's avatar
Linus Torvalds committed
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
/*
 *	Device change register/unregister. These are not inline or static
 *	as we export them to the world.
 */

/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered. A negative errno code
 *	is returned on a failure.
 *
 * 	When registered all registration and up events are replayed
1309
 *	to the new notifier to allow device to have a race free
Linus Torvalds's avatar
Linus Torvalds committed
1310
1311
1312
1313
1314
1315
 *	view of the network device list.
 */

int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
1316
	struct net_device *last;
1317
	struct net *net;
Linus Torvalds's avatar
Linus Torvalds committed
1318
1319
1320
	int err;

	rtnl_lock();
1321
	err = raw_notifier_chain_register(&netdev_chain, nb);
1322
1323
	if (err)
		goto unlock;
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
	if (dev_boot_phase)
		goto unlock;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
			err = notifier_to_errno(err);
			if (err)
				goto rollback;

			if (!(dev->flags & IFF_UP))
				continue;
Linus Torvalds's avatar
Linus Torvalds committed
1335

1336
1337
			nb->notifier_call(nb, NETDEV_UP, dev);
		}
Linus Torvalds's avatar
Linus Torvalds committed
1338
	}
1339
1340

unlock:
Linus Torvalds's avatar
Linus Torvalds committed
1341
1342
	rtnl_unlock();
	return err;
1343
1344
1345

rollback:
	last = dev;
1346
1347
1348
1349
	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev == last)
				break;
1350

1351
1352
1353
1354
1355
			if (dev->flags & IFF_UP) {
				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
				nb->notifier_call(nb, NETDEV_DOWN, dev);
			}
			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1356
			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
1357
1358
		}
	}
1359
1360

	raw_notifier_chain_unregister(&netdev_chain, nb);
1361
	goto unlock;
Linus Torvalds's avatar
Linus Torvalds committed
1362
}
Eric Dumazet's avatar
Eric Dumazet committed
1363
EXPORT_SYMBOL(register_netdevice_notifier);
Linus Torvalds's avatar
Linus Torvalds committed
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376

/**
 *	unregister_netdevice_notifier - unregister a network notifier block
 *	@nb: notifier
 *
 *	Unregister a notifier previously registered by
 *	register_netdevice_notifier(). The notifier is unlinked into the
 *	kernel structures and may then be reused. A negative errno code
 *	is returned on a failure.
 */

int unregister_netdevice_notifier(struct notifier_block *nb)
{
1377
1378
1379
	int err;

	rtnl_lock();
1380
	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1381
1382
	rtnl_unlock();
	return err;
Linus Torvalds's avatar
Linus Torvalds committed
1383
}
Eric Dumazet's avatar
Eric Dumazet committed
1384
EXPORT_SYMBOL(unregister_netdevice_notifier);
Linus Torvalds's avatar
Linus Torvalds committed
1385
1386
1387
1388

/**
 *	call_netdevice_notifiers - call all network notifier blocks
 *      @val: value passed unmodified to notifier function
1389
 *      @dev: net_device pointer passed unmodified to notifier function
Linus Torvalds's avatar
Linus Torvalds committed
1390
1391
 *
 *	Call all network notifier blocks.  Parameters and return value
1392
 *	are as for raw_notifier_call_chain().
Linus Torvalds's avatar
Linus Torvalds committed
1393
1394
 */

1395
int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
Linus Torvalds's avatar
Linus Torvalds committed
1396
{
1397
	return raw_notifier_call_chain(&netdev_chain, val, dev);
Linus Torvalds's avatar
Linus Torvalds committed
1398
1399
1400
1401
1402
1403
1404
1405
1406
}

/* When > 0 there are consumers of rx skb time stamps */
static atomic_t netstamp_needed = ATOMIC_INIT(0);

/* Add one consumer of rx skb time stamps. */
void net_enable_timestamp(void)
{
	atomic_inc(&netstamp_needed);
}
EXPORT_SYMBOL(net_enable_timestamp);
Linus Torvalds's avatar
Linus Torvalds committed
1408
1409
1410
1411
1412

/* Drop one consumer of rx skb time stamps. */
void net_disable_timestamp(void)
{
	atomic_dec(&netstamp_needed);
}
EXPORT_SYMBOL(net_disable_timestamp);
Linus Torvalds's avatar
Linus Torvalds committed
1414

1415
static inline void net_timestamp(struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
1416
1417
{
	if (atomic_read(&netstamp_needed))
1418
		__net_timestamp(skb);
1419
1420
	else
		skb->tstamp.tv64 = 0;
Linus Torvalds's avatar
Linus Torvalds committed
1421
1422
}

1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
/**
 * dev_forward_skb - loopback an skb to another netif
 *
 * @dev: destination network device
 * @skb: buffer to forward
 *
 * return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP     (packet was dropped)
 *
 * dev_forward_skb can be used for injecting an skb from the
 * start_xmit function of one device into the receive queue
 * of another device.
 *
 * The receiving device may be in another namespace, so
 * we have to clear all information in the skb that could
 * impact namespace isolation.
 *
 * The skb is orphaned before any check is made. When the packet is
 * dropped because @dev is down or the skb is longer than the device
 * MTU plus hard header length, this function returns without freeing
 * the skb.
 */
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
{
	skb_orphan(skb);

	/* Refuse packets for a down device and packets that do not fit. */
	if (!(dev->flags & IFF_UP) ||
	    skb->len > (dev->mtu + dev->hard_header_len))
		return NET_RX_DROP;

	/* Scrub state that must not cross into the receiving side. */
	skb_dst_drop(skb);
	skb->tstamp.tv64 = 0;
	skb->mark = 0;

	/* Set the default packet type before eth_type_trans() runs. */
	skb->pkt_type = PACKET_HOST;
	skb->protocol = eth_type_trans(skb, dev);

	secpath_reset(skb);
	nf_reset(skb);

	return netif_rx(skb);
}
EXPORT_SYMBOL_GPL(dev_forward_skb);

Linus Torvalds's avatar
Linus Torvalds committed
1462
1463
1464
1465
1466
/*
 *	Support routine. Sends outgoing frames to any network
 *	taps currently in use.
 */