/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Definitions for the Interfaces handler.
 *
 * Version:	@(#)dev.h	1.0.10	08/12/93
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Donald J. Becker, <becker@cesdis.gsfc.nasa.gov>
 *		Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *		Bjorn Ekwall. <bj0rn@blox.se>
 *              Pekka Riikonen <priikone@poseidon.pspt.fi>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *		Moved to /usr/include/linux for NET3
 */
#ifndef _LINUX_NETDEVICE_H
#define _LINUX_NETDEVICE_H

#include <linux/timer.h>
#include <linux/bug.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include <linux/prefetch.h>
#include <asm/cache.h>
#include <asm/byteorder.h>

#include <linux/percpu.h>
#include <linux/rculist.h>
#include <linux/dmaengine.h>
#include <linux/workqueue.h>
#include <linux/dynamic_queue_limits.h>

#include <linux/ethtool.h>
#include <net/net_namespace.h>
#include <net/dsa.h>
#ifdef CONFIG_DCB
#include <net/dcbnl.h>
#endif
#include <net/netprio_cgroup.h>

#include <linux/netdev_features.h>
#include <linux/neighbour.h>
#include <uapi/linux/netdevice.h>
#include <uapi/linux/if_bonding.h>
#include <uapi/linux/pkt_cls.h>
#include <linux/hashtable.h>

struct netpoll_info;
struct device;
struct phy_device;
/* 802.11 specific */
struct wireless_dev;
/* 802.15.4 specific */
struct wpan_dev;
struct mpls_dev;
/* UDP Tunnel offloads */
struct udp_tunnel_info;
struct bpf_prog;

void netdev_set_default_ethtool_ops(struct net_device *dev,
				    const struct ethtool_ops *ops);

/* Backlog congestion levels */
#define NET_RX_SUCCESS		0	/* keep 'em coming, baby */
#define NET_RX_DROP		1	/* packet dropped */

/*
 * Transmit return codes: transmit return codes originate from three different
 * namespaces:
 *
 * - qdisc return codes
 * - driver transmit return codes
 * - errno values
 *
 * Drivers are allowed to return any one of those in their hard_start_xmit()
 * function. Real network devices commonly used with qdiscs should only return
 * the driver transmit return codes though - when qdiscs are used, the actual
 * transmission happens asynchronously, so the value is not propagated to
 * higher layers. Virtual network devices transmit synchronously; in this case
 * the driver transmit return codes are consumed by dev_queue_xmit(), and all
 * others are propagated to higher layers.
 */

/* qdisc ->enqueue() return codes. */
#define NET_XMIT_SUCCESS	0x00
#define NET_XMIT_DROP		0x01	/* skb dropped			*/
#define NET_XMIT_CN		0x02	/* congestion notification	*/
#define NET_XMIT_MASK		0x0f	/* qdisc flags in net/sch_generic.h */

/* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It
 * indicates that the device will soon be dropping packets, or already drops
 * some packets of the same priority; prompting us to send less aggressively. */
#define net_xmit_eval(e)	((e) == NET_XMIT_CN ? 0 : (e))
#define net_xmit_errno(e)	((e) != NET_XMIT_CN ? -ENOBUFS : 0)

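/* Illustrative sketch (sample_tunnel_xmit() and its stats handling are
 * hypothetical, not part of this header): a virtual device that hands
 * packets to dev_queue_xmit() can fold NET_XMIT_CN into success with
 * net_xmit_eval(), since CN only signals congestion, not loss:
 *
 *	static void sample_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 *	{
 *		int rc = dev_queue_xmit(skb);
 *
 *		if (net_xmit_eval(rc) == 0)
 *			dev->stats.tx_packets++;
 *		else
 *			dev->stats.tx_errors++;
 *	}
 */
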
/* Driver transmit return codes */
#define NETDEV_TX_MASK		0xf0

enum netdev_tx {
	__NETDEV_TX_MIN	 = INT_MIN,	/* make sure enum is signed */
	NETDEV_TX_OK	 = 0x00,	/* driver took care of packet */
	NETDEV_TX_BUSY	 = 0x10,	/* driver tx path was busy */
};
typedef enum netdev_tx netdev_tx_t;

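/* Illustrative sketch of the expected driver behaviour (the sample_* names
 * are hypothetical): stop the queue while the ring is full so that
 * NETDEV_TX_BUSY remains the exceptional path:
 *
 *	static netdev_tx_t sample_start_xmit(struct sk_buff *skb,
 *					     struct net_device *dev)
 *	{
 *		struct sample_priv *priv = netdev_priv(dev);
 *
 *		if (sample_tx_ring_full(priv)) {
 *			netif_stop_queue(dev);
 *			return NETDEV_TX_BUSY;
 *		}
 *		sample_hw_xmit(priv, skb);
 *		return NETDEV_TX_OK;
 *	}
 */
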
/*
 * Current order: NETDEV_TX_MASK > NET_XMIT_MASK >= 0 is significant;
 * hard_start_xmit() return < NET_XMIT_MASK means skb was consumed.
 */
static inline bool dev_xmit_complete(int rc)
{
	/*
	 * Positive cases with an skb consumed by a driver:
	 * - successful transmission (rc == NETDEV_TX_OK)
	 * - error while transmitting (rc < 0)
	 * - error while queueing to a different device (rc & NET_XMIT_MASK)
	 */
	if (likely(rc < NET_XMIT_MASK))
		return true;

	return false;
}

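/* Illustrative caller-side pattern (a simplified sketch of what the core
 * transmit path does, not a verbatim excerpt): once dev_xmit_complete()
 * returns true, the skb belongs to the driver and must not be retried:
 *
 *	rc = ops->ndo_start_xmit(skb, dev);
 *	if (dev_xmit_complete(rc))
 *		txq_trans_update(txq);
 */
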
/*
 *	Compute the worst-case header length according to the protocols
 *	used.
 */

#if defined(CONFIG_HYPERV_NET)
# define LL_MAX_HEADER 128
#elif defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25)
# if defined(CONFIG_MAC80211_MESH)
#  define LL_MAX_HEADER 128
# else
#  define LL_MAX_HEADER 96
# endif
#else
# define LL_MAX_HEADER 32
#endif

#if !IS_ENABLED(CONFIG_NET_IPIP) && !IS_ENABLED(CONFIG_NET_IPGRE) && \
    !IS_ENABLED(CONFIG_IPV6_SIT) && !IS_ENABLED(CONFIG_IPV6_TUNNEL)
#define MAX_HEADER LL_MAX_HEADER
#else
#define MAX_HEADER (LL_MAX_HEADER + 48)
#endif

/*
 *	Old network device statistics. Fields are native words
 *	(unsigned long) so they can be read and written atomically.
 */

struct net_device_stats {
	unsigned long	rx_packets;
	unsigned long	tx_packets;
	unsigned long	rx_bytes;
	unsigned long	tx_bytes;
	unsigned long	rx_errors;
	unsigned long	tx_errors;
	unsigned long	rx_dropped;
	unsigned long	tx_dropped;
	unsigned long	multicast;
	unsigned long	collisions;
	unsigned long	rx_length_errors;
	unsigned long	rx_over_errors;
	unsigned long	rx_crc_errors;
	unsigned long	rx_frame_errors;
	unsigned long	rx_fifo_errors;
	unsigned long	rx_missed_errors;
	unsigned long	tx_aborted_errors;
	unsigned long	tx_carrier_errors;
	unsigned long	tx_fifo_errors;
	unsigned long	tx_heartbeat_errors;
	unsigned long	tx_window_errors;
	unsigned long	rx_compressed;
	unsigned long	tx_compressed;
};


#include <linux/cache.h>
#include <linux/skbuff.h>

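/* Illustrative use: a driver with no 64-bit stats implementation updates
 * dev->stats directly from its own serialized RX/TX paths, e.g.:
 *
 *	dev->stats.rx_errors++;
 *	dev->stats.rx_crc_errors++;
 *
 * Each field is one native word, so readers always see whole values; the
 * driver itself must serialize writers (see the ndo_get_stats documentation
 * further down).
 */
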
#ifdef CONFIG_RPS
#include <linux/static_key.h>
extern struct static_key rps_needed;
#endif

struct neighbour;
struct neigh_parms;
struct sk_buff;

struct netdev_hw_addr {
	struct list_head	list;
	unsigned char		addr[MAX_ADDR_LEN];
	unsigned char		type;
#define NETDEV_HW_ADDR_T_LAN		1
#define NETDEV_HW_ADDR_T_SAN		2
#define NETDEV_HW_ADDR_T_SLAVE		3
#define NETDEV_HW_ADDR_T_UNICAST	4
#define NETDEV_HW_ADDR_T_MULTICAST	5
	bool			global_use;
	int			sync_cnt;
	int			refcount;
	int			synced;
	struct rcu_head		rcu_head;
};

struct netdev_hw_addr_list {
	struct list_head	list;
	int			count;
};

#define netdev_hw_addr_list_count(l) ((l)->count)
#define netdev_hw_addr_list_empty(l) (netdev_hw_addr_list_count(l) == 0)
#define netdev_hw_addr_list_for_each(ha, l) \
	list_for_each_entry(ha, &(l)->list, list)

#define netdev_uc_count(dev) netdev_hw_addr_list_count(&(dev)->uc)
#define netdev_uc_empty(dev) netdev_hw_addr_list_empty(&(dev)->uc)
#define netdev_for_each_uc_addr(ha, dev) \
	netdev_hw_addr_list_for_each(ha, &(dev)->uc)

#define netdev_mc_count(dev) netdev_hw_addr_list_count(&(dev)->mc)
#define netdev_mc_empty(dev) netdev_hw_addr_list_empty(&(dev)->mc)
#define netdev_for_each_mc_addr(ha, dev) \
	netdev_hw_addr_list_for_each(ha, &(dev)->mc)

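/* Usage sketch (illustrative; sample_hw_add_mc_filter() is hypothetical):
 * a driver's ->ndo_set_rx_mode() walks the device's multicast list with
 * netdev_for_each_mc_addr() to program its hardware filter:
 *
 *	static void sample_set_rx_mode(struct net_device *dev)
 *	{
 *		struct netdev_hw_addr *ha;
 *
 *		netdev_for_each_mc_addr(ha, dev)
 *			sample_hw_add_mc_filter(dev, ha->addr);
 *	}
 */
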
struct hh_cache {
	u16		hh_len;
	u16		__pad;
	seqlock_t	hh_lock;

	/* cached hardware header; allow for machine alignment needs.        */
#define HH_DATA_MOD	16
#define HH_DATA_OFF(__len) \
	(HH_DATA_MOD - (((__len - 1) & (HH_DATA_MOD - 1)) + 1))
#define HH_DATA_ALIGN(__len) \
	(((__len)+(HH_DATA_MOD-1))&~(HH_DATA_MOD - 1))
	unsigned long	hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)];
};

/* Reserve HH_DATA_MOD byte-aligned hard_header_len, but at least that much.
 * Alternative is:
 *   dev->hard_header_len ? (dev->hard_header_len +
 *                           (HH_DATA_MOD - 1)) & ~(HH_DATA_MOD - 1) : 0
 *
 * We could use other alignment values, but we must maintain the
 * relationship HH alignment <= LL alignment.
 */
#define LL_RESERVED_SPACE(dev) \
	((((dev)->hard_header_len+(dev)->needed_headroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
#define LL_RESERVED_SPACE_EXTRA(dev,extra) \
	((((dev)->hard_header_len+(dev)->needed_headroom+(extra))&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)

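/* Illustrative sketch (payload_len is a placeholder): a sender reserves
 * LL_RESERVED_SPACE(dev) of headroom up front so the link-layer header can
 * later be pushed without reallocating:
 *
 *	skb = alloc_skb(LL_RESERVED_SPACE(dev) + payload_len, GFP_ATOMIC);
 *	if (!skb)
 *		return -ENOMEM;
 *	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 */
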
struct header_ops {
	int	(*create) (struct sk_buff *skb, struct net_device *dev,
			   unsigned short type, const void *daddr,
			   const void *saddr, unsigned int len);
	int	(*parse)(const struct sk_buff *skb, unsigned char *haddr);
	int	(*cache)(const struct neighbour *neigh, struct hh_cache *hh, __be16 type);
	void	(*cache_update)(struct hh_cache *hh,
				const struct net_device *dev,
				const unsigned char *haddr);
	bool	(*validate)(const char *ll_header, unsigned int len);
};

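/* Wiring sketch: an Ethernet-like device normally reuses the shared
 * implementation rather than providing its own callbacks (eth_header_ops is
 * the stock instance from net/ethernet/eth.c):
 *
 *	dev->header_ops = &eth_header_ops;
 *
 * A custom link layer would instead declare a static struct header_ops with
 * its own ->create()/->parse() and assign it during device setup.
 */
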
/* These flag bits are private to the generic network queueing
 * layer; they may not be explicitly referenced by any other
 * code.
 */

enum netdev_state_t {
	__LINK_STATE_START,
	__LINK_STATE_PRESENT,
	__LINK_STATE_NOCARRIER,
	__LINK_STATE_LINKWATCH_PENDING,
	__LINK_STATE_DORMANT,
};


/*
 * This structure holds boot-time configured netdevice settings. They
 * are then used in the device probing.
 */
struct netdev_boot_setup {
	char name[IFNAMSIZ];
	struct ifmap map;
};
#define NETDEV_BOOT_SETUP_MAX 8

int __init netdev_boot_setup(char *str);

/*
 * Structure for NAPI scheduling similar to tasklet but with weighting
 */
struct napi_struct {
	/* The poll_list must only be managed by the entity which
	 * changes the state of the NAPI_STATE_SCHED bit.  This means
	 * whoever atomically sets that bit can add this napi_struct
	 * to the per-CPU poll_list, and whoever clears that bit
	 * can remove from the list right before clearing the bit.
	 */
	struct list_head	poll_list;

	unsigned long		state;
	int			weight;
	unsigned int		gro_count;
	int			(*poll)(struct napi_struct *, int);
#ifdef CONFIG_NETPOLL
	spinlock_t		poll_lock;
	int			poll_owner;
#endif
	struct net_device	*dev;
	struct sk_buff		*gro_list;
	struct sk_buff		*skb;
	struct hrtimer		timer;
	struct list_head	dev_list;
	struct hlist_node	napi_hash_node;
	unsigned int		napi_id;
};

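/* Registration sketch (sample_poll() and struct sample_priv are
 * hypothetical): the napi_struct is usually embedded in the driver's
 * private data and registered with a poll callback and weight:
 *
 *	netif_napi_add(dev, &priv->napi, sample_poll, NAPI_POLL_WEIGHT);
 *	napi_enable(&priv->napi);
 */
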
enum {
	NAPI_STATE_SCHED,	/* Poll is scheduled */
	NAPI_STATE_DISABLE,	/* Disable pending */
	NAPI_STATE_NPSVC,	/* Netpoll - don't dequeue from poll_list */
	NAPI_STATE_HASHED,	/* In NAPI hash (busy polling possible) */
	NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
	NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
};

enum {
	NAPIF_STATE_SCHED	 = (1UL << NAPI_STATE_SCHED),
	NAPIF_STATE_DISABLE	 = (1UL << NAPI_STATE_DISABLE),
	NAPIF_STATE_NPSVC	 = (1UL << NAPI_STATE_NPSVC),
	NAPIF_STATE_HASHED	 = (1UL << NAPI_STATE_HASHED),
	NAPIF_STATE_NO_BUSY_POLL = (1UL << NAPI_STATE_NO_BUSY_POLL),
	NAPIF_STATE_IN_BUSY_POLL = (1UL << NAPI_STATE_IN_BUSY_POLL),
};

enum gro_result {
	GRO_MERGED,
	GRO_MERGED_FREE,
	GRO_HELD,
	GRO_NORMAL,
	GRO_DROP,
};
typedef enum gro_result gro_result_t;

/*
 * enum rx_handler_result - Possible return values for rx_handlers.
 * @RX_HANDLER_CONSUMED: skb was consumed by rx_handler, do not process it
 * further.
 * @RX_HANDLER_ANOTHER: Do another round in receive path. This is indicated in
 * case skb->dev was changed by rx_handler.
 * @RX_HANDLER_EXACT: Force exact delivery, no wildcard.
 * @RX_HANDLER_PASS: Do nothing, pass the skb as if no rx_handler was called.
 *
 * rx_handlers are functions called from inside __netif_receive_skb(), to do
 * special processing of the skb, prior to delivery to protocol handlers.
 *
 * Currently, a net_device can only have a single rx_handler registered. Trying
 * to register a second rx_handler will return -EBUSY.
 *
 * To register a rx_handler on a net_device, use netdev_rx_handler_register().
 * To unregister a rx_handler on a net_device, use
 * netdev_rx_handler_unregister().
 *
 * Upon return, rx_handler is expected to tell __netif_receive_skb() what to
 * do with the skb.
 *
 * If the rx_handler consumed the skb in some way, it should return
 * RX_HANDLER_CONSUMED. This is appropriate when the rx_handler arranged for
 * the skb to be delivered in some other way.
 *
 * If the rx_handler changed skb->dev, to divert the skb to another
 * net_device, it should return RX_HANDLER_ANOTHER. The rx_handler for the
 * new device will be called if it exists.
 *
 * If the rx_handler decides the skb should be ignored, it should return
 * RX_HANDLER_EXACT. The skb will only be delivered to protocol handlers that
 * are registered on exact device (ptype->dev == skb->dev).
 *
 * If the rx_handler didn't change skb->dev, but wants the skb to be normally
 * delivered, it should return RX_HANDLER_PASS.
 *
 * A device without a registered rx_handler will behave as if rx_handler
 * returned RX_HANDLER_PASS.
 */

enum rx_handler_result {
	RX_HANDLER_CONSUMED,
	RX_HANDLER_ANOTHER,
	RX_HANDLER_EXACT,
	RX_HANDLER_PASS,
};
typedef enum rx_handler_result rx_handler_result_t;
typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb);

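/* Minimal rx_handler sketch (the sample_* names are hypothetical): an upper
 * device that takes over every frame from its lower device could use:
 *
 *	static rx_handler_result_t sample_handle_frame(struct sk_buff **pskb)
 *	{
 *		sample_queue_frame(*pskb);
 *		return RX_HANDLER_CONSUMED;
 *	}
 *
 * registered under RTNL with
 * netdev_rx_handler_register(dev, sample_handle_frame, NULL).
 */
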
void __napi_schedule(struct napi_struct *n);
void __napi_schedule_irqoff(struct napi_struct *n);

static inline bool napi_disable_pending(struct napi_struct *n)
{
	return test_bit(NAPI_STATE_DISABLE, &n->state);
}

/**
 *	napi_schedule_prep - check if NAPI can be scheduled
 *	@n: NAPI context
 *
 * Test if NAPI routine is already running, and if not mark
 * it as running.  This is used as a condition variable to
 * ensure only one NAPI poll instance runs.  We also make
 * sure there is no pending NAPI disable.
 */
static inline bool napi_schedule_prep(struct napi_struct *n)
{
	return !napi_disable_pending(n) &&
		!test_and_set_bit(NAPI_STATE_SCHED, &n->state);
}

/**
 *	napi_schedule - schedule NAPI poll
 *	@n: NAPI context
 *
 * Schedule NAPI poll routine to be called if it is not already
 * running.
 */
static inline void napi_schedule(struct napi_struct *n)
{
	if (napi_schedule_prep(n))
		__napi_schedule(n);
}

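/* Usage sketch (sample_mask_irqs() and struct sample_priv are hypothetical):
 * the classic NAPI pattern masks device interrupts in the hard irq handler
 * and defers all work to the poll routine via napi_schedule():
 *
 *	static irqreturn_t sample_interrupt(int irq, void *data)
 *	{
 *		struct sample_priv *priv = data;
 *
 *		sample_mask_irqs(priv);
 *		napi_schedule(&priv->napi);
 *		return IRQ_HANDLED;
 *	}
 */
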
/**
 *	napi_schedule_irqoff - schedule NAPI poll
 *	@n: NAPI context
 *
 * Variant of napi_schedule(), assuming hard irqs are masked.
 */
static inline void napi_schedule_irqoff(struct napi_struct *n)
{
	if (napi_schedule_prep(n))
		__napi_schedule_irqoff(n);
}

/* Try to reschedule poll. Called by dev->poll() after napi_complete().  */
static inline bool napi_reschedule(struct napi_struct *napi)
{
	if (napi_schedule_prep(napi)) {
		__napi_schedule(napi);
		return true;
	}
	return false;
}

bool __napi_complete(struct napi_struct *n);
bool napi_complete_done(struct napi_struct *n, int work_done);
/**
 *	napi_complete - NAPI processing complete
 *	@n: NAPI context
 *
 * Mark NAPI processing as complete.
 * Consider using napi_complete_done() instead.
 * Return false if device should avoid rearming interrupts.
 */
static inline bool napi_complete(struct napi_struct *n)
{
	return napi_complete_done(n, 0);
}

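/* Poll-routine sketch (sample_rx() and sample_unmask_irqs() are
 * hypothetical): process up to @budget packets, and only when the device is
 * fully drained complete NAPI and re-arm interrupts:
 *
 *	static int sample_poll(struct napi_struct *napi, int budget)
 *	{
 *		struct sample_priv *priv = container_of(napi, struct sample_priv, napi);
 *		int done = sample_rx(priv, budget);
 *
 *		if (done < budget && napi_complete_done(napi, done))
 *			sample_unmask_irqs(priv);
 *		return done;
 *	}
 */
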
/**
 *	napi_hash_del - remove a NAPI from global table
 *	@napi: NAPI context
 *
 * Warning: caller must observe RCU grace period
 * before freeing memory containing @napi, if
 * this function returns true.
 * Note: core networking stack automatically calls it
 * from netif_napi_del().
 * Drivers might want to call this helper to combine all
 * the needed RCU grace periods into a single one.
 */
bool napi_hash_del(struct napi_struct *napi);

/**
 *	napi_disable - prevent NAPI from scheduling
 *	@n: NAPI context
 *
 * Stop NAPI from being scheduled on this context.
 * Waits till any outstanding processing completes.
 */
void napi_disable(struct napi_struct *n);

/**
 *	napi_enable - enable NAPI scheduling
 *	@n: NAPI context
 *
 * Resume NAPI from being scheduled on this context.
 * Must be paired with napi_disable.
 */
static inline void napi_enable(struct napi_struct *n)
{
	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
	smp_mb__before_atomic();
	clear_bit(NAPI_STATE_SCHED, &n->state);
	clear_bit(NAPI_STATE_NPSVC, &n->state);
}

/**
 *	napi_synchronize - wait until NAPI is not running
 *	@n: NAPI context
 *
 * Wait until NAPI is done being scheduled on this context.
 * Waits till any outstanding processing completes but
 * does not disable future activations.
 */
static inline void napi_synchronize(const struct napi_struct *n)
{
	if (IS_ENABLED(CONFIG_SMP))
		while (test_bit(NAPI_STATE_SCHED, &n->state))
			msleep(1);
	else
		barrier();
}

enum netdev_queue_state_t {
	__QUEUE_STATE_DRV_XOFF,
	__QUEUE_STATE_STACK_XOFF,
	__QUEUE_STATE_FROZEN,
};

#define QUEUE_STATE_DRV_XOFF	(1 << __QUEUE_STATE_DRV_XOFF)
#define QUEUE_STATE_STACK_XOFF	(1 << __QUEUE_STATE_STACK_XOFF)
#define QUEUE_STATE_FROZEN	(1 << __QUEUE_STATE_FROZEN)

#define QUEUE_STATE_ANY_XOFF	(QUEUE_STATE_DRV_XOFF | QUEUE_STATE_STACK_XOFF)
#define QUEUE_STATE_ANY_XOFF_OR_FROZEN (QUEUE_STATE_ANY_XOFF | \
					QUEUE_STATE_FROZEN)
#define QUEUE_STATE_DRV_XOFF_OR_FROZEN (QUEUE_STATE_DRV_XOFF | \
					QUEUE_STATE_FROZEN)

/*
 * __QUEUE_STATE_DRV_XOFF is used by drivers to stop the transmit queue.  The
 * netif_tx_* functions below are used to manipulate this flag.  The
 * __QUEUE_STATE_STACK_XOFF flag is used by the stack to stop the transmit
 * queue independently.  The netif_xmit_*stopped functions below are called
 * to check if the queue has been stopped by the driver or stack (either
 * of the XOFF bits are set in the state).  Drivers should not need to call
 * netif_xmit*stopped functions, they should only be using netif_tx_*.
 */

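/* Flow-control sketch (the ring-accounting helpers are hypothetical): a
 * multiqueue driver pairs netif_tx_stop_queue() in its xmit path with
 * netif_tx_wake_queue() in its TX completion handler:
 *
 *	if (sample_ring_space(ring) < MAX_SKB_FRAGS + 1)
 *		netif_tx_stop_queue(txq);
 *
 * and later, once completions free enough descriptors:
 *
 *	if (netif_tx_queue_stopped(txq) && sample_ring_space(ring) > wake_thresh)
 *		netif_tx_wake_queue(txq);
 */
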
struct netdev_queue {
/*
 * read-mostly part
 */
	struct net_device	*dev;
	struct Qdisc __rcu	*qdisc;
	struct Qdisc		*qdisc_sleeping;
#ifdef CONFIG_SYSFS
	struct kobject		kobj;
#endif
#if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
	int			numa_node;
#endif
	unsigned long		tx_maxrate;
	/*
	 * Number of TX timeouts for this queue
	 * (/sys/class/net/DEV/Q/trans_timeout)
	 */
	unsigned long		trans_timeout;
/*
 * write-mostly part
 */
	spinlock_t		_xmit_lock ____cacheline_aligned_in_smp;
	int			xmit_lock_owner;
	/*
	 * Time (in jiffies) of last Tx
	 */
	unsigned long		trans_start;

	unsigned long		state;

#ifdef CONFIG_BQL
	struct dql		dql;
#endif
} ____cacheline_aligned_in_smp;

static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
{
#if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
	return q->numa_node;
#else
	return NUMA_NO_NODE;
#endif
}

static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node)
{
#if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
	q->numa_node = node;
#endif
}

#ifdef CONFIG_RPS
/*
 * This structure holds an RPS map which can be of variable length.  The
 * map is an array of CPUs.
 */
struct rps_map {
	unsigned int len;
	struct rcu_head rcu;
	u16 cpus[0];
};
#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))

/*
 * The rps_dev_flow structure contains the mapping of a flow to a CPU, the
 * tail pointer for that CPU's input queue at the time of last enqueue, and
 * a hardware filter index.
 */
struct rps_dev_flow {
	u16 cpu;
	u16 filter;
	unsigned int last_qtail;
};
#define RPS_NO_FILTER 0xffff

/*
 * The rps_dev_flow_table structure contains a table of flow mappings.
 */
struct rps_dev_flow_table {
	unsigned int mask;
	struct rcu_head rcu;
	struct rps_dev_flow flows[0];
};
#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
    ((_num) * sizeof(struct rps_dev_flow)))

/*
 * The rps_sock_flow_table contains mappings of flows to the last CPU
 * on which they were processed by the application (set in recvmsg).
 * Each entry is a 32bit value. Upper part is the high-order bits
 * of flow hash, lower part is CPU number.
 * rps_cpu_mask is used to partition the space, depending on number of
 * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
 * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f,
 * meaning we use 32-6=26 bits for the hash.
 */
struct rps_sock_flow_table {
	u32	mask;

	u32	ents[0] ____cacheline_aligned_in_smp;
};
#define	RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))

#define RPS_NO_CPU 0xffff

extern u32 rps_cpu_mask;
extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;

static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
					u32 hash)
{
	if (table && hash) {
		unsigned int index = hash & table->mask;
		u32 val = hash & ~rps_cpu_mask;

		/* We only give a hint, preemption can change CPU under us */
		val |= raw_smp_processor_id();

		if (table->ents[index] != val)
			table->ents[index] = val;
	}
}

#ifdef CONFIG_RFS_ACCEL
bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id,
			 u16 filter_id);
#endif
#endif /* CONFIG_RPS */

/* This structure contains an instance of an RX queue. */
struct netdev_rx_queue {
#ifdef CONFIG_RPS
	struct rps_map __rcu		*rps_map;
	struct rps_dev_flow_table __rcu	*rps_flow_table;
#endif
	struct kobject			kobj;
	struct net_device		*dev;
} ____cacheline_aligned_in_smp;

/*
 * RX queue sysfs structures and functions.
 */
struct rx_queue_attribute {
	struct attribute attr;
	ssize_t (*show)(struct netdev_rx_queue *queue,
	    struct rx_queue_attribute *attr, char *buf);
	ssize_t (*store)(struct netdev_rx_queue *queue,
	    struct rx_queue_attribute *attr, const char *buf, size_t len);
};

Tom Herbert's avatar
Tom Herbert committed
713
714
715
716
717
718
719
720
721
722
723
#ifdef CONFIG_XPS
/*
 * This structure holds an XPS map which can be of variable length.  The
 * map is an array of queues.
 */
struct xps_map {
	unsigned int len;
	unsigned int alloc_len;
	struct rcu_head rcu;
	u16 queues[0];
};
724
#define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + ((_num) * sizeof(u16)))
725
726
#define XPS_MIN_MAP_ALLOC ((L1_CACHE_ALIGN(offsetof(struct xps_map, queues[1])) \
       - sizeof(struct xps_map)) / sizeof(u16))
Tom Herbert's avatar
Tom Herbert committed
727
728
729
730
731
732

/*
 * This structure holds all XPS maps for device.  Maps are indexed by CPU.
 */
struct xps_dev_maps {
	struct rcu_head rcu;
Eric Dumazet's avatar
Eric Dumazet committed
733
	struct xps_map __rcu *cpu_map[0];
Tom Herbert's avatar
Tom Herbert committed
734
};
735
736
#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +		\
	(nr_cpu_ids * (_tcs) * sizeof(struct xps_map *)))
Tom Herbert's avatar
Tom Herbert committed
737
738
#endif /* CONFIG_XPS */

#define TC_MAX_QUEUE	16
#define TC_BITMASK	15
/* HW offloaded queuing disciplines txq count and offset maps */
struct netdev_tc_txq {
	u16 count;
	u16 offset;
};

#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
/*
 * This structure is to hold information about the device
 * configured to run FCoE protocol stack.
 */
struct netdev_fcoe_hbainfo {
	char	manufacturer[64];
	char	serial_number[64];
	char	hardware_version[64];
	char	driver_version[64];
	char	optionrom_version[64];
	char	firmware_version[64];
	char	model[256];
	char	model_description[256];
};
#endif

#define MAX_PHYS_ITEM_ID_LEN 32

/* This structure holds a unique identifier to identify some
 * physical item (port for example) used by a netdevice.
 */
struct netdev_phys_item_id {
	unsigned char id[MAX_PHYS_ITEM_ID_LEN];
	unsigned char id_len;
};

static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a,
					    struct netdev_phys_item_id *b)
{
	return a->id_len == b->id_len &&
	       memcmp(a->id, b->id, a->id_len) == 0;
}

typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
				       struct sk_buff *skb);

/* These structures hold the attributes of qdisc and classifiers
 * that are being passed to the netdevice through the setup_tc op.
 */
enum {
	TC_SETUP_MQPRIO,
	TC_SETUP_CLSU32,
	TC_SETUP_CLSFLOWER,
	TC_SETUP_MATCHALL,
	TC_SETUP_CLSBPF,
};

struct tc_cls_u32_offload;

struct tc_to_netdev {
	unsigned int type;
	union {
		u8 tc;
		struct tc_cls_u32_offload *cls_u32;
		struct tc_cls_flower_offload *cls_flower;
		struct tc_cls_matchall_offload *cls_mall;
		struct tc_cls_bpf_offload *cls_bpf;
	};
};

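/* Dispatch sketch (sample_setup_mqprio() is hypothetical): a driver's
 * ->ndo_setup_tc() switches on tc->type and consumes the union member that
 * corresponds to it:
 *
 *	static int sample_setup_tc(struct net_device *dev, u32 handle,
 *				   __be16 proto, struct tc_to_netdev *tc)
 *	{
 *		if (tc->type == TC_SETUP_MQPRIO)
 *			return sample_setup_mqprio(dev, tc->tc);
 *		return -EOPNOTSUPP;
 *	}
 */
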
/* These structures hold the attributes of xdp state that are being passed
 * to the netdevice through the xdp op.
 */
enum xdp_netdev_command {
	/* Set or clear a bpf program used in the earliest stages of packet
	 * rx. The prog will have been loaded as BPF_PROG_TYPE_XDP. The callee
	 * is responsible for calling bpf_prog_put on any old progs that are
	 * stored. In case of error, the callee need not release the new prog
	 * reference, but on success it takes ownership and must bpf_prog_put
	 * when it is no longer used.
	 */
	XDP_SETUP_PROG,
	/* Check if a bpf program is set on the device.  The callee should
	 * return true if a program is currently attached and running.
	 */
	XDP_QUERY_PROG,
};

struct netdev_xdp {
	enum xdp_netdev_command command;
	union {
		/* XDP_SETUP_PROG */
		struct bpf_prog *prog;
		/* XDP_QUERY_PROG */
		bool prog_attached;
	};
};

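/* Dispatch sketch (sample_xdp_setup(), struct sample_priv and its xdp_prog
 * field are hypothetical): a driver's ->ndo_xdp() handles both commands,
 * taking over the bpf_prog reference on XDP_SETUP_PROG success as described
 * above:
 *
 *	static int sample_xdp(struct net_device *dev, struct netdev_xdp *xdp)
 *	{
 *		struct sample_priv *priv = netdev_priv(dev);
 *
 *		switch (xdp->command) {
 *		case XDP_SETUP_PROG:
 *			return sample_xdp_setup(priv, xdp->prog);
 *		case XDP_QUERY_PROG:
 *			xdp->prog_attached = !!priv->xdp_prog;
 *			return 0;
 *		default:
 *			return -EINVAL;
 *		}
 *	}
 */
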
/*
 * This structure defines the management hooks for network devices.
 * The following hooks can be defined; unless noted otherwise, they are
 * optional and can be filled with a null pointer.
 *
 * int (*ndo_init)(struct net_device *dev);
 *     This function is called once when a network device is registered.
 *     The network device can use this for any late stage initialization
 *     or semantic validation. It can fail with an error code which will
 *     be propagated back to register_netdev.
 *
 * void (*ndo_uninit)(struct net_device *dev);
 *     This function is called when device is unregistered or when registration
 *     fails. It is not called if init fails.
 *
 * int (*ndo_open)(struct net_device *dev);
 *     This function is called when a network device transitions to the up
 *     state.
 *
 * int (*ndo_stop)(struct net_device *dev);
 *     This function is called when a network device transitions to the down
 *     state.
 *
 * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb,
 *                               struct net_device *dev);
 *	Called when a packet needs to be transmitted.
 *	Returns NETDEV_TX_OK.  Can return NETDEV_TX_BUSY, but you should stop
 *	the queue before that can happen; it's for obsolete devices and weird
 *	corner cases, but the stack really does a non-trivial amount
 *	of useless work if you return NETDEV_TX_BUSY.
 *	Required; cannot be NULL.
 *
 * netdev_features_t (*ndo_fix_features)(struct net_device *dev,
 *		netdev_features_t features);
 *	Adjusts the requested feature flags according to device-specific
 *	constraints, and returns the resulting flags. Must not modify
 *	the device state.
 *
 * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
 *                         void *accel_priv, select_queue_fallback_t fallback);
 *	Called to decide which queue to use when device supports multiple
 *	transmit queues.
 *
 * void (*ndo_change_rx_flags)(struct net_device *dev, int flags);
 *	This function is called to allow device receiver to make
 *	changes to configuration when multicast or promiscuous is enabled.
 *
 * void (*ndo_set_rx_mode)(struct net_device *dev);
 *	This function is called when the device's address list filtering changes.
 *	If driver handles unicast address filtering, it should set
 *	IFF_UNICAST_FLT in its priv_flags.
 *
 * int (*ndo_set_mac_address)(struct net_device *dev, void *addr);
 *	This function is called when the Media Access Control address
 *	needs to be changed. If this interface is not defined, the
 *	MAC address cannot be changed.
 *
 * int (*ndo_validate_addr)(struct net_device *dev);
 *	Test if Media Access Control address is valid for the device.
 *
 * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd);
 *	Called when a user requests an ioctl which can't be handled by
 *	the generic interface code. If not defined ioctls return
 *	not supported error code.
 *
 * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map);
 *	Used to set a network device's bus interface parameters. This interface
 *	is retained for legacy reasons; new devices should use the bus
 *	interface (PCI) for low level management.
 *
 * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu);
 *	Called when a user wants to change the Maximum Transfer Unit
 *	of a device. If not defined, any request to change MTU will
 *	return an error.
 *
 * void (*ndo_tx_timeout)(struct net_device *dev);
 *	Callback used when the transmitter has not made any progress
 *	for dev->watchdog ticks.
 *
 * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev,
 *                      struct rtnl_link_stats64 *storage);
 * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
 *	Called when a user wants to get the network device usage
 *	statistics. Drivers must do one of the following:
 *	1. Define @ndo_get_stats64 to fill in a zero-initialised
 *	   rtnl_link_stats64 structure passed by the caller.
 *	2. Define @ndo_get_stats to update a net_device_stats structure
 *	   (which should normally be dev->stats) and return a pointer to
 *	   it. The structure may be changed asynchronously only if each
 *	   field is written atomically.
 *	3. Update dev->stats asynchronously and atomically, and define
 *	   neither operation.
 *
 * bool (*ndo_has_offload_stats)(int attr_id)
 *	Return true if this device supports offload stats of this attr_id.
 *
 * int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev,
 *	void *attr_data)
 *	Get statistics for offload operations by attr_id. Write it into the
 *	attr_data pointer.
 *
 * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid);
 *	If device supports VLAN filtering this function is called when a
 *	VLAN id is registered.
 *
 * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, __be16 proto, u16 vid);
 *	If device supports VLAN filtering this function is called when a
 *	VLAN id is unregistered.
 *
 * void (*ndo_poll_controller)(struct net_device *dev);
 *
 *	SR-IOV management functions.
 * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
 * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan,
 *			  u8 qos, __be16 proto);
 * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate,
 *			  int max_tx_rate);
 * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting);
 * int (*ndo_set_vf_trust)(struct net_device *dev, int vf, bool setting);
 * int (*ndo_get_vf_config)(struct net_device *dev,
 *			    int vf, struct ifla_vf_info *ivf);
 * int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state);
 * int (*ndo_set_vf_port)(struct net_device *dev, int vf,
 *			  struct nlattr *port[]);
 *
 *      Enable or disable the VF ability to query its RSS Redirection Table and
 *      Hash Key. This is needed since on some devices VF share this information
 *      with PF and querying it may introduce a theoretical security risk.
 * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting);
 * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
 * int (*ndo_setup_tc)(struct net_device *dev, u8 tc)
 * 	Called to setup 'tc' number of traffic classes in the net device. This
 * 	is always called from the stack with the rtnl lock held and netif tx
 * 	queues stopped. This allows the netdevice to perform queue management
 * 	safely.
 *
 *	Fiber Channel over Ethernet (FCoE) offload functions.
 * int (*ndo_fcoe_enable)(struct net_device *dev);
 *	Called when the FCoE protocol stack wants to start using LLD for FCoE
 *	so the underlying device can perform whatever needed configuration or
 *	initialization to support acceleration of FCoE traffic.
 *
 * int (*ndo_fcoe_disable)(struct net_device *dev);
 *	Called when the FCoE protocol stack wants to stop using LLD for FCoE
 *	so the underlying device can perform whatever needed clean-ups to
 *	stop supporting acceleration of FCoE traffic.
 *
 * int (*ndo_fcoe_ddp_setup)(struct net_device *dev, u16 xid,
 *			     struct scatterlist *sgl, unsigned int sgc);
 *	Called when the FCoE Initiator wants to initialize an I/O that
 *	is a possible candidate for Direct Data Placement (DDP). The LLD can
 *	perform necessary setup and returns 1 to indicate the device is set up
 *	successfully to perform DDP on this I/O, otherwise this returns 0.
 *
 * int (*ndo_fcoe_ddp_done)(struct net_device *dev,  u16 xid);
 *	Called when the FCoE Initiator/Target is done with the DDPed I/O as
 *	indicated by the FC exchange id 'xid', so the underlying device can
 *	clean up and reuse resources for later DDP requests.
 *
 * int (*ndo_fcoe_ddp_target)(struct net_device *dev, u16 xid,
 *			      struct scatterlist *sgl, unsigned int sgc);
 *	Called when the FCoE Target wants to initialize an I/O that
 *	is a possible candidate for Direct Data Placement (DDP). The LLD can
 *	perform necessary setup and returns 1 to indicate the device is set up
 *	successfully to perform DDP on this I/O, otherwise this returns 0.
 *
 * int (*ndo_fcoe_get_hbainfo)(struct net_device *dev,
 *			       struct netdev_fcoe_hbainfo *hbainfo);
 *	Called when the FCoE Protocol stack wants information on the underlying
 *	device. This information is utilized by the FCoE protocol stack to
 *	register attributes with Fiber Channel management service as per the
 *	FC-GS Fabric Device Management Information (FDMI) specification.
 *
 * int (*ndo_fcoe_get_wwn)(struct net_device *dev, u64 *wwn, int type);
 *	Called when the underlying device wants to override default World Wide
 *	Name (WWN) generation mechanism in FCoE protocol stack to pass its own
 *	World Wide Port Name (WWPN) or World Wide Node Name (WWNN) to the FCoE
 *	protocol stack to use.
 *
 *	RFS acceleration.
 * int (*ndo_rx_flow_steer)(struct net_device *dev, const struct sk_buff *skb,
 *			    u16 rxq_index, u32 flow_id);
 *	Set hardware filter for RFS.  rxq_index is the target queue index;
 *	flow_id is a flow ID to be passed to rps_may_expire_flow() later.
 *	Return the filter ID on success, or a negative error code.
 *
 *	Slave management functions (for bridge, bonding, etc).
 * int (*ndo_add_slave)(struct net_device *dev, struct net_device *slave_dev);
 *	Called to make another netdev an underling.
 *
 * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev);
 *	Called to release previously enslaved netdev.
 *
 *      Feature/offload setting functions.
 * int (*ndo_set_features)(struct net_device *dev, netdev_features_t features);
 *	Called to update device configuration to new features. Passed
 *	feature set might be less than what was returned by ndo_fix_features().
 *	Must return >0 or -errno if it changed dev->features itself.
 *
 * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[],
 *		      struct net_device *dev,
 *		      const unsigned char *addr, u16 vid, u16 flags)
 *	Adds an FDB entry to dev for addr.
 * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[],
 *		      struct net_device *dev,
 *		      const unsigned char *addr, u16 vid)
 *	Deletes the FDB entry from dev corresponding to addr.
 * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb,
 *		       struct net_device *dev, struct net_device *filter_dev,
 *		       int *idx)
 *	Used to add FDB entries to dump requests. Implementers should add
 *	entries to skb and update idx with the number of entries.
 *
 * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh,
 *			     u16 flags)
 * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq,
 *			     struct net_device *dev, u32 filter_mask,
 *			     int nlflags)
 * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh,
 *			     u16 flags);
 *
 * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier);
 *	Called to change device carrier. Soft-devices (like dummy, team, etc)
 *	which do not represent real hardware may define this to allow their
 *	userspace components to manage their virtual carrier state. Devices
 *	that determine carrier state from physical hardware properties (eg
 *	network cables) or protocol-dependent mechanisms (eg
 *	USB_CDC_NOTIFY_NETWORK_CONNECTION) should NOT implement this function.
 *
 * int (*ndo_get_phys_port_id)(struct net_device *dev,
 *			       struct netdev_phys_item_id *ppid);
 *	Called to get ID of physical port of this device. If driver does
 *	not implement this, it is assumed that the hw is not able to have
 *	multiple net devices on single physical port.
 *
 * void (*ndo_udp_tunnel_add)(struct net_device *dev,
 *			      struct udp_tunnel_info *ti);
 *	Called by UDP tunnel to notify a driver about the UDP port and socket
 *	address family that a UDP tunnel is listening to. It is called only
 *	when a new port starts listening. The operation is protected by the
 *	RTNL.
 *
 * void (*ndo_udp_tunnel_del)(struct net_device *dev,
 *			      struct udp_tunnel_info *ti);
 *	Called by UDP tunnel to notify the driver about a UDP port and socket
 *	address family that the UDP tunnel is not listening to anymore. The
 *	operation is protected by the RTNL.
 *
 * void* (*ndo_dfwd_add_station)(struct net_device *pdev,
 *				 struct net_device *dev)
 *	Called by upper layer devices to accelerate switching or other
 *	station functionality into hardware. 'pdev' is the lowerdev
 *	to use for the offload and 'dev' is the net device that will
 *	back the offload. Returns a pointer to the private structure
 *	the upper layer will maintain.
 * void (*ndo_dfwd_del_station)(struct net_device *pdev, void *priv)
 *	Called by upper layer device to delete the station created
 *	by 'ndo_dfwd_add_station'. 'pdev' is the net device backing
 *	the station and priv is the structure returned by the add
 *	operation.
 * netdev_tx_t (*ndo_dfwd_start_xmit)(struct sk_buff *skb,
 *				      struct net_device *dev,
 *				      void *priv);
 *	Callback to use for xmit over the accelerated station. This
 *	is used in place of ndo_start_xmit on accelerated net
 *	devices.
 * netdev_features_t (*ndo_features_check)(struct sk_buff *skb,
 *					   struct net_device *dev,
 *					   netdev_features_t features);
 *	Called by core transmit path to determine if device is capable of
 *	performing offload operations on a given packet. This is to give
 *	the device an opportunity to implement any restrictions that cannot
 *	be otherwise expressed by feature flags. The check is called with
 *	the set of features that the stack has calculated and it returns
 *	those the driver believes to be appropriate.
 * int (*ndo_set_tx_maxrate)(struct net_device *dev,
 *			     int queue_index, u32 maxrate);
 *	Called when a user wants to set a max-rate limitation of a specific
 *	TX queue.
 * int (*ndo_get_iflink)(const struct net_device *dev);
 *	Called to get the iflink value of this device.
 * void (*ndo_change_proto_down)(struct net_device *dev,
 *				 bool proto_down);
 *	This function is used to pass protocol port error state information
 *	to the switch driver. The switch driver can react to the proto_down
 *      by doing a phys down on the associated switch port.
 * int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb);
 *	This function is used to get egress tunnel information for a given skb.
 *	This is useful for retrieving outer tunnel header parameters while
 *	sampling packets.
 * void (*ndo_set_rx_headroom)(struct net_device *dev, int needed_headroom);
 *	This function is used to specify the headroom that the skb must
 *	consider when allocating an skb during packet reception. Setting
 *	appropriate rx headroom value allows avoiding skb head copy on
 *	forward. Setting a negative value resets the rx headroom to the
 *	default value.
 * int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp);
 *	This function is used to set or query state related to XDP on the
 *	netdevice. See definition of enum xdp_netdev_command for details.
 *
 */
struct net_device_ops {
	int			(*ndo_init)(struct net_device *dev);
	void			(*ndo_uninit)(struct net_device *dev);
	int			(*ndo_open)(struct net_device *dev);
	int			(*ndo_stop)(struct net_device *dev);
	netdev_tx_t		(*ndo_start_xmit)(struct sk_buff *skb,
						  struct net_device *dev);
	netdev_features_t	(*ndo_features_check)(struct sk_buff *skb,
						      struct net_device *dev,
						      netdev_features_t features);
	u16			(*ndo_select_queue)(struct net_device *dev,
						    struct sk_buff *skb,
						    void *accel_priv,
						    select_queue_fallback_t fallback);
	void			(*ndo_change_rx_flags)(struct net_device *dev,
						       int flags);
	void			(*ndo_set_rx_mode)(struct net_device *dev);
	int			(*ndo_set_mac_address)(struct net_device *dev,
						       void *addr);
	int			(*ndo_validate_addr)(struct net_device *dev);
	int			(*ndo_do_ioctl)(struct net_device *dev,
					        struct ifreq *ifr, int cmd);
	int			(*ndo_set_config)(struct net_device *dev,