ip_gre.c 42.4 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
/*
2
 *	Linux NET3:	GRE over IP protocol decoder.
Linus Torvalds's avatar
Linus Torvalds committed
3
4
5
6
7
8
9
10
11
12
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

13
14
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

15
#include <linux/capability.h>
Linus Torvalds's avatar
Linus Torvalds committed
16
17
18
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
19
#include <linux/slab.h>
Linus Torvalds's avatar
Linus Torvalds committed
20
21
22
23
24
25
26
27
28
29
30
31
32
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
33
#include <linux/etherdevice.h>
34
#include <linux/if_ether.h>
Linus Torvalds's avatar
Linus Torvalds committed
35
36
37
38
39
40
41
42
43
44
45

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ipip.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
46
47
#include <net/net_namespace.h>
#include <net/netns/generic.h>
Herbert Xu's avatar
Herbert Xu committed
48
#include <net/rtnetlink.h>
49
#include <net/gre.h>
Linus Torvalds's avatar
Linus Torvalds committed
50

51
#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds's avatar
Linus Torvalds committed
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   with infinite looping in net_bh.

   We cannot track such dead loops during route installation,
   it is infeasible task. The most general solutions would be
   to keep skb->encapsulation counter (sort of local ttl),
Eric Dumazet's avatar
Eric Dumazet committed
69
   and silently drop packet when it expires. It is a good
70
   solution, but it supposes maintaining new variable in ALL
Linus Torvalds's avatar
Linus Torvalds committed
71
72
   skb, even if no tunneling is used.

Eric Dumazet's avatar
Eric Dumazet committed
73
74
75
   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter, since when we enter the first ndo_xmit(), cpu migration is
   forbidden. We force an exit if this counter reaches RECURSION_LIMIT
Linus Torvalds's avatar
Linus Torvalds committed
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95

   2. Networking dead loops would not kill routers, but would really
   kill network. IP hop limit plays role of "t->recursion" in this case,
   if we copy it from packet being encapsulated to upper header.
   It is very good solution, but it introduces two problems:

   - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies to rfc1812 now (yes, guys, Linux is the only
     true router now :-)), all routers (at least, in neighbourhood of mine)
     return only 8 bytes of payload. It is the end.

   Hence, if we want that OSPF worked or traceroute said something reasonable,
   we should search for another solution.

   One of them is to parse packet trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially,
96
   taking into account fragmentation. To be short, ttl is not a solution at all.
Linus Torvalds's avatar
Linus Torvalds committed
97
98
99
100
101
102

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force DF flag on tunnels with preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches, that exceed pmtu are pruned) and tunnel mtu
103
   rapidly degrades to value <68, where looping stops.
Linus Torvalds's avatar
Linus Torvalds committed
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
   Yes, it is not good if there exists a router in the loop,
   which does not force DF, even when encapsulating packets have DF set.
   But it is not our problem! Nobody could accuse us, we made
   all that we could make. Even if it is your gated who injected
   fatal route to network, even if it were you who configured
   fatal static route: you are innocent. :-)



   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
   practically identical code. It would be good to glue them
   together, but it is not very evident, how to make them modular.
   sit is integral part of IPv6, ipip and gre are naturally modular.
   We could extract common parts (hash table, ioctl etc)
   to a separate module (ip_tunnel.c).

   Alexey Kuznetsov.
 */

Herbert Xu's avatar
Herbert Xu committed
123
/*
 * Forward declarations: these objects are referenced by the tunnel
 * creation code below before their definitions appear (presumably
 * further down in this file).
 */
static struct rtnl_link_ops ipgre_link_ops __read_mostly;

static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);
static int ipgre_tunnel_bind_dev(struct net_device *dev);
Linus Torvalds's avatar
Linus Torvalds committed
127
128
129

/* Fallback tunnel: no source, no destination, no key, no options */

130
131
#define HASH_SIZE  16

132
static int ipgre_net_id __read_mostly;
133
struct ipgre_net {
Eric Dumazet's avatar
Eric Dumazet committed
134
	struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
135

136
	struct net_device *fb_tunnel_dev;
137
138
};

Linus Torvalds's avatar
Linus Torvalds committed
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
/* Tunnel hash table */

/*
   4 hash tables:

   3: (remote,local)
   2: (remote,*)
   1: (*,local)
   0: (*,*)

   We require exact key match i.e. if a key is present in packet
   it will match only tunnel with the same key; if it is not present,
   it will match only keyless tunnel.

   All keyless packets that do not match a configured keyless tunnel
   will match the fallback tunnel.
 */

Al Viro's avatar
Al Viro committed
157
/*
 * Fold a 32-bit address or key into a bucket index in [0, HASH_SIZE).
 * The argument is fully parenthesized so that an expression such as
 * HASH(a | b) hashes the whole expression; note that @addr is still
 * evaluated twice, so it must not have side effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
Linus Torvalds's avatar
Linus Torvalds committed
158

159
160
161
162
/*
 * Shorthand names for the four hash tables. The index is the bucket
 * priority: bit 1 is set when a remote address is configured, bit 0 when
 * a local address is configured ("wc" = wildcard, neither is set).
 */
#define tunnels_r_l	tunnels[3]
#define tunnels_r	tunnels[2]
#define tunnels_l	tunnels[1]
#define tunnels_wc	tunnels[0]
163
/*
Eric Dumazet's avatar
Eric Dumazet committed
164
 * Locking : hash tables are protected by RCU and RTNL
165
 */
Linus Torvalds's avatar
Linus Torvalds committed
166

167
168
/*
 * Walk the tunnel chain that begins at @start, following ->next with
 * rcu_dereference(). The loop cursor is a variable named 't' that the
 * enclosing function must declare itself (struct ip_tunnel *); the
 * callers visible in this file invoke it under rcu_read_lock().
 */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
Linus Torvalds's avatar
Linus Torvalds committed
169

Eric Dumazet's avatar
Eric Dumazet committed
170
171
172
173
174
175
/*
 * Per-CPU copies of the frequently updated counters. All other statistics
 * are kept in the shared netdev->stats. The structure is aligned to its
 * own size (four unsigned longs).
 */
struct pcpu_tstats {
	unsigned long	rx_packets;
	unsigned long	rx_bytes;
	unsigned long	tx_packets;
	unsigned long	tx_bytes;
} __attribute__((aligned(4*sizeof(unsigned long))));
Eric Dumazet's avatar
Eric Dumazet committed
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197

/*
 * Sum the per-CPU rx/tx packet and byte counters of @dev over every
 * possible CPU, store the totals in dev->stats and return a pointer to
 * dev->stats. Counters other than these four are left untouched.
 */
static struct net_device_stats *ipgre_get_stats(struct net_device *dev)
{
	unsigned long rx_packets = 0, rx_bytes = 0;
	unsigned long tx_packets = 0, tx_bytes = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		const struct pcpu_tstats *pc = per_cpu_ptr(dev->tstats, cpu);

		rx_packets += pc->rx_packets;
		rx_bytes   += pc->rx_bytes;
		tx_packets += pc->tx_packets;
		tx_bytes   += pc->tx_bytes;
	}

	dev->stats.rx_packets = rx_packets;
	dev->stats.rx_bytes   = rx_bytes;
	dev->stats.tx_packets = tx_packets;
	dev->stats.tx_bytes   = tx_bytes;

	return &dev->stats;
}

Linus Torvalds's avatar
Linus Torvalds committed
198
199
/* Given src, dst and key, find appropriate for input tunnel. */

200
static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
201
202
					      __be32 remote, __be32 local,
					      __be32 key, __be16 gre_proto)
Linus Torvalds's avatar
Linus Torvalds committed
203
{
204
205
	struct net *net = dev_net(dev);
	int link = dev->ifindex;
Eric Dumazet's avatar
Eric Dumazet committed
206
207
	unsigned int h0 = HASH(remote);
	unsigned int h1 = HASH(key);
Timo Teras's avatar
Timo Teras committed
208
	struct ip_tunnel *t, *cand = NULL;
209
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
210
211
	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
		       ARPHRD_ETHER : ARPHRD_IPGRE;
Timo Teras's avatar
Timo Teras committed
212
	int score, cand_score = 4;
Linus Torvalds's avatar
Linus Torvalds committed
213

214
	for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
215
216
217
218
219
220
221
222
223
224
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

Timo Teras's avatar
Timo Teras committed
225
		score = 0;
226
		if (t->parms.link != link)
Timo Teras's avatar
Timo Teras committed
227
			score |= 1;
228
		if (t->dev->type != dev_type)
Timo Teras's avatar
Timo Teras committed
229
230
			score |= 2;
		if (score == 0)
231
			return t;
Timo Teras's avatar
Timo Teras committed
232
233
234
235
236

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
Linus Torvalds's avatar
Linus Torvalds committed
237
	}
238

239
	for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
240
241
242
243
244
245
246
247
248
		if (remote != t->parms.iph.daddr ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

Timo Teras's avatar
Timo Teras committed
249
		score = 0;
250
		if (t->parms.link != link)
Timo Teras's avatar
Timo Teras committed
251
			score |= 1;
252
		if (t->dev->type != dev_type)
Timo Teras's avatar
Timo Teras committed
253
254
			score |= 2;
		if (score == 0)
255
			return t;
Timo Teras's avatar
Timo Teras committed
256
257
258
259
260

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
Linus Torvalds's avatar
Linus Torvalds committed
261
	}
262

263
	for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
264
265
266
267
268
269
270
271
272
273
274
		if ((local != t->parms.iph.saddr &&
		     (local != t->parms.iph.daddr ||
		      !ipv4_is_multicast(local))) ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

Timo Teras's avatar
Timo Teras committed
275
		score = 0;
276
		if (t->parms.link != link)
Timo Teras's avatar
Timo Teras committed
277
			score |= 1;
278
		if (t->dev->type != dev_type)
Timo Teras's avatar
Timo Teras committed
279
280
			score |= 2;
		if (score == 0)
281
			return t;
Timo Teras's avatar
Timo Teras committed
282
283
284
285
286

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
Linus Torvalds's avatar
Linus Torvalds committed
287
	}
288

289
	for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
290
291
292
293
294
295
296
297
		if (t->parms.i_key != key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

Timo Teras's avatar
Timo Teras committed
298
		score = 0;
299
		if (t->parms.link != link)
Timo Teras's avatar
Timo Teras committed
300
			score |= 1;
301
		if (t->dev->type != dev_type)
Timo Teras's avatar
Timo Teras committed
302
303
			score |= 2;
		if (score == 0)
304
			return t;
Timo Teras's avatar
Timo Teras committed
305
306
307
308
309

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
Linus Torvalds's avatar
Linus Torvalds committed
310
311
	}

Timo Teras's avatar
Timo Teras committed
312
313
	if (cand != NULL)
		return cand;
314

315
316
317
	dev = ign->fb_tunnel_dev;
	if (dev->flags & IFF_UP)
		return netdev_priv(dev);
318

Linus Torvalds's avatar
Linus Torvalds committed
319
320
321
	return NULL;
}

Eric Dumazet's avatar
Eric Dumazet committed
322
/*
 * Return the address of the hash bucket head in which a tunnel with the
 * given parameters belongs.
 *
 * The table is chosen from the configured addresses (bit 0: local address
 * set, bit 1: non-multicast remote address set). The bucket index is the
 * hash of the input key, mixed with the hash of the remote address when
 * that address selects the table.
 */
static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
		struct ip_tunnel_parm *parms)
{
	const __be32 daddr = parms->iph.daddr;
	const __be32 saddr = parms->iph.saddr;
	const __be32 ikey = parms->i_key;
	unsigned int bucket = HASH(ikey);
	int table = saddr ? 1 : 0;

	if (daddr && !ipv4_is_multicast(daddr)) {
		table |= 2;
		bucket ^= HASH(daddr);
	}

	return &ign->tunnels[table][bucket];
}

Eric Dumazet's avatar
Eric Dumazet committed
341
/*
 * Convenience wrapper: find the bucket head for an existing tunnel @t by
 * looking up its own stored parameters.
 */
static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
		struct ip_tunnel *t)
{
	struct ip_tunnel_parm *cfg = &t->parms;

	return __ipgre_bucket(ign, cfg);
}

347
static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds's avatar
Linus Torvalds committed
348
{
Eric Dumazet's avatar
Eric Dumazet committed
349
	struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
Linus Torvalds's avatar
Linus Torvalds committed
350

Eric Dumazet's avatar
Eric Dumazet committed
351
	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
352
	rcu_assign_pointer(*tp, t);
Linus Torvalds's avatar
Linus Torvalds committed
353
354
}

355
static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds's avatar
Linus Torvalds committed
356
{
Eric Dumazet's avatar
Eric Dumazet committed
357
358
359
360
361
362
363
364
	struct ip_tunnel __rcu **tp;
	struct ip_tunnel *iter;

	for (tp = ipgre_bucket(ign, t);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
			rcu_assign_pointer(*tp, t->next);
Linus Torvalds's avatar
Linus Torvalds committed
365
366
367
368
369
			break;
		}
	}
}

370
371
372
static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
					   struct ip_tunnel_parm *parms,
					   int type)
Linus Torvalds's avatar
Linus Torvalds committed
373
{
Al Viro's avatar
Al Viro committed
374
375
376
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
377
	int link = parms->link;
Eric Dumazet's avatar
Eric Dumazet committed
378
379
	struct ip_tunnel *t;
	struct ip_tunnel __rcu **tp;
380
381
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

Eric Dumazet's avatar
Eric Dumazet committed
382
383
384
	for (tp = __ipgre_bucket(ign, parms);
	     (t = rtnl_dereference(*tp)) != NULL;
	     tp = &t->next)
385
386
387
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    key == t->parms.i_key &&
388
		    link == t->parms.link &&
389
390
391
392
393
394
		    type == t->dev->type)
			break;

	return t;
}

Eric Dumazet's avatar
Eric Dumazet committed
395
static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
396
397
398
		struct ip_tunnel_parm *parms, int create)
{
	struct ip_tunnel *t, *nt;
Linus Torvalds's avatar
Linus Torvalds committed
399
400
	struct net_device *dev;
	char name[IFNAMSIZ];
401
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds's avatar
Linus Torvalds committed
402

403
404
405
	t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
	if (t || !create)
		return t;
Linus Torvalds's avatar
Linus Torvalds committed
406
407
408

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
409
	else
stephen hemminger's avatar
stephen hemminger committed
410
		strcpy(name, "gre%d");
Linus Torvalds's avatar
Linus Torvalds committed
411
412
413

	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
	if (!dev)
stephen hemminger's avatar
stephen hemminger committed
414
		return NULL;
Linus Torvalds's avatar
Linus Torvalds committed
415

416
417
	dev_net_set(dev, net);

418
	nt = netdev_priv(dev);
Linus Torvalds's avatar
Linus Torvalds committed
419
	nt->parms = *parms;
Herbert Xu's avatar
Herbert Xu committed
420
	dev->rtnl_link_ops = &ipgre_link_ops;
Linus Torvalds's avatar
Linus Torvalds committed
421

422
423
	dev->mtu = ipgre_tunnel_bind_dev(dev);

424
425
	if (register_netdevice(dev) < 0)
		goto failed_free;
Linus Torvalds's avatar
Linus Torvalds committed
426

427
428
429
430
	/* Can use a lockless transmit, unless we generate output sequences */
	if (!(nt->parms.o_flags & GRE_SEQ))
		dev->features |= NETIF_F_LLTX;

Linus Torvalds's avatar
Linus Torvalds committed
431
	dev_hold(dev);
432
	ipgre_tunnel_link(ign, nt);
Linus Torvalds's avatar
Linus Torvalds committed
433
434
	return nt;

435
436
failed_free:
	free_netdev(dev);
Linus Torvalds's avatar
Linus Torvalds committed
437
438
439
440
441
	return NULL;
}

/*
 * Tear-down hook for a tunnel device: take the tunnel out of its hash
 * bucket and drop one reference on @dev (the creation path in
 * ipgre_tunnel_locate() takes one with dev_hold() before linking).
 */
static void ipgre_tunnel_uninit(struct net_device *dev)
{
	struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
	struct ip_tunnel *tunnel = netdev_priv(dev);

	ipgre_tunnel_unlink(ign, tunnel);
	dev_put(dev);
}


/*
 * ICMP error handler for GRE.
 *
 * @skb:  the ICMP error; skb->data points at the IP header of the
 *        offending packet quoted inside the ICMP message, followed by
 *        (part of) its GRE header
 * @info: ICMP info word (unused here)
 *
 * Finds the tunnel the quoted packet was sent through and records the
 * error in its soft state (err_count / err_time).
 *
 * Background: all routers (except Linux) return only 8 bytes of packet
 * payload, so precise relaying of ICMP in the real Internet is absolutely
 * infeasible. Moreover, the GRE key sits in the third word of the GRE
 * header when a checksum is present, which makes it impossible to maintain
 * even soft state for keyed GRE tunnels with checksums enabled when only
 * 8 bytes are quoted.
 */
static void ipgre_err(struct sk_buff *skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)skb->data;
	const int ip_hlen = iph->ihl << 2;
	const __be16 *p = (const __be16 *)(skb->data + ip_hlen);
	/* Quoted IP header plus the mandatory 4-byte GRE base header. */
	int grehlen = ip_hlen + 4;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	__be16 flags;
	__be32 key = 0;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		if (flags&GRE_KEY) {
			/* Extend up to and including the key word. */
			grehlen += 4;
			if (flags&GRE_CSUM)
				grehlen += 4;
		}
	}

	/* If only 8 bytes returned, keyed message will be dropped here */
	if (skb_headlen(skb) < grehlen)
		return;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	/*
	 * grehlen is measured from skb->data (it includes the quoted IP
	 * header), and the key is the last word it covers. Index from
	 * skb->data, not from the GRE header pointer: indexing from the
	 * latter would count the IP header twice and read beyond the bytes
	 * validated above.
	 */
	if (flags & GRE_KEY)
		key = *(const __be32 *)(skb->data + grehlen - 4);

	rcu_read_lock();
	/* The quoted packet is one we sent, so its daddr is our remote. */
	t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
				key, p[1]);
	if (t == NULL || t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		goto out;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	/* Count errors arriving within IPTUNNEL_ERR_TIMEO of the last one. */
	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	rcu_read_unlock();
}

538
static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
539
540
541
{
	if (INET_ECN_is_ce(iph->tos)) {
		if (skb->protocol == htons(ETH_P_IP)) {
542
			IP_ECN_set_ce(ip_hdr(skb));
Linus Torvalds's avatar
Linus Torvalds committed
543
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
544
			IP6_ECN_set_ce(ipv6_hdr(skb));
Linus Torvalds's avatar
Linus Torvalds committed
545
546
547
548
549
		}
	}
}

static inline u8
550
ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
551
552
553
554
555
{
	u8 inner = 0;
	if (skb->protocol == htons(ETH_P_IP))
		inner = old_iph->tos;
	else if (skb->protocol == htons(ETH_P_IPV6))
556
		inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
Linus Torvalds's avatar
Linus Torvalds committed
557
558
559
560
561
	return INET_ECN_encapsulate(tos, inner);
}

static int ipgre_rcv(struct sk_buff *skb)
{
562
	const struct iphdr *iph;
Linus Torvalds's avatar
Linus Torvalds committed
563
	u8     *h;
Al Viro's avatar
Al Viro committed
564
	__be16    flags;
565
	__sum16   csum = 0;
Al Viro's avatar
Al Viro committed
566
	__be32 key = 0;
Linus Torvalds's avatar
Linus Torvalds committed
567
568
569
	u32    seqno = 0;
	struct ip_tunnel *tunnel;
	int    offset = 4;
570
	__be16 gre_proto;
Linus Torvalds's avatar
Linus Torvalds committed
571
572
573
574

	if (!pskb_may_pull(skb, 16))
		goto drop_nolock;

575
	iph = ip_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
576
	h = skb->data;
Al Viro's avatar
Al Viro committed
577
	flags = *(__be16*)h;
Linus Torvalds's avatar
Linus Torvalds committed
578
579
580
581
582
583
584
585
586

	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
		/* - Version must be 0.
		   - We do not support routing headers.
		 */
		if (flags&(GRE_VERSION|GRE_ROUTING))
			goto drop_nolock;

		if (flags&GRE_CSUM) {
587
			switch (skb->ip_summed) {
588
			case CHECKSUM_COMPLETE:
589
				csum = csum_fold(skb->csum);
590
591
592
593
594
595
				if (!csum)
					break;
				/* fall through */
			case CHECKSUM_NONE:
				skb->csum = 0;
				csum = __skb_checksum_complete(skb);
596
				skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds's avatar
Linus Torvalds committed
597
598
599
600
			}
			offset += 4;
		}
		if (flags&GRE_KEY) {
Al Viro's avatar
Al Viro committed
601
			key = *(__be32*)(h + offset);
Linus Torvalds's avatar
Linus Torvalds committed
602
603
604
			offset += 4;
		}
		if (flags&GRE_SEQ) {
Al Viro's avatar
Al Viro committed
605
			seqno = ntohl(*(__be32*)(h + offset));
Linus Torvalds's avatar
Linus Torvalds committed
606
607
608
609
			offset += 4;
		}
	}

610
611
	gre_proto = *(__be16 *)(h + 2);

612
	rcu_read_lock();
613
	if ((tunnel = ipgre_tunnel_lookup(skb->dev,
614
615
					  iph->saddr, iph->daddr, key,
					  gre_proto))) {
Eric Dumazet's avatar
Eric Dumazet committed
616
		struct pcpu_tstats *tstats;
617

Linus Torvalds's avatar
Linus Torvalds committed
618
619
		secpath_reset(skb);

620
		skb->protocol = gre_proto;
Linus Torvalds's avatar
Linus Torvalds committed
621
622
623
624
		/* WCCP version 1 and 2 protocol decoding.
		 * - Change protocol to IP
		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
		 */
625
		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
626
			skb->protocol = htons(ETH_P_IP);
627
			if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds's avatar
Linus Torvalds committed
628
629
630
				offset += 4;
		}

631
		skb->mac_header = skb->network_header;
632
		__pskb_pull(skb, offset);
633
		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds's avatar
Linus Torvalds committed
634
635
		skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
636
		if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds's avatar
Linus Torvalds committed
637
			/* Looped back packet, drop it! */
638
			if (rt_is_output_route(skb_rtable(skb)))
Linus Torvalds's avatar
Linus Torvalds committed
639
				goto drop;
Eric Dumazet's avatar
Eric Dumazet committed
640
			tunnel->dev->stats.multicast++;
Linus Torvalds's avatar
Linus Torvalds committed
641
642
643
644
645
646
			skb->pkt_type = PACKET_BROADCAST;
		}
#endif

		if (((flags&GRE_CSUM) && csum) ||
		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
Eric Dumazet's avatar
Eric Dumazet committed
647
648
			tunnel->dev->stats.rx_crc_errors++;
			tunnel->dev->stats.rx_errors++;
Linus Torvalds's avatar
Linus Torvalds committed
649
650
651
652
653
			goto drop;
		}
		if (tunnel->parms.i_flags&GRE_SEQ) {
			if (!(flags&GRE_SEQ) ||
			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
Eric Dumazet's avatar
Eric Dumazet committed
654
655
				tunnel->dev->stats.rx_fifo_errors++;
				tunnel->dev->stats.rx_errors++;
Linus Torvalds's avatar
Linus Torvalds committed
656
657
658
659
				goto drop;
			}
			tunnel->i_seqno = seqno + 1;
		}
660
661
662
663

		/* Warning: All skb pointers will be invalidated! */
		if (tunnel->dev->type == ARPHRD_ETHER) {
			if (!pskb_may_pull(skb, ETH_HLEN)) {
Eric Dumazet's avatar
Eric Dumazet committed
664
665
				tunnel->dev->stats.rx_length_errors++;
				tunnel->dev->stats.rx_errors++;
666
667
668
669
670
671
672
673
				goto drop;
			}

			iph = ip_hdr(skb);
			skb->protocol = eth_type_trans(skb, tunnel->dev);
			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
		}

Eric Dumazet's avatar
Eric Dumazet committed
674
675
676
677
678
		tstats = this_cpu_ptr(tunnel->dev->tstats);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;

		__skb_tunnel_rx(skb, tunnel->dev);
679
680

		skb_reset_network_header(skb);
Linus Torvalds's avatar
Linus Torvalds committed
681
		ipgre_ecn_decapsulate(iph, skb);
682

683
		netif_rx(skb);
Eric Dumazet's avatar
Eric Dumazet committed
684

685
		rcu_read_unlock();
Eric Dumazet's avatar
Eric Dumazet committed
686
		return 0;
Linus Torvalds's avatar
Linus Torvalds committed
687
	}
688
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds's avatar
Linus Torvalds committed
689
690

drop:
691
	rcu_read_unlock();
Linus Torvalds's avatar
Linus Torvalds committed
692
693
drop_nolock:
	kfree_skb(skb);
Eric Dumazet's avatar
Eric Dumazet committed
694
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
695
696
}

697
static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds's avatar
Linus Torvalds committed
698
{
699
	struct ip_tunnel *tunnel = netdev_priv(dev);
Eric Dumazet's avatar
Eric Dumazet committed
700
	struct pcpu_tstats *tstats;
701
702
	const struct iphdr  *old_iph = ip_hdr(skb);
	const struct iphdr  *tiph;
703
	struct flowi4 fl4;
Linus Torvalds's avatar
Linus Torvalds committed
704
	u8     tos;
Al Viro's avatar
Al Viro committed
705
	__be16 df;
Linus Torvalds's avatar
Linus Torvalds committed
706
	struct rtable *rt;     			/* Route to the other host */
Eric Dumazet's avatar
Eric Dumazet committed
707
	struct net_device *tdev;		/* Device to other host */
Linus Torvalds's avatar
Linus Torvalds committed
708
	struct iphdr  *iph;			/* Our new IP header */
709
	unsigned int max_headroom;		/* The extra header space needed */
Linus Torvalds's avatar
Linus Torvalds committed
710
	int    gre_hlen;
Al Viro's avatar
Al Viro committed
711
	__be32 dst;
Linus Torvalds's avatar
Linus Torvalds committed
712
713
	int    mtu;

714
715
716
717
	if (dev->type == ARPHRD_ETHER)
		IPCB(skb)->flags = 0;

	if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
Linus Torvalds's avatar
Linus Torvalds committed
718
		gre_hlen = 0;
719
		tiph = (const struct iphdr *)skb->data;
Linus Torvalds's avatar
Linus Torvalds committed
720
721
722
723
724
725
726
727
	} else {
		gre_hlen = tunnel->hlen;
		tiph = &tunnel->parms.iph;
	}

	if ((dst = tiph->daddr) == 0) {
		/* NBMA tunnel */

Eric Dumazet's avatar
Eric Dumazet committed
728
		if (skb_dst(skb) == NULL) {
Eric Dumazet's avatar
Eric Dumazet committed
729
			dev->stats.tx_fifo_errors++;
Linus Torvalds's avatar
Linus Torvalds committed
730
731
732
			goto tx_error;
		}

733
		if (skb->protocol == htons(ETH_P_IP)) {
Eric Dumazet's avatar
Eric Dumazet committed
734
			rt = skb_rtable(skb);
735
736
			dst = rt->rt_gateway;
		}
737
#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds's avatar
Linus Torvalds committed
738
		else if (skb->protocol == htons(ETH_P_IPV6)) {
739
			const struct in6_addr *addr6;
740
741
			struct neighbour *neigh;
			bool do_tx_error_icmp;
Linus Torvalds's avatar
Linus Torvalds committed
742
743
			int addr_type;

744
			neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr);
Linus Torvalds's avatar
Linus Torvalds committed
745
746
747
			if (neigh == NULL)
				goto tx_error;

748
			addr6 = (const struct in6_addr *)&neigh->primary_key;
Linus Torvalds's avatar
Linus Torvalds committed
749
750
751
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
752
				addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds's avatar
Linus Torvalds committed
753
754
755
756
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
757
758
759
760
761
762
763
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
Linus Torvalds's avatar
Linus Torvalds committed
764
765
766
767
768
769
770
771
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;
	}

	tos = tiph->tos;
772
773
	if (tos == 1) {
		tos = 0;
Linus Torvalds's avatar
Linus Torvalds committed
774
775
		if (skb->protocol == htons(ETH_P_IP))
			tos = old_iph->tos;
776
		else if (skb->protocol == htons(ETH_P_IPV6))
777
			tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
Linus Torvalds's avatar
Linus Torvalds committed
778
779
	}

780
	rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
781
782
783
784
785
				 tunnel->parms.o_key, RT_TOS(tos),
				 tunnel->parms.link);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error;
Linus Torvalds's avatar
Linus Torvalds committed
786
	}
787
	tdev = rt->dst.dev;
Linus Torvalds's avatar
Linus Torvalds committed
788
789
790

	if (tdev == dev) {
		ip_rt_put(rt);
Eric Dumazet's avatar
Eric Dumazet committed
791
		dev->stats.collisions++;
Linus Torvalds's avatar
Linus Torvalds committed
792
793
794
795
796
		goto tx_error;
	}

	df = tiph->frag_off;
	if (df)
797
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
Linus Torvalds's avatar
Linus Torvalds committed
798
	else
Eric Dumazet's avatar
Eric Dumazet committed
799
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
Linus Torvalds's avatar
Linus Torvalds committed
800

Eric Dumazet's avatar
Eric Dumazet committed
801
802
	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
Linus Torvalds's avatar
Linus Torvalds committed
803
804
805
806
807
808
809
810
811
812
813

	if (skb->protocol == htons(ETH_P_IP)) {
		df |= (old_iph->frag_off&htons(IP_DF));

		if ((old_iph->frag_off&htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}
814
#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds's avatar
Linus Torvalds committed
815
	else if (skb->protocol == htons(ETH_P_IPV6)) {
Eric Dumazet's avatar
Eric Dumazet committed
816
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
Linus Torvalds's avatar
Linus Torvalds committed
817

Eric Dumazet's avatar
Eric Dumazet committed
818
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
819
820
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds's avatar
Linus Torvalds committed
821
822
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
823
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
Linus Torvalds's avatar
Linus Torvalds committed
824
825
826
827
			}
		}

		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
828
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Linus Torvalds's avatar
Linus Torvalds committed
829
830
831
832
833
834
835
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#endif

	if (tunnel->err_count > 0) {
836
837
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
Linus Torvalds's avatar
Linus Torvalds committed
838
839
840
841
842
843
844
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

845
	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
Linus Torvalds's avatar
Linus Torvalds committed
846

847
848
	if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds's avatar
Linus Torvalds committed
849
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
850
851
		if (max_headroom > dev->needed_headroom)
			dev->needed_headroom = max_headroom;
Linus Torvalds's avatar
Linus Torvalds committed
852
853
		if (!new_skb) {
			ip_rt_put(rt);
Eric Dumazet's avatar
Eric Dumazet committed
854
			dev->stats.tx_dropped++;
Linus Torvalds's avatar
Linus Torvalds committed
855
			dev_kfree_skb(skb);
856
			return NETDEV_TX_OK;
Linus Torvalds's avatar
Linus Torvalds committed
857
858
859
860
861
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
862
		old_iph = ip_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
863
864
	}

865
	skb_reset_transport_header(skb);
866
867
	skb_push(skb, gre_hlen);
	skb_reset_network_header(skb);
Linus Torvalds's avatar
Linus Torvalds committed
868
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
869
870
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
Eric Dumazet's avatar
Eric Dumazet committed
871
	skb_dst_drop(skb);
872
	skb_dst_set(skb, &rt->dst);
Linus Torvalds's avatar
Linus Torvalds committed
873
874
875
876
877

	/*
	 *	Push down and install the IPIP header.
	 */

878
	iph 			=	ip_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
879
880
881
882
883
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr) >> 2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_GRE;
	iph->tos		=	ipgre_ecn_encapsulate(tos, old_iph, skb);
884
885
	iph->daddr		=	fl4.daddr;
	iph->saddr		=	fl4.saddr;
Linus Torvalds's avatar
Linus Torvalds committed
886
887
888
889

	if ((iph->ttl = tiph->ttl) == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			iph->ttl = old_iph->ttl;
890
#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds's avatar
Linus Torvalds committed
891
		else if (skb->protocol == htons(ETH_P_IPV6))
892
			iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
Linus Torvalds's avatar
Linus Torvalds committed
893
894
#endif
		else
895
			iph->ttl = ip4_dst_hoplimit(&rt->dst);
Linus Torvalds's avatar
Linus Torvalds committed
896
897
	}

898
899
900
	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
	((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
				   htons(ETH_P_TEB) : skb->protocol;
Linus Torvalds's avatar
Linus Torvalds committed
901
902

	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Al Viro's avatar
Al Viro committed
903
		__be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
Linus Torvalds's avatar
Linus Torvalds committed
904
905
906
907
908
909
910
911
912
913
914
915

		if (tunnel->parms.o_flags&GRE_SEQ) {
			++tunnel->o_seqno;
			*ptr = htonl(tunnel->o_seqno);
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_KEY) {
			*ptr = tunnel->parms.o_key;
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_CSUM) {
			*ptr = 0;
916
			*(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
Linus Torvalds's avatar
Linus Torvalds committed
917
918
919
920
		}
	}

	nf_reset(skb);
Eric Dumazet's avatar
Eric Dumazet committed
921
922
	tstats = this_cpu_ptr(dev->tstats);
	__IPTUNNEL_XMIT(tstats, &dev->stats);
923
	return NETDEV_TX_OK;
Linus Torvalds's avatar
Linus Torvalds committed
924

925
#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds's avatar
Linus Torvalds committed
926
927
tx_error_icmp:
	dst_link_failure(skb);
928
#endif
Linus Torvalds's avatar
Linus Torvalds committed
929
tx_error:
Eric Dumazet's avatar
Eric Dumazet committed
930
	dev->stats.tx_errors++;
Linus Torvalds's avatar
Linus Torvalds committed
931
	dev_kfree_skb(skb);
932
	return NETDEV_TX_OK;
Linus Torvalds's avatar
Linus Torvalds committed
933
934
}

935
static int ipgre_tunnel_bind_dev(struct net_device *dev)
936
937
938
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
939
	const struct iphdr *iph;
940
941
942
943
944
945
946
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int addend = sizeof(struct iphdr) + 4;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

Herbert Xu's avatar
Herbert Xu committed
947
	/* Guess output device to choose reasonable mtu and needed_headroom */
948
949

	if (iph->daddr) {
950
951
952
953
954
955
956
957
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(dev_net(dev), &fl4,
					 iph->daddr, iph->saddr,
					 tunnel->parms.o_key,
					 RT_TOS(iph->tos),
					 tunnel->parms.link);
958
		if (!IS_ERR(rt)) {
959
			tdev = rt->dst.dev;
960
961
			ip_rt_put(rt);
		}
962
963
964

		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
965
966
967
	}

	if (!tdev && tunnel->parms.link)
968
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
969
970

	if (tdev) {
Herbert Xu's avatar
Herbert Xu committed
971
		hlen = tdev->hard_header_len + tdev->needed_headroom;
972
973
974
975
976
977
978
979
980
981
982
983
984
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Precalculate GRE options length */
	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
		if (tunnel->parms.o_flags&GRE_CSUM)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_KEY)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_SEQ)
			addend += 4;
	}
Herbert Xu's avatar
Herbert Xu committed
985
	dev->needed_headroom = addend + hlen;
986
	mtu -= dev->hard_header_len + addend;
987
988
989
990

	if (mtu < 68)
		mtu = 68;

991
992
	tunnel->hlen = addend;

993
	return mtu;
994
995
}

Linus Torvalds's avatar
Linus Torvalds committed
996
997
998
999
1000
1001
static int
ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
1002
1003
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds's avatar
Linus Torvalds committed
1004
1005
1006
1007

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
1008
		if (dev == ign->fb_tunnel_dev) {
Linus Torvalds's avatar
Linus Torvalds committed
1009
1010
1011
1012
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
1013
			t = ipgre_tunnel_locate(net, &p, 0);
Linus Torvalds's avatar
Linus Torvalds committed
1014
1015
		}
		if (t == NULL)
1016
			t = netdev_priv(dev);
Linus Torvalds's avatar
Linus Torvalds committed
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		if (!(p.i_flags&GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags&GRE_KEY))
			p.o_key = 0;

1045
		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
Linus Torvalds's avatar
Linus Torvalds committed
1046

1047
		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
Linus Torvalds's avatar
Linus Torvalds committed
1048
1049
1050
1051
1052
1053
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
Eric Dumazet's avatar
Eric Dumazet committed
1054
				unsigned int nflags = 0;
Linus Torvalds's avatar
Linus Torvalds committed
1055

1056
				t = netdev_priv(dev);
Linus Torvalds's avatar
Linus Torvalds committed
1057

1058
				if (ipv4_is_multicast(p.iph.daddr))
Linus Torvalds's avatar
Linus Torvalds committed
1059
1060
1061
1062
1063
1064
1065
1066
					nflags = IFF_BROADCAST;
				else if (p.iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}
1067
				ipgre_tunnel_unlink(ign, t);
1068
				synchronize_net();
Linus Torvalds's avatar
Linus Torvalds committed
1069
1070
1071
1072
1073
1074
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
1075
				ipgre_tunnel_link(ign, t);
Linus Torvalds's avatar
Linus Torvalds committed
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
1086
1087
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
1088
					dev->mtu = ipgre_tunnel_bind_dev(dev);
1089
1090
					netdev_state_change(dev);
				}
Linus Torvalds's avatar
Linus Torvalds committed
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

1103
		if (dev == ign->fb_tunnel_dev) {
Linus Torvalds's avatar
Linus Torvalds committed
1104
1105
1106
1107
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
1108
			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
Linus Torvalds's avatar
Linus Torvalds committed
1109
1110
				goto done;
			err = -EPERM;
1111
			if (t == netdev_priv(ign->fb_tunnel_dev))