/*
 *	Linux NET3:	IP/IP protocol decoder.
 *
 *	Authors:
 *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
 *
 *	Fixes:
 *		Alan Cox	:	Merged and made usable non modular (it's so tiny it's silly as
 *					a module taking up 2 pages).
 *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
 *					to keep ip_forward happy.
 *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
 *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
 *		David Woodhouse :	Perform some basic ICMP handling.
 *					IPIP Routing without decapsulation.
 *		Carlos Picoto   :	GRE over IP support
 *		Alexey Kuznetsov:	Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
 *					I do not want to merge them together.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

/* tunnel.c: an IP tunnel driver

	The purpose of this driver is to provide an IP tunnel through
	which you can tunnel network traffic transparently across subnets.

	This was written by looking at Nick Holloway's dummy driver.
	Thanks for the great code!

		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95

	Minor tweaks:
		Cleaned up the code a little and added some pre-1.3.0 tweaks.
		dev->hard_header/hard_header_len changed to use no headers.
		Comments/bracketing tweaked.
		Made the tunnels use dev->name not tunnel: when error reporting.
		Added tx_dropped stat

		-Alan Cox	(alan@lxorguk.ukuu.org.uk) 21 March 95

	Reworked:
		Changed to tunnel to destination gateway in addition to the
			tunnel's pointopoint address
		Almost completely rewritten
		Note:  There is currently no firewall or ICMP handling done.

		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96

*/

/* Things I wish I had known when writing the tunnel driver:

	When the tunnel_xmit() function is called, the skb contains the
	packet to be sent (plus a great deal of extra info), and dev
	contains the tunnel device that _we_ are.

	When we are passed a packet, we are expected to fill in the
	source address with our source IP address.

	What is the proper way to allocate, copy and free a buffer?
	After you allocate it, it is a "0 length" chunk of memory
	starting at zero.  If you want to add headers to the buffer
	later, you'll have to call "skb_reserve(skb, amount)" with
	the amount of memory you want reserved.  Then, you call
	"skb_put(skb, amount)" with the amount of space you want in
	the buffer.  skb_put() returns a pointer to the top (#0) of
	that buffer.  skb->len is set to the amount of space you have
	"allocated" with skb_put().  You can then write up to skb->len
	bytes to that buffer.  If you need more, you can call skb_put()
	again with the additional amount of space you need.  You can
	find out how much more space you can allocate by calling
	"skb_tailroom(skb)".
	Now, to add header space, call "skb_push(skb, header_len)".
	This creates space at the beginning of the buffer and returns
	a pointer to this new space.  If later you need to strip a
	header from a buffer, call "skb_pull(skb, header_len)".
	skb_headroom() will return how much space is left at the top
	of the buffer (before the main data).  Remember, this headroom
	space must be reserved before the skb_put() function is called.
	*/

/*
   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c

   For comments, look at net/ipv4/ip_gre.c --ANK
 */

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/ipip.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

/* Number of buckets in each address-keyed tunnel hash table. */
#define HASH_SIZE  16

/*
 * Fold an IPv4 address into a bucket index in the range 0..0xF.
 * The argument is parenthesised so that an expression such as
 * HASH(a ^ b) hashes the whole expression; note that the argument is
 * still evaluated twice, so it must be free of side effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)

123
static int ipip_net_id __read_mostly;
124
struct ipip_net {
Eric Dumazet's avatar
Eric Dumazet committed
125
126
127
128
129
	struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
	struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
	struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
	struct ip_tunnel __rcu *tunnels_wc[1];
	struct ip_tunnel __rcu **tunnels[4];
130

131
	struct net_device *fb_tunnel_dev;
132
133
};

/* Forward declarations for helpers defined further down in this file. */
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);
static void ipip_dev_free(struct net_device *dev);

/*
 * Locking : hash tables are protected by RCU and RTNL
 */

/*
 * Walk the tunnel chain that begins at 'start'.
 * The cursor is a variable named 't' (struct ip_tunnel *) that must
 * already be declared in the enclosing scope; it is NULL when the loop
 * runs to completion.
 */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))

/*
 * Frequently updated counters are kept per CPU; the remaining
 * counters are shared and live in netdev->stats.
 */
struct pcpu_tstats {
	unsigned long	rx_packets;
	unsigned long	rx_bytes;
	unsigned long	tx_packets;
	unsigned long	tx_bytes;
};

static struct net_device_stats *ipip_get_stats(struct net_device *dev)
{
	struct pcpu_tstats sum = { 0 };
	int i;

	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);

		sum.rx_packets += tstats->rx_packets;
		sum.rx_bytes   += tstats->rx_bytes;
		sum.tx_packets += tstats->tx_packets;
		sum.tx_bytes   += tstats->tx_bytes;
	}
	dev->stats.rx_packets = sum.rx_packets;
	dev->stats.rx_bytes   = sum.rx_bytes;
	dev->stats.tx_packets = sum.tx_packets;
	dev->stats.tx_bytes   = sum.tx_bytes;
	return &dev->stats;
}

173
174
static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
		__be32 remote, __be32 local)
Linus Torvalds's avatar
Linus Torvalds committed
175
{
Eric Dumazet's avatar
Eric Dumazet committed
176
177
	unsigned int h0 = HASH(remote);
	unsigned int h1 = HASH(local);
Linus Torvalds's avatar
Linus Torvalds committed
178
	struct ip_tunnel *t;
179
	struct ipip_net *ipn = net_generic(net, ipip_net_id);
Linus Torvalds's avatar
Linus Torvalds committed
180

181
	for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
Linus Torvalds's avatar
Linus Torvalds committed
182
183
184
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;
185
186

	for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
Linus Torvalds's avatar
Linus Torvalds committed
187
188
		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;
189
190

	for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
Linus Torvalds's avatar
Linus Torvalds committed
191
192
		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
			return t;
193
194
195

	t = rcu_dereference(ipn->tunnels_wc[0]);
	if (t && (t->dev->flags&IFF_UP))
Linus Torvalds's avatar
Linus Torvalds committed
196
197
198
199
		return t;
	return NULL;
}

Eric Dumazet's avatar
Eric Dumazet committed
200
static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
201
		struct ip_tunnel_parm *parms)
Linus Torvalds's avatar
Linus Torvalds committed
202
{
203
204
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
Eric Dumazet's avatar
Eric Dumazet committed
205
	unsigned int h = 0;
Linus Torvalds's avatar
Linus Torvalds committed
206
207
208
209
210
211
212
213
214
215
	int prio = 0;

	if (remote) {
		prio |= 2;
		h ^= HASH(remote);
	}
	if (local) {
		prio |= 1;
		h ^= HASH(local);
	}
216
	return &ipn->tunnels[prio][h];
Linus Torvalds's avatar
Linus Torvalds committed
217
218
}

Eric Dumazet's avatar
Eric Dumazet committed
219
static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
220
		struct ip_tunnel *t)
221
{
222
	return __ipip_bucket(ipn, &t->parms);
223
}
Linus Torvalds's avatar
Linus Torvalds committed
224

225
static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
Linus Torvalds's avatar
Linus Torvalds committed
226
{
Eric Dumazet's avatar
Eric Dumazet committed
227
228
229
230
231
232
233
234
	struct ip_tunnel __rcu **tp;
	struct ip_tunnel *iter;

	for (tp = ipip_bucket(ipn, t);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
			rcu_assign_pointer(*tp, t->next);
Linus Torvalds's avatar
Linus Torvalds committed
235
236
237
238
239
			break;
		}
	}
}

240
static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
Linus Torvalds's avatar
Linus Torvalds committed
241
{
Eric Dumazet's avatar
Eric Dumazet committed
242
	struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
Linus Torvalds's avatar
Linus Torvalds committed
243

Eric Dumazet's avatar
Eric Dumazet committed
244
	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
245
	rcu_assign_pointer(*tp, t);
Linus Torvalds's avatar
Linus Torvalds committed
246
247
}

248
249
static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
		struct ip_tunnel_parm *parms, int create)
Linus Torvalds's avatar
Linus Torvalds committed
250
{
Al Viro's avatar
Al Viro committed
251
252
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
Eric Dumazet's avatar
Eric Dumazet committed
253
254
	struct ip_tunnel *t, *nt;
	struct ip_tunnel __rcu **tp;
Linus Torvalds's avatar
Linus Torvalds committed
255
256
	struct net_device *dev;
	char name[IFNAMSIZ];
257
	struct ipip_net *ipn = net_generic(net, ipip_net_id);
Linus Torvalds's avatar
Linus Torvalds committed
258

Eric Dumazet's avatar
Eric Dumazet committed
259
260
261
	for (tp = __ipip_bucket(ipn, parms);
		 (t = rtnl_dereference(*tp)) != NULL;
		 tp = &t->next) {
Linus Torvalds's avatar
Linus Torvalds committed
262
263
264
265
266
267
268
269
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
			return t;
	}
	if (!create)
		return NULL;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
270
	else
Eric Dumazet's avatar
Eric Dumazet committed
271
		strcpy(name, "tunl%d");
Linus Torvalds's avatar
Linus Torvalds committed
272
273
274
275
276

	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
	if (dev == NULL)
		return NULL;

277
278
	dev_net_set(dev, net);

279
280
281
282
283
	if (strchr(name, '%')) {
		if (dev_alloc_name(dev, name) < 0)
			goto failed_free;
	}

284
	nt = netdev_priv(dev);
Linus Torvalds's avatar
Linus Torvalds committed
285
286
	nt->parms = *parms;

Eric Dumazet's avatar
Eric Dumazet committed
287
288
	if (ipip_tunnel_init(dev) < 0)
		goto failed_free;
289

290
291
	if (register_netdevice(dev) < 0)
		goto failed_free;
Linus Torvalds's avatar
Linus Torvalds committed
292
293

	dev_hold(dev);
294
	ipip_tunnel_link(ipn, nt);
Linus Torvalds's avatar
Linus Torvalds committed
295
296
	return nt;

297
failed_free:
Eric Dumazet's avatar
Eric Dumazet committed
298
	ipip_dev_free(dev);
Linus Torvalds's avatar
Linus Torvalds committed
299
300
301
	return NULL;
}

Eric Dumazet's avatar
Eric Dumazet committed
302
/* called with RTNL */
Linus Torvalds's avatar
Linus Torvalds committed
303
304
static void ipip_tunnel_uninit(struct net_device *dev)
{
305
306
307
	struct net *net = dev_net(dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

Eric Dumazet's avatar
Eric Dumazet committed
308
309
310
	if (dev == ipn->fb_tunnel_dev)
		rcu_assign_pointer(ipn->tunnels_wc[0], NULL);
	else
311
		ipip_tunnel_unlink(ipn, netdev_priv(dev));
Linus Torvalds's avatar
Linus Torvalds committed
312
313
314
	dev_put(dev);
}

Herbert Xu's avatar
Herbert Xu committed
315
static int ipip_err(struct sk_buff *skb, u32 info)
Linus Torvalds's avatar
Linus Torvalds committed
316
317
{

318
/* All the routers (except for Linux) return only
Linus Torvalds's avatar
Linus Torvalds committed
319
320
321
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
322
	struct iphdr *iph = (struct iphdr *)skb->data;
323
324
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
Linus Torvalds's avatar
Linus Torvalds committed
325
	struct ip_tunnel *t;
Herbert Xu's avatar
Herbert Xu committed
326
	int err;
Linus Torvalds's avatar
Linus Torvalds committed
327
328
329
330

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
Herbert Xu's avatar
Herbert Xu committed
331
		return 0;
Linus Torvalds's avatar
Linus Torvalds committed
332
333
334
335
336
337

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
Herbert Xu's avatar
Herbert Xu committed
338
			return 0;
Linus Torvalds's avatar
Linus Torvalds committed
339
340
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
Herbert Xu's avatar
Herbert Xu committed
341
			return 0;
Linus Torvalds's avatar
Linus Torvalds committed
342
343
344
345
346
347
348
349
350
351
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
Herbert Xu's avatar
Herbert Xu committed
352
			return 0;
Linus Torvalds's avatar
Linus Torvalds committed
353
354
355
		break;
	}

Herbert Xu's avatar
Herbert Xu committed
356
357
	err = -ENOENT;

358
	rcu_read_lock();
359
	t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
Linus Torvalds's avatar
Linus Torvalds committed
360
361
	if (t == NULL || t->parms.iph.daddr == 0)
		goto out;
Herbert Xu's avatar
Herbert Xu committed
362
363

	err = 0;
Linus Torvalds's avatar
Linus Torvalds committed
364
365
366
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

367
	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
Linus Torvalds's avatar
Linus Torvalds committed
368
369
370
371
372
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
373
	rcu_read_unlock();
Herbert Xu's avatar
Herbert Xu committed
374
	return err;
Linus Torvalds's avatar
Linus Torvalds committed
375
376
}

377
378
static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
					struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
379
{
380
	struct iphdr *inner_iph = ip_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
381
382
383
384
385
386
387
388

	if (INET_ECN_is_ce(outer_iph->tos))
		IP_ECN_set_ce(inner_iph);
}

static int ipip_rcv(struct sk_buff *skb)
{
	struct ip_tunnel *tunnel;
389
	const struct iphdr *iph = ip_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
390

391
	rcu_read_lock();
Eric Dumazet's avatar
Eric Dumazet committed
392
393
394
395
	tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
	if (tunnel != NULL) {
		struct pcpu_tstats *tstats;

Linus Torvalds's avatar
Linus Torvalds committed
396
		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
397
			rcu_read_unlock();
Linus Torvalds's avatar
Linus Torvalds committed
398
399
400
401
402
403
			kfree_skb(skb);
			return 0;
		}

		secpath_reset(skb);

404
		skb->mac_header = skb->network_header;
405
		skb_reset_network_header(skb);
Linus Torvalds's avatar
Linus Torvalds committed
406
407
408
		skb->protocol = htons(ETH_P_IP);
		skb->pkt_type = PACKET_HOST;

Eric Dumazet's avatar
Eric Dumazet committed
409
410
411
412
413
		tstats = this_cpu_ptr(tunnel->dev->tstats);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;

		__skb_tunnel_rx(skb, tunnel->dev);
414

Linus Torvalds's avatar
Linus Torvalds committed
415
		ipip_ecn_decapsulate(iph, skb);
Eric Dumazet's avatar
Eric Dumazet committed
416

417
		netif_rx(skb);
Eric Dumazet's avatar
Eric Dumazet committed
418

419
		rcu_read_unlock();
Linus Torvalds's avatar
Linus Torvalds committed
420
421
		return 0;
	}
422
	rcu_read_unlock();
Linus Torvalds's avatar
Linus Torvalds committed
423
424
425
426
427
428
429
430
431

	return -1;
}

/*
 *	This function assumes it is being called from dev_queue_xmit()
 *	and that skb is filled properly by that function.
 */

432
static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds's avatar
Linus Torvalds committed
433
{
434
	struct ip_tunnel *tunnel = netdev_priv(dev);
Eric Dumazet's avatar
Eric Dumazet committed
435
	struct pcpu_tstats *tstats;
Linus Torvalds's avatar
Linus Torvalds committed
436
437
	struct iphdr  *tiph = &tunnel->parms.iph;
	u8     tos = tunnel->parms.iph.tos;
Al Viro's avatar
Al Viro committed
438
	__be16 df = tiph->frag_off;
Linus Torvalds's avatar
Linus Torvalds committed
439
	struct rtable *rt;     			/* Route to the other host */
Eric Dumazet's avatar
Eric Dumazet committed
440
	struct net_device *tdev;		/* Device to other host */
441
	struct iphdr  *old_iph = ip_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
442
	struct iphdr  *iph;			/* Our new IP header */
443
	unsigned int max_headroom;		/* The extra header space needed */
Al Viro's avatar
Al Viro committed
444
	__be32 dst = tiph->daddr;
Linus Torvalds's avatar
Linus Torvalds committed
445
446
447
448
449
	int    mtu;

	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

Eric Dumazet's avatar
Eric Dumazet committed
450
	if (tos & 1)
Linus Torvalds's avatar
Linus Torvalds committed
451
452
453
454
		tos = old_iph->tos;

	if (!dst) {
		/* NBMA tunnel */
Eric Dumazet's avatar
Eric Dumazet committed
455
		if ((rt = skb_rtable(skb)) == NULL) {
Eric Dumazet's avatar
Eric Dumazet committed
456
			dev->stats.tx_fifo_errors++;
Linus Torvalds's avatar
Linus Torvalds committed
457
458
459
460
461
462
			goto tx_error;
		}
		if ((dst = rt->rt_gateway) == 0)
			goto tx_error_icmp;
	}

463
464
465
466
467
468
469
470
	rt = ip_route_output_ports(dev_net(dev), NULL,
				   dst, tiph->saddr,
				   0, 0,
				   IPPROTO_IPIP, RT_TOS(tos),
				   tunnel->parms.link);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error_icmp;
Linus Torvalds's avatar
Linus Torvalds committed
471
	}
472
	tdev = rt->dst.dev;
Linus Torvalds's avatar
Linus Torvalds committed
473
474
475

	if (tdev == dev) {
		ip_rt_put(rt);
Eric Dumazet's avatar
Eric Dumazet committed
476
		dev->stats.collisions++;
Linus Torvalds's avatar
Linus Torvalds committed
477
478
479
		goto tx_error;
	}

480
481
482
	df |= old_iph->frag_off & htons(IP_DF);

	if (df) {
483
		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
Linus Torvalds's avatar
Linus Torvalds committed
484

485
		if (mtu < 68) {
Eric Dumazet's avatar
Eric Dumazet committed
486
			dev->stats.collisions++;
487
488
489
			ip_rt_put(rt);
			goto tx_error;
		}
Linus Torvalds's avatar
Linus Torvalds committed
490

491
492
		if (skb_dst(skb))
			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
Linus Torvalds's avatar
Linus Torvalds committed
493

494
495
496
497
498
499
500
		if ((old_iph->frag_off & htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
				  htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
Linus Torvalds's avatar
Linus Torvalds committed
501
502
503
	}

	if (tunnel->err_count > 0) {
504
505
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
Linus Torvalds's avatar
Linus Torvalds committed
506
507
508
509
510
511
512
513
514
515
516
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

517
518
	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds's avatar
Linus Torvalds committed
519
520
521
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
Eric Dumazet's avatar
Eric Dumazet committed
522
			dev->stats.tx_dropped++;
Linus Torvalds's avatar
Linus Torvalds committed
523
			dev_kfree_skb(skb);
524
			return NETDEV_TX_OK;
Linus Torvalds's avatar
Linus Torvalds committed
525
526
527
528
529
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
530
		old_iph = ip_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
531
532
	}

533
	skb->transport_header = skb->network_header;
534
535
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
Linus Torvalds's avatar
Linus Torvalds committed
536
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
537
538
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
Eric Dumazet's avatar
Eric Dumazet committed
539
	skb_dst_drop(skb);
540
	skb_dst_set(skb, &rt->dst);
Linus Torvalds's avatar
Linus Torvalds committed
541
542
543
544
545

	/*
	 *	Push down and install the IPIP header.
	 */

546
	iph 			=	ip_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
547
548
549
550
551
552
553
554
555
556
557
558
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;

	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl	=	old_iph->ttl;

	nf_reset(skb);
Eric Dumazet's avatar
Eric Dumazet committed
559
560
	tstats = this_cpu_ptr(dev->tstats);
	__IPTUNNEL_XMIT(tstats, &dev->stats);
561
	return NETDEV_TX_OK;
Linus Torvalds's avatar
Linus Torvalds committed
562
563
564
565

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
Eric Dumazet's avatar
Eric Dumazet committed
566
	dev->stats.tx_errors++;
Linus Torvalds's avatar
Linus Torvalds committed
567
	dev_kfree_skb(skb);
568
	return NETDEV_TX_OK;
Linus Torvalds's avatar
Linus Torvalds committed
569
570
}

571
572
573
574
575
576
577
578
579
580
static void ipip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	if (iph->daddr) {
581
582
583
584
585
586
		struct rtable *rt = ip_route_output_ports(dev_net(dev), NULL,
							  iph->daddr, iph->saddr,
							  0, 0,
							  IPPROTO_IPIP,
							  RT_TOS(iph->tos),
							  tunnel->parms.link);
Eric Dumazet's avatar
Eric Dumazet committed
587

588
		if (!IS_ERR(rt)) {
589
			tdev = rt->dst.dev;
590
591
592
593
594
595
			ip_rt_put(rt);
		}
		dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
596
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
597
598
599
600
601
602
603
604

	if (tdev) {
		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
		dev->mtu = tdev->mtu - sizeof(struct iphdr);
	}
	dev->iflink = tunnel->parms.link;
}

Linus Torvalds's avatar
Linus Torvalds committed
605
606
607
608
609
610
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
611
612
	struct net *net = dev_net(dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);
Linus Torvalds's avatar
Linus Torvalds committed
613
614
615
616

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
617
		if (dev == ipn->fb_tunnel_dev) {
Linus Torvalds's avatar
Linus Torvalds committed
618
619
620
621
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
622
			t = ipip_tunnel_locate(net, &p, 0);
Linus Torvalds's avatar
Linus Torvalds committed
623
624
		}
		if (t == NULL)
625
			t = netdev_priv(dev);
Linus Torvalds's avatar
Linus Torvalds committed
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

648
		t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
Linus Torvalds's avatar
Linus Torvalds committed
649

650
		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
Linus Torvalds's avatar
Linus Torvalds committed
651
652
653
654
655
656
657
658
659
660
661
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
662
				t = netdev_priv(dev);
663
				ipip_tunnel_unlink(ipn, t);
664
				synchronize_net();
Linus Torvalds's avatar
Linus Torvalds committed
665
666
667
668
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
669
				ipip_tunnel_link(ipn, t);
Linus Torvalds's avatar
Linus Torvalds committed
670
671
672
673
674
675
676
677
678
679
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
680
681
682
683
684
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					ipip_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
Linus Torvalds's avatar
Linus Torvalds committed
685
686
687
688
689
690
691
692
693
694
695
696
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

697
		if (dev == ipn->fb_tunnel_dev) {
Linus Torvalds's avatar
Linus Torvalds committed
698
699
700
701
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
702
			if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
Linus Torvalds's avatar
Linus Torvalds committed
703
704
				goto done;
			err = -EPERM;
705
			if (t->dev == ipn->fb_tunnel_dev)
Linus Torvalds's avatar
Linus Torvalds committed
706
707
708
				goto done;
			dev = t->dev;
		}
709
710
		unregister_netdevice(dev);
		err = 0;
Linus Torvalds's avatar
Linus Torvalds committed
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}

/*
 * ndo_change_mtu handler.
 *
 * Accepts an MTU from 68 up to 0xFFF8 minus the size of one IP header
 * (inclusive) and stores it in dev->mtu.
 *
 * Returns 0 on success, -EINVAL when new_mtu is out of range.
 */
static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	if (new_mtu >= 68 && new_mtu <= 0xFFF8 - sizeof(struct iphdr)) {
		dev->mtu = new_mtu;
		return 0;
	}

	return -EINVAL;
}

729
730
731
732
733
static const struct net_device_ops ipip_netdev_ops = {
	.ndo_uninit	= ipip_tunnel_uninit,
	.ndo_start_xmit	= ipip_tunnel_xmit,
	.ndo_do_ioctl	= ipip_tunnel_ioctl,
	.ndo_change_mtu	= ipip_tunnel_change_mtu,
Eric Dumazet's avatar
Eric Dumazet committed
734
	.ndo_get_stats  = ipip_get_stats,
735
736
};

Eric Dumazet's avatar
Eric Dumazet committed
737
738
739
740
741
742
static void ipip_dev_free(struct net_device *dev)
{
	free_percpu(dev->tstats);
	free_netdev(dev);
}

Linus Torvalds's avatar
Linus Torvalds committed
743
744
static void ipip_tunnel_setup(struct net_device *dev)
{
745
	dev->netdev_ops		= &ipip_netdev_ops;
Eric Dumazet's avatar
Eric Dumazet committed
746
	dev->destructor		= ipip_dev_free;
Linus Torvalds's avatar
Linus Torvalds committed
747
748
749

	dev->type		= ARPHRD_TUNNEL;
	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
750
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
Linus Torvalds's avatar
Linus Torvalds committed
751
752
753
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;
754
	dev->features		|= NETIF_F_NETNS_LOCAL;
Eric Dumazet's avatar
Eric Dumazet committed
755
	dev->features		|= NETIF_F_LLTX;
756
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
Linus Torvalds's avatar
Linus Torvalds committed
757
758
}

Eric Dumazet's avatar
Eric Dumazet committed
759
static int ipip_tunnel_init(struct net_device *dev)
Linus Torvalds's avatar
Linus Torvalds committed
760
{
761
	struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds's avatar
Linus Torvalds committed
762
763
764
765
766
767
768

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

769
	ipip_tunnel_bind_dev(dev);
Eric Dumazet's avatar
Eric Dumazet committed
770
771
772
773
774
775

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
776
777
}

778
static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
Linus Torvalds's avatar
Linus Torvalds committed
779
{
780
	struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds's avatar
Linus Torvalds committed
781
	struct iphdr *iph = &tunnel->parms.iph;
782
	struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
Linus Torvalds's avatar
Linus Torvalds committed
783
784
785
786
787
788
789
790

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version		= 4;
	iph->protocol		= IPPROTO_IPIP;
	iph->ihl		= 5;

791
792
793
794
	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

Linus Torvalds's avatar
Linus Torvalds committed
795
	dev_hold(dev);
Eric Dumazet's avatar
Eric Dumazet committed
796
	rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
797
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
798
799
}

800
static struct xfrm_tunnel ipip_handler __read_mostly = {
Linus Torvalds's avatar
Linus Torvalds committed
801
802
	.handler	=	ipip_rcv,
	.err_handler	=	ipip_err,
Herbert Xu's avatar
Herbert Xu committed
803
	.priority	=	1,
Linus Torvalds's avatar
Linus Torvalds committed
804
805
};

Stephen Hemminger's avatar
Stephen Hemminger committed
806
static const char banner[] __initconst =
Linus Torvalds's avatar
Linus Torvalds committed
807
808
	KERN_INFO "IPv4 over IPv4 tunneling driver\n";

809
static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
810
811
812
813
814
815
{
	int prio;

	for (prio = 1; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
Eric Dumazet's avatar
Eric Dumazet committed
816
			struct ip_tunnel *t;
817

Eric Dumazet's avatar
Eric Dumazet committed
818
			t = rtnl_dereference(ipn->tunnels[prio][h]);
819
820
			while (t != NULL) {
				unregister_netdevice_queue(t->dev, head);
Eric Dumazet's avatar
Eric Dumazet committed
821
				t = rtnl_dereference(t->next);
822
			}
823
824
825
826
		}
	}
}

827
static int __net_init ipip_init_net(struct net *net)
828
{
829
	struct ipip_net *ipn = net_generic(net, ipip_net_id);
830
831
	int err;

832
833
834
835
836
	ipn->tunnels[0] = ipn->tunnels_wc;
	ipn->tunnels[1] = ipn->tunnels_l;
	ipn->tunnels[2] = ipn->tunnels_r;
	ipn->tunnels[3] = ipn->tunnels_r_l;

837
838
839
840
841
842
843
	ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
					   "tunl0",
					   ipip_tunnel_setup);
	if (!ipn->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
844
	dev_net_set(ipn->fb_tunnel_dev, net);
845

846
847
848
	err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
	if (err)
		goto err_reg_dev;
849
850
851
852

	if ((err = register_netdev(ipn->fb_tunnel_dev)))
		goto err_reg_dev;

853
854
	return 0;

855
err_reg_dev:
856
	ipip_dev_free(ipn->fb_tunnel_dev);
857
858
err_alloc_dev:
	/* nothing */
859
860
861
	return err;
}

862
static void __net_exit ipip_exit_net(struct net *net)
863
{
864
	struct ipip_net *ipn = net_generic(net, ipip_net_id);
865
	LIST_HEAD(list);
866

867
	rtnl_lock();
868
869
870
	ipip_destroy_tunnels(ipn, &list);
	unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
	unregister_netdevice_many(&list);
871
	rtnl_unlock();
872
873
874
875
876
}

static struct pernet_operations ipip_net_ops = {
	.init = ipip_init_net,
	.exit = ipip_exit_net,
877
878
	.id   = &ipip_net_id,
	.size = sizeof(struct ipip_net),
879
880
};

Linus Torvalds's avatar
Linus Torvalds committed
881
882
883
884
885
886
static int __init ipip_init(void)
{
	int err;

	printk(banner);

887
888
889
890
891
892
	err = register_pernet_device(&ipip_net_ops);
	if (err < 0)
		return err;
	err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
	if (err < 0) {
		unregister_pernet_device(&ipip_net_ops);
Linus Torvalds's avatar
Linus Torvalds committed
893
894
895
896
897
898
899
		printk(KERN_INFO "ipip init: can't register tunnel\n");
	}
	return err;
}

/*
 * Module exit: undo ipip_init() in reverse order.  A failure to
 * deregister the handler is only logged; the per-namespace operations
 * are unregistered regardless.
 */
static void __exit ipip_fini(void)
{
	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
		printk(KERN_INFO "ipip close: can't deregister tunnel\n");

	unregister_pernet_device(&ipip_net_ops);
}

/* Module entry/exit points, licence and the "tunl0" netdev alias. */
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETDEV("tunl0");