raw.c 24.1 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		RAW - implementation of IP "raw" sockets.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() fixed up
 *		Alan Cox	:	ICMP error handling
 *		Alan Cox	:	EMSGSIZE if you send too big a packet
 *		Alan Cox	: 	Now uses generic datagrams and shared
 *					skbuff library. No more peek crashes,
 *					no more backlogs
 *		Alan Cox	:	Checks sk->broadcast.
 *		Alan Cox	:	Uses skb_free_datagram/skb_copy_datagram
 *		Alan Cox	:	Raw passes ip options too
 *		Alan Cox	:	Setsocketopt added
 *		Alan Cox	:	Fixed error return for broadcasts
 *		Alan Cox	:	Removed wake_up calls
 *		Alan Cox	:	Use ttl/tos
 *		Alan Cox	:	Cleaned up old debugging
 *		Alan Cox	:	Use new kernel side addresses
 *	Arnt Gulbrandsen	:	Fixed MSG_DONTROUTE in raw sockets.
 *		Alan Cox	:	BSD style RAW socket demultiplexing.
 *		Alan Cox	:	Beginnings of mrouted support.
 *		Alan Cox	:	Added IP_HDRINCL option.
 *		Alan Cox	:	Skip broadcast check if BSDism set.
 *		David S. Miller	:	New socket lookup architecture.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
39

40
#include <linux/types.h>
Linus Torvalds's avatar
Linus Torvalds committed
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#include <asm/atomic.h>
#include <asm/byteorder.h>
#include <asm/current.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/aio.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/sockios.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/mroute.h>
#include <linux/netdevice.h>
#include <linux/in_route.h>
#include <linux/route.h>
#include <linux/skbuff.h>
60
#include <net/net_namespace.h>
Linus Torvalds's avatar
Linus Torvalds committed
61
62
63
64
65
66
67
68
69
#include <net/dst.h>
#include <net/sock.h>
#include <linux/ip.h>
#include <linux/net.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <net/snmp.h>
70
#include <net/tcp_states.h>
Linus Torvalds's avatar
Linus Torvalds committed
71
72
73
74
75
76
77
78
#include <net/inet_common.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
79
#include <linux/compat.h>
Linus Torvalds's avatar
Linus Torvalds committed
80

81
static struct raw_hashinfo raw_v4_hashinfo = {
82
	.lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
83
};
Linus Torvalds's avatar
Linus Torvalds committed
84

85
void raw_hash_sk(struct sock *sk)
Linus Torvalds's avatar
Linus Torvalds committed
86
{
87
	struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
88
	struct hlist_head *head;
Linus Torvalds's avatar
Linus Torvalds committed
89

90
	head = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)];
91
92

	write_lock_bh(&h->lock);
Linus Torvalds's avatar
Linus Torvalds committed
93
	sk_add_node(sk, head);
94
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
95
96
97
98
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(raw_hash_sk);

99
void raw_unhash_sk(struct sock *sk)
100
{
101
102
	struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;

103
104
	write_lock_bh(&h->lock);
	if (sk_del_node_init(sk))
105
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
106
107
108
109
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(raw_unhash_sk);

110
111
/*
 * Walk the hash chain starting at @sk (inclusive) and return the first
 * socket that matches, or NULL when the chain is exhausted.
 *
 * A socket matches when it lives in @net, its inet_num equals @num, and
 * each of its remote address, bound local address and bound device is
 * either unset (zero, i.e. wildcard) or equal to @raddr, @laddr and @dif
 * respectively.
 */
static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
		unsigned short num, __be32 raddr, __be32 laddr, int dif)
{
	struct hlist_node *node;

	sk_for_each_from(sk, node) {
		struct inet_sock *inet = inet_sk(sk);

		if (!net_eq(sock_net(sk), net) || inet->inet_num != num)
			continue;
		if (inet->inet_daddr && inet->inet_daddr != raddr)
			continue;
		if (inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr)
			continue;
		if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
			continue;

		return sk; /* gotcha */
	}

	return NULL;
}

/*
 *	0 - deliver
 *	1 - block
 */
static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
{
	int type;

	if (!pskb_may_pull(skb, sizeof(struct icmphdr)))
		return 1;

140
	type = icmp_hdr(skb)->type;
Linus Torvalds's avatar
Linus Torvalds committed
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
	if (type < 32) {
		__u32 data = raw_sk(sk)->filter.data;

		return ((1 << type) & data) != 0;
	}

	/* Do not block unknown ICMP types */
	return 0;
}

/* IP input processing comes here for RAW socket delivery.
 * Caller owns SKB, so we must make clones.
 *
 * RFC 1122: SHOULD pass TOS value up to the transport layer.
 * -> It does. And not only TOS, but all IP header.
 */
157
static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
Linus Torvalds's avatar
Linus Torvalds committed
158
159
160
{
	struct sock *sk;
	struct hlist_head *head;
161
	int delivered = 0;
162
	struct net *net;
Linus Torvalds's avatar
Linus Torvalds committed
163

164
165
	read_lock(&raw_v4_hashinfo.lock);
	head = &raw_v4_hashinfo.ht[hash];
Linus Torvalds's avatar
Linus Torvalds committed
166
167
	if (hlist_empty(head))
		goto out;
168

169
	net = dev_net(skb->dev);
170
	sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
Linus Torvalds's avatar
Linus Torvalds committed
171
172
173
174
			     iph->saddr, iph->daddr,
			     skb->dev->ifindex);

	while (sk) {
175
		delivered = 1;
Linus Torvalds's avatar
Linus Torvalds committed
176
177
178
179
180
181
182
		if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) {
			struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);

			/* Not releasing hash table! */
			if (clone)
				raw_rcv(sk, clone);
		}
183
		sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
Linus Torvalds's avatar
Linus Torvalds committed
184
185
186
187
				     iph->saddr, iph->daddr,
				     skb->dev->ifindex);
	}
out:
188
	read_unlock(&raw_v4_hashinfo.lock);
189
	return delivered;
Linus Torvalds's avatar
Linus Torvalds committed
190
191
}

192
193
194
195
196
int raw_local_deliver(struct sk_buff *skb, int protocol)
{
	int hash;
	struct sock *raw_sk;

197
198
	hash = protocol & (RAW_HTABLE_SIZE - 1);
	raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
199
200
201
202
203
204
205
206
207
208
209
210

	/* If there maybe a raw socket we must check - if not we
	 * don't care less
	 */
	if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash))
		raw_sk = NULL;

	return raw_sk != NULL;

}

/*
 * Translate an incoming ICMP error (@skb, with extra data @info) into a
 * socket error on raw socket @sk.
 *
 * Nothing is reported unless the socket enabled IP_RECVERR or is
 * connected.  With IP_RECVERR the error is also queued through
 * ip_icmp_error(); sk_err is set (and the error reported) when
 * IP_RECVERR is on or the error is classified as hard.
 */
static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
{
	struct inet_sock *inet = inet_sk(sk);
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	int harderr = 0;
	int err = 0;

	/* Report error on raw socket, if:
	   1. User requested ip_recverr.
	   2. Socket is connected (otherwise the error indication
	      is useless without ip_recverr and error is hard.
	 */
	if (!inet->recverr && sk->sk_state != TCP_ESTABLISHED)
		return;

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		return;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		harderr = 1;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH) {
			/* Code outside the conversion table. */
			err = EHOSTUNREACH;
			break;
		}
		if (code == ICMP_FRAG_NEEDED) {
			harderr = inet->pmtudisc != IP_PMTUDISC_DONT;
			err = EMSGSIZE;
		} else {
			err = icmp_err_convert[code].errno;
			harderr = icmp_err_convert[code].fatal;
		}
		break;
	case ICMP_TIME_EXCEEDED:
	default:
		err = EHOSTUNREACH;
		break;
	}

	if (inet->recverr) {
		struct iphdr *iph = (struct iphdr *)skb->data;
		u8 *payload;

		/* With IP_HDRINCL the payload starts at the IP header. */
		if (inet->hdrincl)
			payload = skb->data;
		else
			payload = skb->data + (iph->ihl << 2);

		ip_icmp_error(sk, skb, err, 0, info, payload);
	}

	if (inet->recverr || harderr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	}
}

264
265
266
267
268
/*
 * Hand an ICMP error to every raw socket hashed under @protocol that
 * matches the IP header found at skb->data.  The header's destination is
 * used as the socket's remote address and its source as the local one.
 */
void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
{
	int hash = protocol & (RAW_HTABLE_SIZE - 1);
	struct sock *sk;

	read_lock(&raw_v4_hashinfo.lock);

	sk = sk_head(&raw_v4_hashinfo.ht[hash]);
	if (sk != NULL) {
		struct iphdr *iph = (struct iphdr *)skb->data;
		struct net *net = dev_net(skb->dev);

		for (;;) {
			sk = __raw_v4_lookup(net, sk, protocol,
					     iph->daddr, iph->saddr,
					     skb->dev->ifindex);
			if (sk == NULL)
				break;

			raw_err(sk, skb, info);
			sk = sk_next(sk);
			/* Re-read the header pointer after raw_err(). */
			iph = (struct iphdr *)skb->data;
		}
	}

	read_unlock(&raw_v4_hashinfo.lock);
}

Linus Torvalds's avatar
Linus Torvalds committed
290
291
292
static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
{
	/* Charge it to the socket. */
293

Eric Dumazet's avatar
Eric Dumazet committed
294
	if (ip_queue_rcv_skb(sk, skb) < 0) {
Linus Torvalds's avatar
Linus Torvalds committed
295
296
297
298
299
300
301
302
303
304
		kfree_skb(skb);
		return NET_RX_DROP;
	}

	return NET_RX_SUCCESS;
}

int raw_rcv(struct sock *sk, struct sk_buff *skb)
{
	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
Wang Chen's avatar
Wang Chen committed
305
		atomic_inc(&sk->sk_drops);
Linus Torvalds's avatar
Linus Torvalds committed
306
307
308
		kfree_skb(skb);
		return NET_RX_DROP;
	}
309
	nf_reset(skb);
Linus Torvalds's avatar
Linus Torvalds committed
310

311
	skb_push(skb, skb->data - skb_network_header(skb));
Linus Torvalds's avatar
Linus Torvalds committed
312
313
314
315
316

	raw_rcv_skb(sk, skb);
	return 0;
}

317
/*
 * Transmit a user-built IP datagram (IP_HDRINCL).
 *
 * @sk:     sending raw socket
 * @from:   iovec holding the complete datagram, IP header included
 * @length: datagram length in bytes
 * @rtp:    in/out route; set to NULL once the route reference has been
 *          handed to the skb
 * @flags:  msg_flags of the send call (MSG_PROBE, MSG_DONTWAIT honoured)
 *
 * Returns 0 on success or when only probing, a negative errno otherwise.
 * -ENOBUFS is hidden from the caller unless IP_RECVERR is enabled.
 *
 * The datagram must be at least sizeof(struct iphdr) bytes long; without
 * that check the header fields read below (ihl, protocol, ...) would come
 * from uninitialised skb memory.
 */
static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
			struct rtable **rtp,
			unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct net *net = sock_net(sk);
	struct iphdr *iph;
	struct sk_buff *skb;
	unsigned int iphlen;
	int err;
	struct rtable *rt = *rtp;

	if (length > rt->dst.dev->mtu) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport,
			       rt->dst.dev->mtu);
		return -EMSGSIZE;
	}
	/* Too short to even contain a minimal IP header. */
	if (length < sizeof(struct iphdr))
		return -EINVAL;

	if (flags&MSG_PROBE)
		goto out;

	skb = sock_alloc_send_skb(sk,
				  length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15,
				  flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto error;
	skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev));

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	skb_dst_set(skb, &rt->dst);
	/* The skb now carries the route; the caller must not release it. */
	*rtp = NULL;

	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	skb_put(skb, length);

	skb->ip_summed = CHECKSUM_NONE;

	skb->transport_header = skb->network_header;
	err = -EFAULT;
	if (memcpy_fromiovecend((void *)iph, from, 0, length))
		goto error_free;

	iphlen = iph->ihl * 4;

	/*
	 * We don't want to modify the ip header, but we do need to
	 * be sure that it won't cause problems later along the network
	 * stack.  Specifically we want to make sure that iph->ihl is a
	 * sane value.  If ihl points beyond the length of the buffer passed
	 * in, reject the frame as invalid
	 */
	err = -EINVAL;
	if (iphlen > length)
		goto error_free;

	if (iphlen >= sizeof(*iph)) {
		if (!iph->saddr)
			iph->saddr = rt->rt_src;
		iph->check   = 0;
		iph->tot_len = htons(length);
		if (!iph->id)
			ip_select_ident(iph, &rt->dst, NULL);

		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);

		/*
		 * Account outgoing ICMP by its real type.  The ICMP header
		 * sits after the IP header (iphlen bytes in), not at the
		 * transport header, which equals the network header here.
		 * Only look at it when the datagram is long enough to
		 * contain it.
		 */
		if (iph->protocol == IPPROTO_ICMP &&
		    length >= iphlen + sizeof(struct icmphdr))
			icmp_out_count(net, ((struct icmphdr *)
				((u8 *)iph + iphlen))->type);
	}

	err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL,
		      rt->dst.dev, dst_output);
	if (err > 0)
		err = net_xmit_errno(err);
	if (err)
		goto error;
out:
	return 0;

error_free:
	kfree_skb(skb);
error:
	IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
	if (err == -ENOBUFS && !inet->recverr)
		err = 0;
	return err;
}

Heiko Carstens's avatar
Heiko Carstens committed
405
static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
Linus Torvalds's avatar
Linus Torvalds committed
406
407
408
409
410
{
	struct iovec *iov;
	u8 __user *type = NULL;
	u8 __user *code = NULL;
	int probed = 0;
411
	unsigned int i;
Linus Torvalds's avatar
Linus Torvalds committed
412
413

	if (!msg->msg_iov)
Heiko Carstens's avatar
Heiko Carstens committed
414
		return 0;
Linus Torvalds's avatar
Linus Torvalds committed
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435

	for (i = 0; i < msg->msg_iovlen; i++) {
		iov = &msg->msg_iov[i];
		if (!iov)
			continue;

		switch (fl->proto) {
		case IPPROTO_ICMP:
			/* check if one-byte field is readable or not. */
			if (iov->iov_base && iov->iov_len < 1)
				break;

			if (!type) {
				type = iov->iov_base;
				/* check if code field is readable or not. */
				if (iov->iov_len > 1)
					code = type + 1;
			} else if (!code)
				code = iov->iov_base;

			if (type && code) {
Heiko Carstens's avatar
Heiko Carstens committed
436
437
438
				if (get_user(fl->fl_icmp_type, type) ||
				    get_user(fl->fl_icmp_code, code))
					return -EFAULT;
Linus Torvalds's avatar
Linus Torvalds committed
439
440
441
442
443
444
445
446
447
448
				probed = 1;
			}
			break;
		default:
			probed = 1;
			break;
		}
		if (probed)
			break;
	}
Heiko Carstens's avatar
Heiko Carstens committed
449
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
450
451
452
453
454
455
456
457
458
}

/*
 * sendmsg() handler for IPv4 raw sockets.
 *
 * Validates the request, resolves the destination (from msg_name or from
 * the connected peer), applies control messages and IP options, looks up
 * a route and then sends either through raw_send_hdrinc() (IP_HDRINCL)
 * or through the ip_append_data()/ip_push_pending_frames() path.
 *
 * Returns @len on success or a negative errno.
 */
static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		       size_t len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipcm_cookie ipc;
	struct rtable *rt = NULL;
	int opt_allocated = 0;
	__be32 daddr;
	__be32 saddr;
	u8  tos;
	int err;

	if (len > 0xFFFF) {
		err = -EMSGSIZE;
		goto out;
	}

	/*
	 *	Check the flags.
	 */

	if (msg->msg_flags & MSG_OOB) {	/* Mirror BSD error message */
		err = -EOPNOTSUPP;	/* compatibility */
		goto out;
	}

	/*
	 *	Get and verify the address.
	 */

	if (msg->msg_namelen) {
		struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name;

		if (msg->msg_namelen < sizeof(*usin)) {
			err = -EINVAL;
			goto out;
		}
		if (usin->sin_family != AF_INET) {
			static int complained;

			if (!complained++)
				printk(KERN_INFO "%s forgot to set AF_INET in "
						 "raw sendmsg. Fix it!\n",
						 current->comm);
			/* A zero family is tolerated, anything else is not. */
			if (usin->sin_family) {
				err = -EAFNOSUPPORT;
				goto out;
			}
		}
		daddr = usin->sin_addr.s_addr;
		/* ANK: I did not forget to get protocol from port field.
		 * I just do not know, who uses this weirdness.
		 * IP_HDRINCL is much more convenient.
		 */
	} else {
		if (sk->sk_state != TCP_ESTABLISHED) {
			err = -EDESTADDRREQ;
			goto out;
		}
		daddr = inet->inet_daddr;
	}

	ipc.addr = inet->inet_saddr;
	ipc.opt = NULL;
	ipc.tx_flags = 0;
	ipc.oif = sk->sk_bound_dev_if;

	if (msg->msg_controllen) {
		err = ip_cmsg_send(sock_net(sk), msg, &ipc);
		if (err)
			goto out;
		/* Options delivered by cmsg are ours to free. */
		if (ipc.opt)
			opt_allocated = 1;
	}

	saddr = ipc.addr;
	ipc.addr = daddr;

	if (!ipc.opt)
		ipc.opt = inet->opt;

	if (ipc.opt) {
		err = -EINVAL;
		/* Linux does not mangle headers on raw sockets,
		 * so that IP options + IP_HDRINCL is non-sense.
		 */
		if (inet->hdrincl)
			goto done;
		if (ipc.opt->srr) {
			if (!daddr)
				goto done;
			daddr = ipc.opt->faddr;
		}
	}

	tos = RT_CONN_FLAGS(sk);
	if (msg->msg_flags & MSG_DONTROUTE)
		tos |= RTO_ONLINK;

	if (ipv4_is_multicast(daddr)) {
		if (!ipc.oif)
			ipc.oif = inet->mc_index;
		if (!saddr)
			saddr = inet->mc_addr;
	}

	{
		struct flowi fl = { .oif = ipc.oif,
				    .mark = sk->sk_mark,
				    .fl4_dst = daddr,
				    .fl4_src = saddr,
				    .fl4_tos = tos,
				    .proto = inet->hdrincl ? IPPROTO_RAW :
							     sk->sk_protocol,
				  };

		if (!inet->hdrincl) {
			err = raw_probe_proto_opt(&fl, msg);
			if (err)
				goto done;
		}

		security_sk_classify_flow(sk, &fl);
		err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, true);
	}
	if (err)
		goto done;

	if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST)) {
		err = -EACCES;
		goto done;
	}

	if (msg->msg_flags & MSG_CONFIRM) {
		dst_confirm(&rt->dst);
		/* A zero-length probe only confirms the route. */
		if ((msg->msg_flags & MSG_PROBE) && !len) {
			err = 0;
			goto done;
		}
	}

	if (inet->hdrincl) {
		err = raw_send_hdrinc(sk, msg->msg_iov, len,
					&rt, msg->msg_flags);
	} else {
		if (!ipc.addr)
			ipc.addr = rt->rt_dst;
		lock_sock(sk);
		err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0,
					&ipc, &rt, msg->msg_flags);
		if (err) {
			ip_flush_pending_frames(sk);
		} else if (!(msg->msg_flags & MSG_MORE)) {
			err = ip_push_pending_frames(sk);
			if (err == -ENOBUFS && !inet->recverr)
				err = 0;
		}
		release_sock(sk);
	}

done:
	if (opt_allocated)
		kfree(ipc.opt);
	ip_rt_put(rt);

out:
	if (err < 0)
		return err;
	return len;
}

static void raw_close(struct sock *sk, long timeout)
{
618
	/*
Linus Torvalds's avatar
Linus Torvalds committed
619
620
621
622
623
624
625
	 * Raw sockets may have direct kernel refereneces. Kill them.
	 */
	ip_ra_control(sk, 0, NULL);

	sk_common_release(sk);
}

626
/* Discard any frames still pending on @sk, with the socket locked. */
static void raw_destroy(struct sock *sk)
{
	lock_sock(sk);

	ip_flush_pending_frames(sk);

	release_sock(sk);
}

Linus Torvalds's avatar
Linus Torvalds committed
633
634
635
636
637
638
639
640
641
642
/* This gets rid of all the nasties in af_inet. -DaveM */
static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
	int ret = -EINVAL;
	int chk_addr_ret;

	if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
		goto out;
643
	chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
Linus Torvalds's avatar
Linus Torvalds committed
644
645
646
647
	ret = -EADDRNOTAVAIL;
	if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
	    chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
		goto out;
648
	inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
Linus Torvalds's avatar
Linus Torvalds committed
649
	if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
650
		inet->inet_saddr = 0;  /* Use device */
Linus Torvalds's avatar
Linus Torvalds committed
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
	sk_dst_reset(sk);
	ret = 0;
out:	return ret;
}

/*
 *	This should be easy, if there is something there
 *	we return it, otherwise we block.
 *
 *	Returns the number of bytes copied (or the full datagram length
 *	when MSG_TRUNC was requested), or an error code.  MSG_ERRQUEUE
 *	requests are served by ip_recv_error().
 */

static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		       size_t len, int noblock, int flags, int *addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
	struct sk_buff *skb;
	size_t copied;
	int err = -EOPNOTSUPP;

	if (flags & MSG_OOB)
		return err;

	if (addr_len)
		*addr_len = sizeof(*sin);

	if (flags & MSG_ERRQUEUE)
		return ip_recv_error(sk, msg, len);

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb)
		return err;

	/* Copy at most @len bytes and flag truncation to the caller. */
	copied = skb->len;
	if (len < copied) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto done;

	sock_recv_ts_and_drops(msg, sk, skb);

	/* Copy the address. */
	if (sin) {
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
		sin->sin_port = 0;
		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
	}
	if (inet->cmsg_flags)
		ip_cmsg_recv(msg, skb);
	if (flags & MSG_TRUNC)
		copied = skb->len;
done:
	skb_free_datagram(sk, skb);
	if (err)
		return err;
	return copied;
}

static int raw_init(struct sock *sk)
{
	struct raw_sock *rp = raw_sk(sk);

720
	if (inet_sk(sk)->inet_num == IPPROTO_ICMP)
Linus Torvalds's avatar
Linus Torvalds committed
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
		memset(&rp->filter, 0, sizeof(rp->filter));
	return 0;
}

/*
 * Copy a new ICMP filter from user space into @sk.  At most
 * sizeof(struct icmp_filter) bytes are taken from @optval.
 * Returns 0 or -EFAULT.
 */
static int raw_seticmpfilter(struct sock *sk, char __user *optval, int optlen)
{
	struct icmp_filter *filter = &raw_sk(sk)->filter;

	if (optlen > sizeof(struct icmp_filter))
		optlen = sizeof(struct icmp_filter);

	return copy_from_user(filter, optval, optlen) ? -EFAULT : 0;
}

/*
 * Copy @sk's ICMP filter to user space.  The user-supplied length is
 * clamped to sizeof(struct icmp_filter) and written back through
 * @optlen.  Returns 0, -EINVAL for a negative length, or -EFAULT.
 */
static int raw_geticmpfilter(struct sock *sk, char __user *optval, int __user *optlen)
{
	int len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < 0)
		return -EINVAL;

	if (len > sizeof(struct icmp_filter))
		len = sizeof(struct icmp_filter);

	if (put_user(len, optlen) ||
	    copy_to_user(optval, &raw_sk(sk)->filter, len))
		return -EFAULT;

	return 0;
}

753
static int do_raw_setsockopt(struct sock *sk, int level, int optname,
754
			  char __user *optval, unsigned int optlen)
Linus Torvalds's avatar
Linus Torvalds committed
755
756
{
	if (optname == ICMP_FILTER) {
757
		if (inet_sk(sk)->inet_num != IPPROTO_ICMP)
Linus Torvalds's avatar
Linus Torvalds committed
758
759
760
761
762
763
764
			return -EOPNOTSUPP;
		else
			return raw_seticmpfilter(sk, optval, optlen);
	}
	return -ENOPROTOOPT;
}

765
static int raw_setsockopt(struct sock *sk, int level, int optname,
766
			  char __user *optval, unsigned int optlen)
Linus Torvalds's avatar
Linus Torvalds committed
767
768
{
	if (level != SOL_RAW)
769
770
771
		return ip_setsockopt(sk, level, optname, optval, optlen);
	return do_raw_setsockopt(sk, level, optname, optval, optlen);
}
Linus Torvalds's avatar
Linus Torvalds committed
772

773
774
#ifdef CONFIG_COMPAT
static int compat_raw_setsockopt(struct sock *sk, int level, int optname,
775
				 char __user *optval, unsigned int optlen)
776
777
{
	if (level != SOL_RAW)
778
		return compat_ip_setsockopt(sk, level, optname, optval, optlen);
779
780
781
782
783
784
785
	return do_raw_setsockopt(sk, level, optname, optval, optlen);
}
#endif

/*
 * SOL_RAW getsockopt backend.  Only ICMP_FILTER is known, and only on
 * sockets whose protocol number is IPPROTO_ICMP.
 */
static int do_raw_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	if (optname != ICMP_FILTER)
		return -ENOPROTOOPT;

	if (inet_sk(sk)->inet_num != IPPROTO_ICMP)
		return -EOPNOTSUPP;

	return raw_geticmpfilter(sk, optval, optlen);
}

795
796
797
798
799
800
801
802
803
804
/* getsockopt entry point: SOL_RAW is handled here, the rest by IP. */
static int raw_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	if (level == SOL_RAW)
		return do_raw_getsockopt(sk, level, optname, optval, optlen);

	return ip_getsockopt(sk, level, optname, optval, optlen);
}

#ifdef CONFIG_COMPAT
/* Compat getsockopt entry point: SOL_RAW here, the rest by compat IP. */
static int compat_raw_getsockopt(struct sock *sk, int level, int optname,
				 char __user *optval, int __user *optlen)
{
	if (level == SOL_RAW)
		return do_raw_getsockopt(sk, level, optname, optval, optlen);

	return compat_ip_getsockopt(sk, level, optname, optval, optlen);
}
#endif

Linus Torvalds's avatar
Linus Torvalds committed
813
814
815
816
static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	switch (cmd) {
		case SIOCOUTQ: {
817
818
			int amount = sk_wmem_alloc_get(sk);

Linus Torvalds's avatar
Linus Torvalds committed
819
820
821
822
823
824
			return put_user(amount, (int __user *)arg);
		}
		case SIOCINQ: {
			struct sk_buff *skb;
			int amount = 0;

825
			spin_lock_bh(&sk->sk_receive_queue.lock);
Linus Torvalds's avatar
Linus Torvalds committed
826
827
828
			skb = skb_peek(&sk->sk_receive_queue);
			if (skb != NULL)
				amount = skb->len;
829
			spin_unlock_bh(&sk->sk_receive_queue.lock);
Linus Torvalds's avatar
Linus Torvalds committed
830
831
832
833
834
835
836
837
838
839
840
841
			return put_user(amount, (int __user *)arg);
		}

		default:
#ifdef CONFIG_IP_MROUTE
			return ipmr_ioctl(sk, cmd, (void __user *)arg);
#else
			return -ENOIOCTLCMD;
#endif
	}
}

842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl handler.  SIOCOUTQ and SIOCINQ are answered with
 * -ENOIOCTLCMD here; other commands go to ipmr_compat_ioctl() when
 * CONFIG_IP_MROUTE is set.
 */
static int compat_raw_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg)
{
	if (cmd == SIOCOUTQ || cmd == SIOCINQ)
		return -ENOIOCTLCMD;

#ifdef CONFIG_IP_MROUTE
	return ipmr_compat_ioctl(sk, cmd, compat_ptr(arg));
#else
	return -ENOIOCTLCMD;
#endif
}
#endif

Linus Torvalds's avatar
Linus Torvalds committed
859
struct proto raw_prot = {
860
861
862
	.name		   = "RAW",
	.owner		   = THIS_MODULE,
	.close		   = raw_close,
Denis V. Lunev's avatar
Denis V. Lunev committed
863
	.destroy	   = raw_destroy,
864
865
866
867
868
869
870
871
872
873
	.connect	   = ip4_datagram_connect,
	.disconnect	   = udp_disconnect,
	.ioctl		   = raw_ioctl,
	.init		   = raw_init,
	.setsockopt	   = raw_setsockopt,
	.getsockopt	   = raw_getsockopt,
	.sendmsg	   = raw_sendmsg,
	.recvmsg	   = raw_recvmsg,
	.bind		   = raw_bind,
	.backlog_rcv	   = raw_rcv_skb,
874
875
	.hash		   = raw_hash_sk,
	.unhash		   = raw_unhash_sk,
876
	.obj_size	   = sizeof(struct raw_sock),
877
	.h.raw_hash	   = &raw_v4_hashinfo,
878
#ifdef CONFIG_COMPAT
879
880
	.compat_setsockopt = compat_raw_setsockopt,
	.compat_getsockopt = compat_raw_getsockopt,
881
	.compat_ioctl	   = compat_raw_ioctl,
882
#endif
Linus Torvalds's avatar
Linus Torvalds committed
883
884
885
886
887
888
};

#ifdef CONFIG_PROC_FS
static struct sock *raw_get_first(struct seq_file *seq)
{
	struct sock *sk;
889
	struct raw_iter_state *state = raw_seq_private(seq);
Linus Torvalds's avatar
Linus Torvalds committed
890

891
892
	for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE;
			++state->bucket) {
Linus Torvalds's avatar
Linus Torvalds committed
893
894
		struct hlist_node *node;

895
		sk_for_each(sk, node, &state->h->ht[state->bucket])
896
			if (sock_net(sk) == seq_file_net(seq))
Linus Torvalds's avatar
Linus Torvalds committed
897
898
899
900
901
902
903
904
905
				goto found;
	}
	sk = NULL;
found:
	return sk;
}

static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
{
906
	struct raw_iter_state *state = raw_seq_private(seq);
Linus Torvalds's avatar
Linus Torvalds committed
907
908
909
910
911

	do {
		sk = sk_next(sk);
try_again:
		;
912
	} while (sk && sock_net(sk) != seq_file_net(seq));
Linus Torvalds's avatar
Linus Torvalds committed
913

914
	if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
915
		sk = sk_head(&state->h->ht[state->bucket]);
Linus Torvalds's avatar
Linus Torvalds committed
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
		goto try_again;
	}
	return sk;
}

/*
 * Return the socket at zero-based position @pos of the iteration, or
 * NULL when there are fewer than @pos + 1 sockets.
 */
static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos)
{
	struct sock *sk = raw_get_first(seq);

	while (sk && pos) {
		sk = raw_get_next(seq, sk);
		if (sk != NULL)
			--pos;
	}

	return pos ? NULL : sk;
}

931
/*
 * seq_file ->start: take the hash table read lock (dropped again in
 * raw_seq_stop()) and position the iterator.  Position 0 yields
 * SEQ_START_TOKEN; position N > 0 yields socket number N - 1.
 */
void *raw_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct raw_iter_state *state = raw_seq_private(seq);

	read_lock(&state->h->lock);

	if (!*pos)
		return SEQ_START_TOKEN;

	return raw_get_idx(seq, *pos - 1);
}
938
EXPORT_SYMBOL_GPL(raw_seq_start);
Linus Torvalds's avatar
Linus Torvalds committed
939

940
/*
 * seq_file ->next: advance from @v (the start token or a socket) to the
 * next socket and bump *@pos.
 */
void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct sock *sk = (v == SEQ_START_TOKEN) ? raw_get_first(seq)
						 : raw_get_next(seq, v);

	++*pos;
	return sk;
}
951
EXPORT_SYMBOL_GPL(raw_seq_next);
Linus Torvalds's avatar
Linus Torvalds committed
952

953
void raw_seq_stop(struct seq_file *seq, void *v)
Linus Torvalds's avatar
Linus Torvalds committed
954
{
955
956
957
	struct raw_iter_state *state = raw_seq_private(seq);

	read_unlock(&state->h->lock);
Linus Torvalds's avatar
Linus Torvalds committed
958
}
959
EXPORT_SYMBOL_GPL(raw_seq_stop);
Linus Torvalds's avatar
Linus Torvalds committed
960

961
static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
Linus Torvalds's avatar
Linus Torvalds committed
962
963
{
	struct inet_sock *inet = inet_sk(sp);
964
965
	__be32 dest = inet->inet_daddr,
	       src = inet->inet_rcv_saddr;
Linus Torvalds's avatar
Linus Torvalds committed
966
	__u16 destp = 0,
967
	      srcp  = inet->inet_num;
Linus Torvalds's avatar
Linus Torvalds committed
968

969
	seq_printf(seq, "%4d: %08X:%04X %08X:%04X"
970
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
971
		i, src, srcp, dest, destp, sp->sk_state,
972
973
		sk_wmem_alloc_get(sp),
		sk_rmem_alloc_get(sp),
Linus Torvalds's avatar
Linus Torvalds committed
974
		0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
Wang Chen's avatar
Wang Chen committed
975
		atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
Linus Torvalds's avatar
Linus Torvalds committed
976
977
978
979
980
}

static int raw_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
981
982
		seq_printf(seq, "  sl  local_address rem_address   st tx_queue "
				"rx_queue tr tm->when retrnsmt   uid  timeout "
Eric Dumazet's avatar
Eric Dumazet committed
983
				"inode ref pointer drops\n");
984
985
	else
		raw_sock_seq_show(seq, v, raw_seq_private(seq)->bucket);
Linus Torvalds's avatar
Linus Torvalds committed
986
987
988
	return 0;
}

989
static const struct seq_operations raw_seq_ops = {
Linus Torvalds's avatar
Linus Torvalds committed
990
991
992
993
994
995
	.start = raw_seq_start,
	.next  = raw_seq_next,
	.stop  = raw_seq_stop,
	.show  = raw_seq_show,
};

996
997
int raw_seq_open(struct inode *ino, struct file *file,
		 struct raw_hashinfo *h, const struct seq_operations *ops)
Linus Torvalds's avatar
Linus Torvalds committed
998
{
999
	int err;
1000
1001
	struct raw_iter_state *i;

1002
	err = seq_open_net(ino, file, ops, sizeof(struct raw_iter_state));
1003
1004
	if (err < 0)
		return err;
1005

1006
	i = raw_seq_private((struct seq_file *)file->private_data);
1007
1008
1009
1010
1011
1012
1013
	i->h = h;
	return 0;
}
EXPORT_SYMBOL_GPL(raw_seq_open);

static int raw_v4_seq_open(struct inode *inode, struct file *file)
{
1014
	return raw_seq_open(inode, file, &raw_v4_hashinfo, &raw_seq_ops);
Linus Torvalds's avatar
Linus Torvalds committed
1015
1016
}

1017
static const struct file_operations raw_seq_fops = {
Linus Torvalds's avatar
Linus Torvalds committed
1018
	.owner	 = THIS_MODULE,
1019
	.open	 = raw_v4_seq_open,
Linus Torvalds's avatar
Linus Torvalds committed
1020
1021
	.read	 = seq_read,
	.llseek	 = seq_lseek,
1022
	.release = seq_release_net,
Linus Torvalds's avatar
Linus Torvalds committed
1023
1024
};

1025
static __net_init int raw_init_net(struct net *net)
Linus Torvalds's avatar
Linus Torvalds committed
1026
{
1027
	if (!proc_net_fops_create(net, "raw", S_IRUGO, &raw_seq_fops))
Linus Torvalds's avatar
Linus Torvalds committed
1028
		return -ENOMEM;
1029

Linus Torvalds's avatar
Linus Torvalds committed
1030
1031
1032
	return 0;
}

1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
/* Per-namespace exit: remove the "raw" proc entry. */
static __net_exit void raw_exit_net(struct net *net)
{
	proc_net_remove(net, "raw");
}

/* Per-namespace hooks managing the "raw" proc entry. */
static __net_initdata struct pernet_operations raw_net_ops = {
	.init = raw_init_net,
	.exit = raw_exit_net,
};

/* Register the per-namespace proc hooks; returns the registration result. */
int __init raw_proc_init(void)
{
	return register_pernet_subsys(&raw_net_ops);
}

Linus Torvalds's avatar
Linus Torvalds committed
1048
1049
/* Unregister the per-namespace proc hooks. */
void __init raw_proc_exit(void)
{
	unregister_pernet_subsys(&raw_net_ops);
}
#endif /* CONFIG_PROC_FS */