/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The User Datagram Protocol (UDP).
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *		Hirokazu Takahashi, <taka@valinux.co.jp>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() calls
 *		Alan Cox	: 	stopped close while in use off icmp
 *					messages. Not a fix but a botch that
 *					for udp at least is 'valid'.
 *		Alan Cox	:	Fixed icmp handling properly
 *		Alan Cox	: 	Correct error for oversized datagrams
 *		Alan Cox	:	Tidied select() semantics.
 *		Alan Cox	:	udp_err() fixed properly, also now
 *					select and read wake correctly on errors
 *		Alan Cox	:	udp_send verify_area moved to avoid mem leak
 *		Alan Cox	:	UDP can count its memory
 *		Alan Cox	:	send to an unknown connection causes
 *					an ECONNREFUSED off the icmp, but
 *					does NOT close.
 *		Alan Cox	:	Switched to new sk_buff handlers. No more backlog!
 *		Alan Cox	:	Using generic datagram code. Even smaller and the PEEK
 *					bug no longer crashes it.
 *		Fred Van Kempen	: 	Net2e support for sk->broadcast.
 *		Alan Cox	:	Uses skb_free_datagram
 *		Alan Cox	:	Added get/set sockopt support.
 *		Alan Cox	:	Broadcasting without option set returns EACCES.
 *		Alan Cox	:	No wakeup calls. Instead we now use the callbacks.
 *		Alan Cox	:	Use ip_tos and ip_ttl
 *		Alan Cox	:	SNMP Mibs
 *		Alan Cox	:	MSG_DONTROUTE, and 0.0.0.0 support.
 *		Matt Dillon	:	UDP length checks.
 *		Alan Cox	:	Smarter af_inet used properly.
 *		Alan Cox	:	Use new kernel side addressing.
 *		Alan Cox	:	Incorrect return on truncated datagram receive.
 *	Arnt Gulbrandsen 	:	New udp_send and stuff
 *		Alan Cox	:	Cache last socket
 *		Alan Cox	:	Route cache
 *		Jon Peatfield	:	Minor efficiency fix to sendto().
 *		Mike Shaver	:	RFC1122 checks.
 *		Alan Cox	:	Nonblocking error fix.
 *	Willy Konynenberg	:	Transparent proxying support.
 *		Mike McLagan	:	Routing by source
 *		David S. Miller	:	New socket lookup architecture.
 *					Last socket cache retained as it
 *					does have a high hit rate.
 *		Olaf Kirch	:	Don't linearise iovec on sendmsg.
 *		Andi Kleen	:	Some cleanups, cache destination entry
 *					for connect.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *		Melvin Smith	:	Check msg_name not msg_namelen in sendto(),
 *					return ENOTCONN for unconnected sockets (POSIX)
 *		Janos Farkas	:	don't deliver multi/broadcasts to a different
 *					bound-to-device socket
 *	Hirokazu Takahashi	:	HW checksumming for outgoing UDP
 *					datagrams.
 *	Hirokazu Takahashi	:	sendfile() on UDP works now.
 *		Arnaldo C. Melo :	convert /proc/net/udp to seq_file
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov:		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	Derek Atkins <derek@ihtfp.com>: Add Encapsulation Support
 *	James Chapman		:	Add L2TP encapsulation type.
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/igmp.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <net/tcp_states.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/route.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include "udp_impl.h"

/* Socket hash table used by the plain-UDP entry points in this file. */
struct udp_table udp_table __read_mostly;
EXPORT_SYMBOL(udp_table);

/* NOTE(review): presumably the UDP memory-accounting sysctl knobs - confirm. */
long sysctl_udp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_udp_mem);

int sysctl_udp_rmem_min __read_mostly;
EXPORT_SYMBOL(sysctl_udp_rmem_min);

int sysctl_udp_wmem_min __read_mostly;
EXPORT_SYMBOL(sysctl_udp_wmem_min);

atomic_long_t udp_memory_allocated;
EXPORT_SYMBOL(udp_memory_allocated);

/*
 * PORTS_PER_CHAIN sizes the on-stack bitmap that udp_lib_get_port() uses to
 * record which ports of one hash chain are already taken.
 */
#define MAX_UDP_PORTS 65536
#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)

/*
 * Walk one primary hash chain looking for sockets that conflict with @sk.
 *
 * A socket sk2 on the chain conflicts when it lives in @net, is not @sk
 * itself, at least one of the two sockets has sk_reuse clear, the bound
 * devices are compatible (either unbound, or both bound to the same
 * interface) and @saddr_comp(sk, sk2) is non-zero.
 *
 * Two modes:
 *  - @bitmap == NULL: only sockets hashed on port @num are considered;
 *    returns 1 on the first conflict, 0 if there is none.
 *  - @bitmap != NULL: @num is ignored; for every conflicting socket the bit
 *    (udp_port_hash >> @log) is set in @bitmap and 0 is always returned.
 */
static int udp_lib_lport_inuse(struct net *net, __u16 num,
			       const struct udp_hslot *hslot,
			       unsigned long *bitmap,
			       struct sock *sk,
			       int (*saddr_comp)(const struct sock *sk1,
						 const struct sock *sk2),
			       unsigned int log)
{
	struct sock *sk2;
	struct hlist_nulls_node *node;

	sk_nulls_for_each(sk2, node, &hslot->head)
		if (net_eq(sock_net(sk2), net) &&
		    sk2 != sk &&
		    (bitmap || udp_sk(sk2)->udp_port_hash == num) &&
		    (!sk2->sk_reuse || !sk->sk_reuse) &&
		    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
		     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
		    (*saddr_comp)(sk, sk2)) {
			if (bitmap)
				__set_bit(udp_sk(sk2)->udp_port_hash >> log,
					  bitmap);
			else
				return 1;
		}
	return 0;
}

/*
 * Secondary-hash variant of udp_lib_lport_inuse(): scan @hslot2 (under its
 * own spinlock) for a socket bound to port @num that conflicts with @sk.
 * Returns 1 if such a socket exists, 0 otherwise.
 *
 * Note: we still hold spinlock of primary hash chain, so no other writer
 * can insert/delete a socket with local_port == num
 */
static int udp_lib_lport_inuse2(struct net *net, __u16 num,
			       struct udp_hslot *hslot2,
			       struct sock *sk,
			       int (*saddr_comp)(const struct sock *sk1,
						 const struct sock *sk2))
{
	struct sock *sk2;
	struct hlist_nulls_node *node;
	int res = 0;

	spin_lock(&hslot2->lock);
	udp_portaddr_for_each_entry(sk2, node, &hslot2->head)
		if (net_eq(sock_net(sk2), net) &&
		    sk2 != sk &&
		    (udp_sk(sk2)->udp_port_hash == num) &&
		    (!sk2->sk_reuse || !sk->sk_reuse) &&
		    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
		     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
		    (*saddr_comp)(sk, sk2)) {
			res = 1;
			break;
		}
	spin_unlock(&hslot2->lock);
	return res;
}

/**
187
 *  udp_lib_get_port  -  UDP/-Lite port lookup for IPv4 and IPv6
188
189
190
 *
 *  @sk:          socket struct in question
 *  @snum:        port number to look up
191
 *  @saddr_comp:  AF-dependent comparison of bound local IP addresses
Eric Dumazet's avatar
Eric Dumazet committed
192
193
 *  @hash2_nulladdr: AF-dependant hash value in secondary hash chains,
 *                   with NULL address
194
 */
195
int udp_lib_get_port(struct sock *sk, unsigned short snum,
196
		       int (*saddr_comp)(const struct sock *sk1,
Eric Dumazet's avatar
Eric Dumazet committed
197
198
					 const struct sock *sk2),
		     unsigned int hash2_nulladdr)
199
{
200
	struct udp_hslot *hslot, *hslot2;
201
	struct udp_table *udptable = sk->sk_prot->h.udp_table;
202
	int    error = 1;
203
	struct net *net = sock_net(sk);
Linus Torvalds's avatar
Linus Torvalds committed
204

205
	if (!snum) {
Eric Dumazet's avatar
Eric Dumazet committed
206
207
		int low, high, remaining;
		unsigned rand;
208
209
		unsigned short first, last;
		DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);
210

211
		inet_get_local_port_range(&low, &high);
212
		remaining = (high - low) + 1;
213

Eric Dumazet's avatar
Eric Dumazet committed
214
		rand = net_random();
215
216
217
218
		first = (((u64)rand * remaining) >> 32) + low;
		/*
		 * force rand to be an odd multiple of UDP_HTABLE_SIZE
		 */
219
		rand = (rand | 1) * (udptable->mask + 1);
Eric Dumazet's avatar
Eric Dumazet committed
220
221
		last = first + udptable->mask + 1;
		do {
222
			hslot = udp_hashslot(udptable, net, first);
223
			bitmap_zero(bitmap, PORTS_PER_CHAIN);
224
			spin_lock_bh(&hslot->lock);
225
			udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
226
					    saddr_comp, udptable->log);
227
228
229
230
231
232
233

			snum = first;
			/*
			 * Iterate on all possible values of snum for this hash.
			 * Using steps of an odd multiple of UDP_HTABLE_SIZE
			 * give us randomization and full range coverage.
			 */
Eric Dumazet's avatar
Eric Dumazet committed
234
			do {
235
				if (low <= snum && snum <= high &&
236
237
				    !test_bit(snum >> udptable->log, bitmap) &&
				    !inet_is_reserved_local_port(snum))
238
239
240
241
					goto found;
				snum += rand;
			} while (snum != first);
			spin_unlock_bh(&hslot->lock);
Eric Dumazet's avatar
Eric Dumazet committed
242
		} while (++first != last);
243
		goto fail;
244
	} else {
245
		hslot = udp_hashslot(udptable, net, snum);
246
		spin_lock_bh(&hslot->lock);
Eric Dumazet's avatar
Eric Dumazet committed
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
		if (hslot->count > 10) {
			int exist;
			unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum;

			slot2          &= udptable->mask;
			hash2_nulladdr &= udptable->mask;

			hslot2 = udp_hashslot2(udptable, slot2);
			if (hslot->count < hslot2->count)
				goto scan_primary_hash;

			exist = udp_lib_lport_inuse2(net, snum, hslot2,
						     sk, saddr_comp);
			if (!exist && (hash2_nulladdr != slot2)) {
				hslot2 = udp_hashslot2(udptable, hash2_nulladdr);
				exist = udp_lib_lport_inuse2(net, snum, hslot2,
							     sk, saddr_comp);
			}
			if (exist)
				goto fail_unlock;
			else
				goto found;
		}
scan_primary_hash:
271
272
		if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk,
					saddr_comp, 0))
273
274
			goto fail_unlock;
	}
275
found:
276
	inet_sk(sk)->inet_num = snum;
277
278
	udp_sk(sk)->udp_port_hash = snum;
	udp_sk(sk)->udp_portaddr_hash ^= snum;
Linus Torvalds's avatar
Linus Torvalds committed
279
	if (sk_unhashed(sk)) {
280
		sk_nulls_add_node_rcu(sk, &hslot->head);
281
		hslot->count++;
282
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
283
284
285
286
287
288
289

		hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
		spin_lock(&hslot2->lock);
		hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
					 &hslot2->head);
		hslot2->count++;
		spin_unlock(&hslot2->lock);
Linus Torvalds's avatar
Linus Torvalds committed
290
	}
291
	error = 0;
292
293
fail_unlock:
	spin_unlock_bh(&hslot->lock);
Linus Torvalds's avatar
Linus Torvalds committed
294
fail:
295
296
	return error;
}
Eric Dumazet's avatar
Eric Dumazet committed
297
EXPORT_SYMBOL(udp_lib_get_port);
298

299
static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
300
301
302
{
	struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);

Eric Dumazet's avatar
Eric Dumazet committed
303
	return 	(!ipv6_only_sock(sk2)  &&
304
305
		 (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr ||
		   inet1->inet_rcv_saddr == inet2->inet_rcv_saddr));
306
307
}

308
309
310
/*
 * Secondary-hash value for (@saddr, @port) in namespace @net: the address is
 * hashed with jhash_1word() seeded by net_hash_mix(@net), then XORed with
 * @port.  Callers mask the result with the table mask.
 */
static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr,
				       unsigned int port)
{
	return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port;
}

int udp_v4_get_port(struct sock *sk, unsigned short snum)
315
{
Eric Dumazet's avatar
Eric Dumazet committed
316
	unsigned int hash2_nulladdr =
317
		udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum);
Eric Dumazet's avatar
Eric Dumazet committed
318
319
320
	unsigned int hash2_partial =
		udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);

321
	/* precompute partial secondary hash */
Eric Dumazet's avatar
Eric Dumazet committed
322
323
	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
	return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr);
324
325
}

326
327
328
329
330
331
/*
 * Rate how well socket @sk matches an incoming packet, for primary-chain
 * lookups.
 *
 * Returns -1 when @sk cannot receive the packet (different namespace,
 * different port hash, IPv6-only, or any bound/connected field that is set
 * but differs).  Otherwise returns 1 for a PF_INET socket (0 for other
 * families) plus 2 for every field that is set and matches: bound local
 * address, connected remote address, connected remote port, bound device.
 *
 * @dport is not examined; the local port is compared through @hnum.
 */
static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
			 unsigned short hnum,
			 __be16 sport, __be32 daddr, __be16 dport, int dif)
{
	int score = -1;

	if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
			!ipv6_only_sock(sk)) {
		struct inet_sock *inet = inet_sk(sk);

		score = (sk->sk_family == PF_INET ? 1 : 0);
		if (inet->inet_rcv_saddr) {
			if (inet->inet_rcv_saddr != daddr)
				return -1;
			score += 2;
		}
		if (inet->inet_daddr) {
			if (inet->inet_daddr != saddr)
				return -1;
			score += 2;
		}
		if (inet->inet_dport) {
			if (inet->inet_dport != sport)
				return -1;
			score += 2;
		}
		if (sk->sk_bound_dev_if) {
			if (sk->sk_bound_dev_if != dif)
				return -1;
			score += 2;
		}
	}
	return score;
}

/*
 * Scoring for secondary-chain lookups: here (daddr, hnum) must be equal to
 * the socket's (inet_rcv_saddr, inet_num), so the local address is not a
 * wildcard and earns no points.
 *
 * Returns -1 for a non-matching socket, otherwise 1 for PF_INET (else 0)
 * plus 2 for each of: connected remote address, connected remote port,
 * bound device - when set and matching.  SCORE2_MAX is the best possible
 * score.
 */
#define SCORE2_MAX (1 + 2 + 2 + 2)
static inline int compute_score2(struct sock *sk, struct net *net,
				 __be32 saddr, __be16 sport,
				 __be32 daddr, unsigned int hnum, int dif)
{
	struct inet_sock *inet;
	int score;

	if (!net_eq(sock_net(sk), net) || ipv6_only_sock(sk))
		return -1;

	inet = inet_sk(sk);
	if (inet->inet_rcv_saddr != daddr || inet->inet_num != hnum)
		return -1;

	score = (sk->sk_family == PF_INET) ? 1 : 0;

	/* connected remote address */
	if (inet->inet_daddr) {
		if (inet->inet_daddr != saddr)
			return -1;
		score += 2;
	}

	/* connected remote port */
	if (inet->inet_dport) {
		if (inet->inet_dport != sport)
			return -1;
		score += 2;
	}

	/* bound device */
	if (sk->sk_bound_dev_if) {
		if (sk->sk_bound_dev_if != dif)
			return -1;
		score += 2;
	}

	return score;
}


/*
 * Find the best-scoring socket on secondary hash chain @hslot2 (slot index
 * @slot2) using compute_score2().  Returns the socket with a reference
 * taken, or NULL.
 *
 * called with rcu_read_lock() held
 */
static struct sock *udp4_lib_lookup2(struct net *net,
		__be32 saddr, __be16 sport,
		__be32 daddr, unsigned int hnum, int dif,
		struct udp_hslot *hslot2, unsigned int slot2)
{
	struct sock *sk, *result;
	struct hlist_nulls_node *node;
	int score, badness;

begin:
	result = NULL;
	badness = -1;
	udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
		score = compute_score2(sk, net, saddr, sport,
				      daddr, hnum, dif);
		if (score > badness) {
			result = sk;
			badness = score;
			/* cannot do better: stop scanning */
			if (score == SCORE2_MAX)
				goto exact_match;
		}
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(node) != slot2)
		goto begin;

	if (result) {
exact_match:
		/* refcount already zero: the socket is gone, report no match */
		if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
			result = NULL;
		/* re-score now that we hold a reference; retry if it got worse */
		else if (unlikely(compute_score2(result, net, saddr, sport,
				  daddr, hnum, dif) < badness)) {
			sock_put(result);
			goto begin;
		}
	}
	return result;
}

/*
 * Look up the socket in @udptable that best matches a packet travelling
 * from (@saddr, @sport) to (@daddr, @dport) on interface @dif in @net.
 * Returns the socket with a reference taken, or NULL.  Takes and releases
 * rcu_read_lock() itself.
 *
 * When the primary chain holds more than 10 sockets, the secondary chains
 * for (@daddr, port) and then (INADDR_ANY, port) are tried instead, unless
 * the secondary chain is longer than the primary one.
 *
 * UDP is nearly always wildcards out the wazoo, it makes no sense to try
 * harder than this. -DaveM
 */
static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
		__be16 sport, __be32 daddr, __be16 dport,
		int dif, struct udp_table *udptable)
{
	struct sock *sk, *result;
	struct hlist_nulls_node *node;
	unsigned short hnum = ntohs(dport);
	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
	int score, badness;

	rcu_read_lock();
	if (hslot->count > 10) {
		hash2 = udp4_portaddr_hash(net, daddr, hnum);
		slot2 = hash2 & udptable->mask;
		hslot2 = &udptable->hash2[slot2];
		if (hslot->count < hslot2->count)
			goto begin;

		result = udp4_lib_lookup2(net, saddr, sport,
					  daddr, hnum, dif,
					  hslot2, slot2);
		if (!result) {
			/* nothing bound to daddr: try wildcard-bound sockets */
			hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
			slot2 = hash2 & udptable->mask;
			hslot2 = &udptable->hash2[slot2];
			if (hslot->count < hslot2->count)
				goto begin;

			result = udp4_lib_lookup2(net, saddr, sport,
						  htonl(INADDR_ANY), hnum, dif,
						  hslot2, slot2);
		}
		rcu_read_unlock();
		return result;
	}
begin:
	result = NULL;
	badness = -1;
	sk_nulls_for_each_rcu(sk, node, &hslot->head) {
		score = compute_score(sk, net, saddr, hnum, sport,
				      daddr, dport, dif);
		if (score > badness) {
			result = sk;
			badness = score;
		}
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(node) != slot)
		goto begin;

	if (result) {
		/* refcount already zero: the socket is gone, report no match */
		if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
			result = NULL;
		/* re-score now that we hold a reference; retry if it got worse */
		else if (unlikely(compute_score(result, net, saddr, hnum, sport,
				  daddr, dport, dif) < badness)) {
			sock_put(result);
			goto begin;
		}
	}
	rcu_read_unlock();
	return result;
}

/*
 * Find the receiving socket for @skb.  A socket returned by
 * skb_steal_sock() is used as is; otherwise a normal lookup is done with
 * the addresses from the IP header, the ports supplied by the caller, the
 * namespace of the skb's dst device and the incoming interface index.
 */
static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
						 __be16 sport, __be16 dport,
						 struct udp_table *udptable)
{
	struct sock *sk;
	const struct iphdr *iph = ip_hdr(skb);

	if (unlikely(sk = skb_steal_sock(skb)))
		return sk;
	else
		return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport,
					 iph->daddr, dport, inet_iif(skb),
					 udptable);
}

/*
 * Exported wrapper: __udp4_lib_lookup() on the global udp_table.  The
 * returned socket (if any) carries a reference taken by the lookup.
 */
struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
			     __be32 daddr, __be16 dport, int dif)
{
	return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table);
}
EXPORT_SYMBOL_GPL(udp4_lib_lookup);

/*
 * Starting at @sk (inclusive), walk the rest of its hash chain and return
 * the first socket that should receive a multicast/broadcast datagram
 * addressed to (@loc_addr, @loc_port) from (@rmt_addr, @rmt_port) on
 * interface @dif, or NULL when the chain is exhausted.
 *
 * A socket is skipped when it is in another namespace, hashed on another
 * port, IPv6-only, has a connected/bound field that is set but differs, or
 * is rejected by ip_mc_sf_allow().
 */
static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
					     __be16 loc_port, __be32 loc_addr,
					     __be16 rmt_port, __be32 rmt_addr,
					     int dif)
{
	struct hlist_nulls_node *node;
	struct sock *s = sk;
	unsigned short hnum = ntohs(loc_port);

	sk_nulls_for_each_from(s, node) {
		struct inet_sock *inet = inet_sk(s);

		if (!net_eq(sock_net(s), net) ||
		    udp_sk(s)->udp_port_hash != hnum ||
		    (inet->inet_daddr && inet->inet_daddr != rmt_addr) ||
		    (inet->inet_dport != rmt_port && inet->inet_dport) ||
		    (inet->inet_rcv_saddr &&
		     inet->inet_rcv_saddr != loc_addr) ||
		    ipv6_only_sock(s) ||
		    (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
			continue;
		if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
			continue;
		goto found;
	}
	s = NULL;
found:
	return s;
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.
 * Header points to the ip header of the error packet. We move
 * on past this. Then (as it used to claim before adjustment)
 * header points to the first 8 bytes of the udp header.  We need
 * to find the appropriate port.
 */

578
void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
579
580
{
	struct inet_sock *inet;
Eric Dumazet's avatar
Eric Dumazet committed
581
582
	struct iphdr *iph = (struct iphdr *)skb->data;
	struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2));
583
584
585
586
587
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct sock *sk;
	int harderr;
	int err;
588
	struct net *net = dev_net(skb->dev);
589

590
	sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
591
592
			iph->saddr, uh->source, skb->dev->ifindex, udptable);
	if (sk == NULL) {
593
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
		return;	/* No socket for error */
	}

	err = 0;
	harderr = 0;
	inet = inet_sk(sk);

	switch (type) {
	default:
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	case ICMP_SOURCE_QUENCH:
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		harderr = 1;
		break;
	case ICMP_DEST_UNREACH:
		if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
			if (inet->pmtudisc != IP_PMTUDISC_DONT) {
				err = EMSGSIZE;
				harderr = 1;
				break;
			}
			goto out;
		}
		err = EHOSTUNREACH;
		if (code <= NR_ICMP_UNREACH) {
			harderr = icmp_err_convert[code].fatal;
			err = icmp_err_convert[code].errno;
		}
		break;
	}

	/*
	 *      RFC1122: OK.  Passes ICMP errors back to application, as per
	 *	4.1.3.3.
	 */
	if (!inet->recverr) {
		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
			goto out;
636
	} else
Eric Dumazet's avatar
Eric Dumazet committed
637
		ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1));
638

639
640
641
642
643
644
645
646
	sk->sk_err = err;
	sk->sk_error_report(sk);
out:
	sock_put(sk);
}

/* ICMP error entry point for plain UDP: uses the global udp_table. */
void udp_err(struct sk_buff *skb, u32 info)
{
	__udp4_lib_err(skb, info, &udp_table);
}

/*
 * Throw away all pending data and cancel the corking. Socket is locked.
 */
653
void udp_flush_pending_frames(struct sock *sk)
654
655
656
657
658
659
660
661
662
{
	struct udp_sock *up = udp_sk(sk);

	if (up->pending) {
		up->len = 0;
		up->pending = 0;
		ip_flush_pending_frames(sk);
	}
}
663
EXPORT_SYMBOL(udp_flush_pending_frames);
664
665

/**
Herbert Xu's avatar
Herbert Xu committed
666
 * 	udp4_hwcsum  -  handle outgoing HW checksumming
667
668
 * 	@skb: 	sk_buff containing the filled-in UDP header
 * 	        (checksum field must be zeroed out)
Herbert Xu's avatar
Herbert Xu committed
669
670
 *	@src:	source IP address
 *	@dst:	destination IP address
671
 */
Herbert Xu's avatar
Herbert Xu committed
672
static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
673
674
{
	struct udphdr *uh = udp_hdr(skb);
Herbert Xu's avatar
Herbert Xu committed
675
676
677
678
	struct sk_buff *frags = skb_shinfo(skb)->frag_list;
	int offset = skb_transport_offset(skb);
	int len = skb->len - offset;
	int hlen = len;
679
680
	__wsum csum = 0;

Herbert Xu's avatar
Herbert Xu committed
681
	if (!frags) {
682
683
684
685
686
		/*
		 * Only one fragment on the socket.
		 */
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct udphdr, check);
Herbert Xu's avatar
Herbert Xu committed
687
688
		uh->check = ~csum_tcpudp_magic(src, dst, len,
					       IPPROTO_UDP, 0);
689
690
691
692
693
694
	} else {
		/*
		 * HW-checksum won't work as there are two or more
		 * fragments on the socket so that all csums of sk_buffs
		 * should be together
		 */
Herbert Xu's avatar
Herbert Xu committed
695
696
697
698
		do {
			csum = csum_add(csum, frags->csum);
			hlen -= frags->len;
		} while ((frags = frags->next));
699

Herbert Xu's avatar
Herbert Xu committed
700
		csum = skb_checksum(skb, offset, hlen, csum);
701
702
703
704
705
706
707
708
		skb->ip_summed = CHECKSUM_NONE;

		uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
		if (uh->check == 0)
			uh->check = CSUM_MANGLED_0;
	}
}

Herbert Xu's avatar
Herbert Xu committed
709
static int udp_send_skb(struct sk_buff *skb, __be32 daddr, __be32 dport)
710
{
Herbert Xu's avatar
Herbert Xu committed
711
	struct sock *sk = skb->sk;
712
713
	struct inet_sock *inet = inet_sk(sk);
	struct udphdr *uh;
Herbert Xu's avatar
Herbert Xu committed
714
	struct rtable *rt = (struct rtable *)skb_dst(skb);
715
716
	int err = 0;
	int is_udplite = IS_UDPLITE(sk);
Herbert Xu's avatar
Herbert Xu committed
717
718
	int offset = skb_transport_offset(skb);
	int len = skb->len - offset;
719
720
721
722
723
724
	__wsum csum = 0;

	/*
	 * Create a UDP header
	 */
	uh = udp_hdr(skb);
Herbert Xu's avatar
Herbert Xu committed
725
726
727
	uh->source = inet->inet_sport;
	uh->dest = dport;
	uh->len = htons(len);
728
729
730
	uh->check = 0;

	if (is_udplite)  				 /*     UDP-Lite      */
Herbert Xu's avatar
Herbert Xu committed
731
		csum = udplite_csum(skb);
732
733
734
735
736
737
738
739

	else if (sk->sk_no_check == UDP_CSUM_NOXMIT) {   /* UDP csum disabled */

		skb->ip_summed = CHECKSUM_NONE;
		goto send;

	} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */

Herbert Xu's avatar
Herbert Xu committed
740
		udp4_hwcsum(skb, rt->rt_src, daddr);
741
742
		goto send;

Herbert Xu's avatar
Herbert Xu committed
743
744
	} else
		csum = udp_csum(skb);
745
746

	/* add protocol-dependent pseudo-header */
Herbert Xu's avatar
Herbert Xu committed
747
	uh->check = csum_tcpudp_magic(rt->rt_src, daddr, len,
Eric Dumazet's avatar
Eric Dumazet committed
748
				      sk->sk_protocol, csum);
749
750
751
752
	if (uh->check == 0)
		uh->check = CSUM_MANGLED_0;

send:
Herbert Xu's avatar
Herbert Xu committed
753
	err = ip_send_skb(skb);
Eric Dumazet's avatar
Eric Dumazet committed
754
755
756
757
758
759
760
761
762
	if (err) {
		if (err == -ENOBUFS && !inet->recverr) {
			UDP_INC_STATS_USER(sock_net(sk),
					   UDP_MIB_SNDBUFERRORS, is_udplite);
			err = 0;
		}
	} else
		UDP_INC_STATS_USER(sock_net(sk),
				   UDP_MIB_OUTDATAGRAMS, is_udplite);
Herbert Xu's avatar
Herbert Xu committed
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
	return err;
}

/*
 * Push out all pending data as one UDP datagram. Socket is locked.
 *
 * The destination comes from the flow saved in inet->cork.fl when the
 * socket was corked.  The pending state is cleared whether or not the send
 * succeeds.  Returns 0 when there was nothing to send, otherwise the result
 * of udp_send_skb().
 */
static int udp_push_pending_frames(struct sock *sk)
{
	struct udp_sock  *up = udp_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct flowi *fl = &inet->cork.fl;
	struct sk_buff *skb;
	int err = 0;

	skb = ip_finish_skb(sk);
	if (!skb)
		goto out;

	err = udp_send_skb(skb, fl->fl4_dst, fl->fl_ip_dport);

out:
	up->len = 0;
	up->pending = 0;
	return err;
}

/*
 * Send (or cork) one UDP datagram.
 *
 * @iocb: unused here
 * @sk:   sending socket
 * @msg:  user message: optional destination in msg_name, payload in
 *        msg_iov, optional ancillary data, flags in msg_flags
 * @len:  payload length; more than 0xFFFF is rejected with -EMSGSIZE
 *
 * Returns @len on success or a negative errno.
 *
 * Without corking (no corkflag, no MSG_MORE) the datagram is built with
 * ip_make_skb() and sent without the socket lock.  Otherwise the data is
 * appended to the pending datagram under lock_sock().
 *
 * IP options allocated by ip_cmsg_send() (free == 1) are owned by this
 * function and must be released on every exit path.
 */
int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		size_t len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct udp_sock *up = udp_sk(sk);
	int ulen = len;
	struct ipcm_cookie ipc;
	struct rtable *rt = NULL;
	int free = 0;
	int connected = 0;
	__be32 daddr, faddr, saddr;
	__be16 dport;
	u8  tos;
	int err, is_udplite = IS_UDPLITE(sk);
	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
	struct sk_buff *skb;

	if (len > 0xFFFF)
		return -EMSGSIZE;

	/*
	 *	Check the flags.
	 */

	if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */
		return -EOPNOTSUPP;

	ipc.opt = NULL;
	ipc.tx_flags = 0;

	getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;

	if (up->pending) {
		/*
		 * There are pending frames.
		 * The socket lock must be held while it's corked.
		 */
		lock_sock(sk);
		if (likely(up->pending)) {
			if (unlikely(up->pending != AF_INET)) {
				release_sock(sk);
				return -EINVAL;
			}
			goto do_append_data;
		}
		release_sock(sk);
	}
	ulen += sizeof(struct udphdr);

	/*
	 *	Get and verify the address.
	 */
	if (msg->msg_name) {
		struct sockaddr_in * usin = (struct sockaddr_in *)msg->msg_name;
		if (msg->msg_namelen < sizeof(*usin))
			return -EINVAL;
		if (usin->sin_family != AF_INET) {
			if (usin->sin_family != AF_UNSPEC)
				return -EAFNOSUPPORT;
		}

		daddr = usin->sin_addr.s_addr;
		dport = usin->sin_port;
		if (dport == 0)
			return -EINVAL;
	} else {
		if (sk->sk_state != TCP_ESTABLISHED)
			return -EDESTADDRREQ;
		daddr = inet->inet_daddr;
		dport = inet->inet_dport;
		/* Open fast path for connected socket.
		   Route will not be used, if at least one option is set.
		 */
		connected = 1;
	}
	ipc.addr = inet->inet_saddr;

	ipc.oif = sk->sk_bound_dev_if;
	err = sock_tx_timestamp(sk, &ipc.tx_flags);
	if (err)
		return err;
	if (msg->msg_controllen) {
		err = ip_cmsg_send(sock_net(sk), msg, &ipc);
		if (err) {
			/*
			 * An earlier control message may already have
			 * allocated options before a later one failed;
			 * ipc.opt started out NULL, so this frees only
			 * what ip_cmsg_send() stored.
			 */
			kfree(ipc.opt);
			return err;
		}
		if (ipc.opt)
			free = 1;
		connected = 0;
	}
	if (!ipc.opt)
		ipc.opt = inet->opt;

	saddr = ipc.addr;
	ipc.addr = faddr = daddr;

	if (ipc.opt && ipc.opt->srr) {
		if (!daddr) {
			/* don't leak options allocated by ip_cmsg_send() */
			if (free)
				kfree(ipc.opt);
			return -EINVAL;
		}
		faddr = ipc.opt->faddr;
		connected = 0;
	}
	tos = RT_TOS(inet->tos);
	if (sock_flag(sk, SOCK_LOCALROUTE) ||
	    (msg->msg_flags & MSG_DONTROUTE) ||
	    (ipc.opt && ipc.opt->is_strictroute)) {
		tos |= RTO_ONLINK;
		connected = 0;
	}

	if (ipv4_is_multicast(daddr)) {
		if (!ipc.oif)
			ipc.oif = inet->mc_index;
		if (!saddr)
			saddr = inet->mc_addr;
		connected = 0;
	}

	if (connected)
		rt = (struct rtable *)sk_dst_check(sk, 0);

	if (rt == NULL) {
		struct flowi fl = { .oif = ipc.oif,
				    .mark = sk->sk_mark,
				    .fl4_dst = faddr,
				    .fl4_src = saddr,
				    .fl4_tos = tos,
				    .proto = sk->sk_protocol,
				    .flags = inet_sk_flowi_flags(sk),
				    .fl_ip_sport = inet->inet_sport,
				    .fl_ip_dport = dport };
		struct net *net = sock_net(sk);

		security_sk_classify_flow(sk, &fl);
		err = ip_route_output_flow(net, &rt, &fl, sk, true);
		if (err) {
			if (err == -ENETUNREACH)
				IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
			goto out;
		}

		err = -EACCES;
		if ((rt->rt_flags & RTCF_BROADCAST) &&
		    !sock_flag(sk, SOCK_BROADCAST))
			goto out;
		if (connected)
			sk_dst_set(sk, dst_clone(&rt->dst));
	}

	if (msg->msg_flags&MSG_CONFIRM)
		goto do_confirm;
back_from_confirm:

	saddr = rt->rt_src;
	if (!ipc.addr)
		daddr = ipc.addr = rt->rt_dst;

	/* Lockless fast path for the non-corking case. */
	if (!corkreq) {
		skb = ip_make_skb(sk, getfrag, msg->msg_iov, ulen,
				  sizeof(struct udphdr), &ipc, &rt,
				  msg->msg_flags);
		/* a NULL skb leaves err == 0: nothing to send, not an error */
		err = PTR_ERR(skb);
		if (skb && !IS_ERR(skb))
			err = udp_send_skb(skb, daddr, dport);
		goto out;
	}

	lock_sock(sk);
	if (unlikely(up->pending)) {
		/* The socket is already corked while preparing it. */
		/* ... which is an evident application bug. --ANK */
		release_sock(sk);

		LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
		err = -EINVAL;
		goto out;
	}
	/*
	 *	Now cork the socket to pend data.
	 */
	inet->cork.fl.fl4_dst = daddr;
	inet->cork.fl.fl_ip_dport = dport;
	inet->cork.fl.fl4_src = saddr;
	inet->cork.fl.fl_ip_sport = inet->inet_sport;
	up->pending = AF_INET;

do_append_data:
	up->len += ulen;
	err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
			sizeof(struct udphdr), &ipc, &rt,
			corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
	if (err)
		udp_flush_pending_frames(sk);
	else if (!corkreq)
		err = udp_push_pending_frames(sk);
	else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
		up->pending = 0;
	release_sock(sk);

out:
	ip_rt_put(rt);
	if (free)
		kfree(ipc.opt);
	if (!err)
		return len;
	/*
	 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
	 * ENOBUFS might not be good (it's not tunable per se), but otherwise
	 * we don't have a good statistic (IpOutDiscards but it can be too many
	 * things).  We could add another new stat but at least for now that
	 * seems like overkill.
	 */
	if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
		UDP_INC_STATS_USER(sock_net(sk),
				UDP_MIB_SNDBUFERRORS, is_udplite);
	}
	return err;

do_confirm:
	dst_confirm(&rt->dst);
	if (!(msg->msg_flags&MSG_PROBE) || len)
		goto back_from_confirm;
	err = 0;
	goto out;
}
EXPORT_SYMBOL(udp_sendmsg);


/*
 * Append one page fragment to the datagram currently being built on @sk.
 *
 * If nothing is pending yet, an empty udp_sendmsg() with MSG_MORE is issued
 * first so that the pending state is set up (the sendpage interface has no
 * way to carry a destination address).
 *
 * Returns @size on success, a negative errno on failure, or whatever
 * sock_no_sendpage() returns when ip_append_page() reports -EOPNOTSUPP.
 */
int udp_sendpage(struct sock *sk, struct page *page, int offset,
		 size_t size, int flags)
{
	struct udp_sock *up = udp_sk(sk);
	int ret;

	if (!up->pending) {
		struct msghdr hdr = { .msg_flags = flags | MSG_MORE };

		/*
		 * Let udp_sendmsg() establish the pending datagram; this
		 * will succeed only when the socket is connected.
		 */
		ret = udp_sendmsg(NULL, sk, &hdr, 0);
		if (ret < 0)
			return ret;
	}

	lock_sock(sk);

	/* Still nothing pending once we hold the lock: application bug. */
	if (unlikely(!up->pending)) {
		release_sock(sk);

		LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n");
		return -EINVAL;
	}

	ret = ip_append_page(sk, page, offset, size, flags);
	if (ret == -EOPNOTSUPP) {
		/* Page append not supported here: use the generic fallback. */
		release_sock(sk);
		return sock_no_sendpage(sk->sk_socket, page, offset,
					size, flags);
	}

	if (ret < 0) {
		udp_flush_pending_frames(sk);
	} else {
		up->len += size;
		/* Transmit now unless the caller asked for more data. */
		if (!up->corkflag && !(flags & MSG_MORE))
			ret = udp_push_pending_frames(sk);
		if (ret == 0)
			ret = size;
	}

	release_sock(sk);
	return ret;
}

Eric Dumazet's avatar
Eric Dumazet committed
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085

/**
 *	first_packet_length	- return length of first packet in receive queue
 *	@sk: socket
 *
 *	Drops all bad checksum frames, until a valid one is found.
 *	Returns the length of found skb, or 0 if none is found.
 */
static unsigned int first_packet_length(struct sock *sk)
{
	struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue;
	struct sk_buff *skb;
	unsigned int res;

	__skb_queue_head_init(&list_kill);

	spin_lock_bh(&rcvq->lock);
	while ((skb = skb_peek(rcvq)) != NULL &&
		udp_lib_checksum_complete(skb)) {
		UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
				 IS_UDPLITE(sk));
1086
		atomic_inc(&sk->sk_drops);
Eric Dumazet's avatar
Eric Dumazet committed
1087
1088
1089
1090
1091
1092
1093
		__skb_unlink(skb, rcvq);
		__skb_queue_tail(&list_kill, skb);
	}
	res = skb ? skb->len : 0;
	spin_unlock_bh(&rcvq->lock);

	if (!skb_queue_empty(&list_kill)) {
1094
1095
		bool slow = lock_sock_fast(sk);

Eric Dumazet's avatar
Eric Dumazet committed
1096
1097
		__skb_queue_purge(&list_kill);
		sk_mem_reclaim_partial(sk);
1098
		unlock_sock_fast(sk, slow);
Eric Dumazet's avatar
Eric Dumazet committed
1099
1100
1101
1102
	}
	return res;
}

Linus Torvalds's avatar
Linus Torvalds committed
1103
1104
1105
/*
 *	IOCTL requests applicable to the UDP protocol
 */
1106

Linus Torvalds's avatar
Linus Torvalds committed
1107
1108
int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
1109
1110
	switch (cmd) {
	case SIOCOUTQ:
Linus Torvalds's avatar
Linus Torvalds committed
1111
	{
1112
1113
		int amount = sk_wmem_alloc_get(sk);

1114
1115
		return put_user(amount, (int __user *)arg);
	}
Linus Torvalds's avatar
Linus Torvalds committed
1116

1117
1118
	case SIOCINQ:
	{
Eric Dumazet's avatar
Eric Dumazet committed
1119
		unsigned int amount = first_packet_length(sk);
1120

Eric Dumazet's avatar
Eric Dumazet committed
1121
		if (amount)
1122
1123
1124
1125
1126
			/*
			 * We will only return the amount
			 * of this packet since that is all
			 * that will be read.
			 */
Eric Dumazet's avatar
Eric Dumazet committed
1127
1128
			amount -= sizeof(struct udphdr);

1129
1130
		return put_user(amount, (int __user *)arg);
	}
Linus Torvalds's avatar
Linus Torvalds committed
1131

1132
1133
	default:
		return -ENOIOCTLCMD;
Linus Torvalds's avatar
Linus Torvalds committed
1134
	}
1135
1136

	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
1137
}
Eric Dumazet's avatar
Eric Dumazet committed
1138
EXPORT_SYMBOL(udp_ioctl);
Linus Torvalds's avatar
Linus Torvalds committed
1139

1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
/*
 * 	This should be easy, if there is something there we
 * 	return it, otherwise we block.
 *
 *	@iocb:     not used by this function
 *	@sk:       socket to receive from
 *	@msg:      message header; msg_iov receives the payload, msg_name
 *	           (if set) receives the sender address, msg_flags gets
 *	           MSG_TRUNC when the payload did not fit
 *	@len:      size of the caller's buffer
 *	@noblock:  non-zero adds MSG_DONTWAIT to the dequeue flags
 *	@flags:    MSG_* receive flags
 *	@addr_len: if non-NULL, set to sizeof(struct sockaddr_in)
 *
 *	Returns the number of payload bytes copied, or the full payload
 *	length of the datagram when MSG_TRUNC is set in @flags, or a
 *	negative errno.  MSG_ERRQUEUE requests are handed to ip_recv_error().
 */

int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		size_t len, int noblock, int flags, int *addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
	struct sk_buff *skb;
	unsigned int ulen, copied;
	int peeked;
	int err;
	int is_udplite = IS_UDPLITE(sk);
	bool slow;

	/*
	 *	Check any passed addresses
	 */
	if (addr_len)
		*addr_len = sizeof(*sin);

	if (flags & MSG_ERRQUEUE)
		return ip_recv_error(sk, msg, len);

try_again:
	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
				  &peeked, &err);
	if (!skb)
		goto out;

	ulen = skb->len - sizeof(struct udphdr);
	/*
	 * Work out how much of THIS datagram to copy without touching
	 * 'len': if the datagram is dropped below and we retry, the next
	 * one must be measured against the caller's original buffer size.
	 */
	if (len > ulen) {
		copied = ulen;
	} else {
		copied = len;
		if (len < ulen)
			msg->msg_flags |= MSG_TRUNC;
	}

	/*
	 * If checksum is needed at all, try to do it while copying the
	 * data.  If the data is truncated, or if we only want a partial
	 * coverage checksum (UDP-Lite), do it before the copy.
	 */

	if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
		if (udp_lib_checksum_complete(skb))
			goto csum_copy_err;
	}

	if (skb_csum_unnecessary(skb))
		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
					      msg->msg_iov, copied);
	else {
		err = skb_copy_and_csum_datagram_iovec(skb,
						       sizeof(struct udphdr),
						       msg->msg_iov);

		if (err == -EINVAL)
			goto csum_copy_err;
	}

	if (err)
		goto out_free;

	if (!peeked)
		UDP_INC_STATS_USER(sock_net(sk),
				UDP_MIB_INDATAGRAMS, is_udplite);

	sock_recv_ts_and_drops(msg, sk, skb);

	/* Copy the address. */
	if (sin) {
		sin->sin_family = AF_INET;
		sin->sin_port = udp_hdr(skb)->source;
		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
	}
	if (inet->cmsg_flags)
		ip_cmsg_recv(msg, skb);

	err = copied;
	if (flags & MSG_TRUNC)
		err = ulen;

out_free:
	skb_free_datagram_locked(sk, skb);
out:
	return err;

csum_copy_err:
	slow = lock_sock_fast(sk);
	if (!skb_kill_datagram(sk, skb, flags))
		UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
	unlock_sock_fast(sk, slow);

	if (noblock)
		return -EAGAIN;

	/*
	 * Starting over for a new packet: MSG_TRUNC may have been set for
	 * the datagram we just dropped and must not leak into the result
	 * reported for the next one.
	 */
	msg->msg_flags &= ~MSG_TRUNC;
	goto try_again;
}


Linus Torvalds's avatar
Linus Torvalds committed
1241
1242
1243
1244
1245
1246
int udp_disconnect(struct sock *sk, int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	/*
	 *	1003.1g - break association.
	 */
1247

Linus Torvalds's avatar
Linus Torvalds committed
1248
	sk->sk_state = TCP_CLOSE;
1249
1250
	inet->inet_daddr = 0;
	inet->inet_dport = 0;
1251
	sock_rps_save_rxhash(sk, 0);
Linus Torvalds's avatar
Linus Torvalds committed
1252
1253
1254
1255
1256
1257
	sk->sk_bound_dev_if = 0;
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
		sk->sk_prot->unhash(sk);
1258
		inet->inet_sport = 0;
Linus Torvalds's avatar
Linus Torvalds committed
1259
1260
1261
1262
	}
	sk_dst_reset(sk);
	return 0;
}
Eric Dumazet's avatar
Eric Dumazet committed
1263
EXPORT_SYMBOL(udp_disconnect);
Linus Torvalds's avatar
Linus Torvalds committed
1264

1265
1266
void udp_lib_unhash(struct sock *sk)
{
1267
1268
	if (sk_hashed(sk)) {
		struct udp_table *udptable = sk->sk_prot->h.udp_table;
1269
1270
1271
1272
1273
		struct udp_hslot *hslot, *hslot2;

		hslot  = udp_hashslot(udptable, sock_net(sk),
				      udp_sk(sk)->udp_port_hash);
		hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
1274

1275
1276
		spin_lock_bh(&hslot->lock);
		if (sk_nulls_del_node_init_rcu(sk)) {
1277
			hslot->count--;
1278
			inet_sk(sk)->inet_num = 0;
1279
			sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
1280
1281
1282
1283
1284

			spin_lock(&hslot2->lock);
			hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
			hslot2->count--;
			spin_unlock(&hslot2->lock);
1285
1286
		}
		spin_unlock_bh(&hslot->lock);
1287
1288
1289
1290
	}
}
EXPORT_SYMBOL(udp_lib_unhash);

Eric Dumazet's avatar
Eric Dumazet committed
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
/*
 * inet_rcv_saddr was changed, we must rehash secondary hash
 *
 * Records @newhash as the socket's port+address hash and, when that maps
 * to a different secondary slot, moves the socket from the old slot to
 * the new one.  Nothing is done for a socket that is not hashed.
 */
void udp_lib_rehash(struct sock *sk, u16 newhash)
{
	struct udp_table *table;
	struct udp_hslot *primary, *from2, *to2;
	struct udp_sock *up;

	if (!sk_hashed(sk))
		return;

	up = udp_sk(sk);
	table = sk->sk_prot->h.udp_table;

	from2 = udp_hashslot2(table, up->udp_portaddr_hash);
	to2 = udp_hashslot2(table, newhash);
	up->udp_portaddr_hash = newhash;

	/* Same secondary slot: the stored hash is all that had to change. */
	if (from2 == to2)
		return;

	primary = udp_hashslot(table, sock_net(sk), up->udp_port_hash);

	/* we must lock primary chain too */
	spin_lock_bh(&primary->lock);

	/* Unlink from the old secondary chain ... */
	spin_lock(&from2->lock);
	hlist_nulls_del_init_rcu(&up->udp_portaddr_node);
	from2->count--;
	spin_unlock(&from2->lock);

	/* ... and link at the head of the new one. */
	spin_lock(&to2->lock);
	hlist_nulls_add_head_rcu(&up->udp_portaddr_node, &to2->head);
	to2->count++;
	spin_unlock(&to2->lock);

	spin_unlock_bh(&primary->lock);
}
EXPORT_SYMBOL(udp_lib_rehash);

static void udp_v4_rehash(struct sock *sk)
{
	u16 new_hash = udp4_portaddr_hash(sock_net(sk),
					  inet_sk(sk)->inet_rcv_saddr,
					  inet_sk(sk)->inet_num);
	udp_lib_rehash(sk, new_hash);
}

Herbert Xu's avatar
Herbert Xu committed
1334
1335
static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
Tom Herbert's avatar
Tom Herbert committed
1336
	int rc;
Eric Dumazet's avatar
Eric Dumazet committed
1337

Tom Herbert's avatar
Tom Herbert committed
1338
	if (inet_sk(sk)->inet_daddr)
1339
		sock_rps_save_rxhash(sk, skb->rxhash);
Tom Herbert's avatar
Tom Herbert committed
1340

Eric Dumazet's avatar
Eric Dumazet committed
1341
	rc = ip_queue_rcv_skb(sk, skb);
Eric Dumazet's avatar