/*
 *	UDP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/ipv4/udp.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *      Kazunori MIYAZAWA @USAGI:       change process style to use ip6_append_data
 *      YOSHIFUJI Hideaki @USAGI:	convert /proc/net/udp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/init.h>
35
#include <linux/module.h>
36
#include <linux/skbuff.h>
37
#include <linux/slab.h>
Linus Torvalds's avatar
Linus Torvalds committed
38
39
40
41
42
43
44
#include <asm/uaccess.h>

#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/raw.h>
45
#include <net/tcp_states.h>
Linus Torvalds's avatar
Linus Torvalds committed
46
47
48
49
50
#include <net/ip6_checksum.h>
#include <net/xfrm.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>
51
#include "udp_impl.h"
Linus Torvalds's avatar
Linus Torvalds committed
52

53
54
55
56
/*
 * Decide whether the receive addresses of two sockets overlap, i.e. whether
 * they would conflict when bound to the same port.
 *
 * Returns non-zero when the addresses are considered equal/overlapping and
 * 0 otherwise.  When inet6_rcv_saddr() yields no IPv6 address for @sk2, the
 * second socket is handled as if its address were v4-mapped.
 */
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
{
	const struct in6_addr *addr6_a = &inet6_sk(sk)->rcv_saddr;
	const struct in6_addr *addr6_b = inet6_rcv_saddr(sk2);
	__be32 addr4_a = sk_rcv_saddr(sk);
	__be32 addr4_b = sk_rcv_saddr(sk2);
	int v6only_a = ipv6_only_sock(sk);
	int v6only_b = inet_v6_ipv6only(sk2);
	int type_a = ipv6_addr_type(addr6_a);
	int type_b = IPV6_ADDR_MAPPED;

	if (addr6_b)
		type_b = ipv6_addr_type(addr6_b);

	/* Both sides v4-mapped: compare the way IPv4 sockets would be. */
	if (type_a == IPV6_ADDR_MAPPED && type_b == IPV6_ADDR_MAPPED) {
		if (v6only_b)
			return 0;
		if (!addr4_a || !addr4_b)
			return 1;
		return addr4_a == addr4_b;
	}

	/*
	 * A wildcard on either side overlaps with everything, except when
	 * the wildcard socket is v6-only and the other side is v4-mapped.
	 */
	if (type_b == IPV6_ADDR_ANY) {
		if (!v6only_b || type_a != IPV6_ADDR_MAPPED)
			return 1;
	}
	if (type_a == IPV6_ADDR_ANY) {
		if (!v6only_a || type_b != IPV6_ADDR_MAPPED)
			return 1;
	}

	/* Otherwise only an exact IPv6 address match counts. */
	if (addr6_b && ipv6_addr_equal(addr6_a, addr6_b))
		return 1;

	return 0;
}

85
86
87
88
89
90
91
92
static unsigned int udp6_portaddr_hash(struct net *net,
				       const struct in6_addr *addr6,
				       unsigned int port)
{
	unsigned int hash, mix = net_hash_mix(net);

	if (ipv6_addr_any(addr6))
		hash = jhash_1word(0, mix);
Brian Haley's avatar
Brian Haley committed
93
	else if (ipv6_addr_v4mapped(addr6))
94
		hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix);
95
	else
96
		hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix);
97
98
99
100
101

	return hash ^ port;
}


102
int udp_v6_get_port(struct sock *sk, unsigned short snum)
Linus Torvalds's avatar
Linus Torvalds committed
103
{
Eric Dumazet's avatar
Eric Dumazet committed
104
105
106
107
108
	unsigned int hash2_nulladdr =
		udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
	unsigned int hash2_partial = 
		udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0);

109
	/* precompute partial secondary hash */
Eric Dumazet's avatar
Eric Dumazet committed
110
111
	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
	return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr);
Linus Torvalds's avatar
Linus Torvalds committed
112
113
}

Eric Dumazet's avatar
Eric Dumazet committed
114
115
116
117
118
119
120
121
122
/*
 * Recompute the (receive address, local port) hash of @sk and pass it to
 * udp_lib_rehash().  The hash is narrowed to 16 bits before the call.
 */
static void udp_v6_rehash(struct sock *sk)
{
	const struct in6_addr *rcv_addr = &inet6_sk(sk)->rcv_saddr;
	u16 new_hash;

	new_hash = udp6_portaddr_hash(sock_net(sk), rcv_addr,
				      inet_sk(sk)->inet_num);
	udp_lib_rehash(sk, new_hash);
}

123
124
/*
 * Rate how well socket @sk matches an incoming packet (primary hash chain).
 *
 * Returns -1 when the socket cannot receive the packet (wrong namespace,
 * port hash or family, or a connected/bound attribute that disagrees with
 * the packet).  Otherwise returns the number of specific attributes that
 * matched: remote port, local address, remote address, bound device (0..4).
 * @dport is accepted for symmetry but is not consulted here; the local port
 * is checked through @hnum.
 */
static inline int compute_score(struct sock *sk, struct net *net,
				unsigned short hnum,
				const struct in6_addr *saddr, __be16 sport,
				const struct in6_addr *daddr, __be16 dport,
				int dif)
{
	struct ipv6_pinfo *np;
	struct inet_sock *inet;
	int score = 0;

	if (!net_eq(sock_net(sk), net) ||
	    udp_sk(sk)->udp_port_hash != hnum ||
	    sk->sk_family != PF_INET6)
		return -1;

	np = inet6_sk(sk);
	inet = inet_sk(sk);

	/* remote port, if the socket has one set */
	if (inet->inet_dport) {
		if (inet->inet_dport != sport)
			return -1;
		score++;
	}
	/* local address, unless bound to the wildcard */
	if (!ipv6_addr_any(&np->rcv_saddr)) {
		if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
			return -1;
		score++;
	}
	/* remote address, unless unset */
	if (!ipv6_addr_any(&np->daddr)) {
		if (!ipv6_addr_equal(&np->daddr, saddr))
			return -1;
		score++;
	}
	/* bound interface */
	if (sk->sk_bound_dev_if) {
		if (sk->sk_bound_dev_if != dif)
			return -1;
		score++;
	}
	return score;
}

161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
/* Best score compute_score2() can return: remote port + remote addr + device. */
#define SCORE2_MAX (1 + 1 + 1)

/*
 * Rate how well socket @sk matches an incoming packet (secondary hash chain).
 *
 * Unlike compute_score(), the socket's receive address must be exactly
 * @daddr (callers pass the wildcard address to look up wildcard sockets),
 * so the local address never contributes to the score.
 *
 * Returns -1 for "no match", otherwise 0..SCORE2_MAX.
 */
static inline int compute_score2(struct sock *sk, struct net *net,
				const struct in6_addr *saddr, __be16 sport,
				const struct in6_addr *daddr, unsigned short hnum,
				int dif)
{
	struct ipv6_pinfo *np;
	struct inet_sock *inet;
	int score = 0;

	if (!net_eq(sock_net(sk), net) ||
	    udp_sk(sk)->udp_port_hash != hnum ||
	    sk->sk_family != PF_INET6)
		return -1;

	np = inet6_sk(sk);
	inet = inet_sk(sk);

	if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
		return -1;

	if (inet->inet_dport) {
		if (inet->inet_dport != sport)
			return -1;
		score++;
	}
	if (!ipv6_addr_any(&np->daddr)) {
		if (!ipv6_addr_equal(&np->daddr, saddr))
			return -1;
		score++;
	}
	if (sk->sk_bound_dev_if) {
		if (sk->sk_bound_dev_if != dif)
			return -1;
		score++;
	}
	return score;
}


/*
 * Find the best matching socket in one secondary-hash chain.
 *
 * Called with rcu_read_lock() held.  On success the returned socket carries
 * an extra reference that the caller must drop; NULL means no usable match.
 */
static struct sock *udp6_lib_lookup2(struct net *net,
		const struct in6_addr *saddr, __be16 sport,
		const struct in6_addr *daddr, unsigned int hnum, int dif,
		struct udp_hslot *hslot2, unsigned int slot2)
{
	struct sock *sk, *best;
	struct hlist_nulls_node *node;
	int score, best_score;

begin:
	best = NULL;
	best_score = -1;
	udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
		score = compute_score2(sk, net, saddr, sport,
				       daddr, hnum, dif);
		if (score > best_score) {
			best = sk;
			best_score = score;
			/* nothing can beat a perfect score: stop walking */
			if (score == SCORE2_MAX)
				goto exact_match;
		}
	}
	/*
	 * The nulls marker at the end of the chain encodes the slot it
	 * belongs to.  A different value means we followed an entry that
	 * was moved to another chain, so the walk has to be redone.
	 */
	if (get_nulls_value(node) != slot2)
		goto begin;

	if (!best)
		return NULL;

exact_match:
	/* The socket may be on its way out: only take it if still referenced. */
	if (unlikely(!atomic_inc_not_zero_hint(&best->sk_refcnt, 2)))
		return NULL;

	/* Re-score now that we hold a reference; restart if it got worse. */
	if (unlikely(compute_score2(best, net, saddr, sport,
				    daddr, hnum, dif) < best_score)) {
		sock_put(best);
		goto begin;
	}
	return best;
}

241
/*
 * Look up the socket that should receive a packet with the given
 * addresses/ports on interface @dif in @udptable.
 *
 * When the primary (port) chain holds more than 10 sockets, the secondary
 * (address+port) chains are tried first: the one for @daddr, then the one
 * for the wildcard address.  If a secondary chain turns out to be longer
 * than the primary one, the primary chain is walked instead.
 *
 * Takes and releases rcu_read_lock() itself.  A non-NULL result carries a
 * reference the caller must release with sock_put().
 */
static struct sock *__udp6_lib_lookup(struct net *net,
				      const struct in6_addr *saddr, __be16 sport,
				      const struct in6_addr *daddr, __be16 dport,
				      int dif, struct udp_table *udptable)
{
	struct sock *sk, *best;
	struct hlist_nulls_node *node;
	unsigned short hnum = ntohs(dport);
	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
	int score, best_score;

	rcu_read_lock();
	if (hslot->count > 10) {
		/* chain keyed by the exact destination address */
		hash2 = udp6_portaddr_hash(net, daddr, hnum);
		slot2 = hash2 & udptable->mask;
		hslot2 = &udptable->hash2[slot2];
		if (hslot->count < hslot2->count)
			goto begin;

		best = udp6_lib_lookup2(net, saddr, sport,
					daddr, hnum, dif,
					hslot2, slot2);
		if (best)
			goto out;

		/* chain keyed by the wildcard address */
		hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
		slot2 = hash2 & udptable->mask;
		hslot2 = &udptable->hash2[slot2];
		if (hslot->count < hslot2->count)
			goto begin;

		best = udp6_lib_lookup2(net, saddr, sport,
					&in6addr_any, hnum, dif,
					hslot2, slot2);
		goto out;
	}
begin:
	best = NULL;
	best_score = -1;
	sk_nulls_for_each_rcu(sk, node, &hslot->head) {
		score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif);
		if (score > best_score) {
			best = sk;
			best_score = score;
		}
	}
	/*
	 * The nulls marker at the end of the chain encodes the slot it
	 * belongs to.  A different value means we followed an entry that
	 * was moved to another chain, so the walk has to be redone.
	 */
	if (get_nulls_value(node) != slot)
		goto begin;

	if (best) {
		/* only keep the socket if we can still get a reference */
		if (unlikely(!atomic_inc_not_zero_hint(&best->sk_refcnt, 2)))
			best = NULL;
		else if (unlikely(compute_score(best, net, hnum, saddr, sport,
						daddr, dport, dif) < best_score)) {
			/* it changed under us and now matches worse: retry */
			sock_put(best);
			goto begin;
		}
	}
out:
	rcu_read_unlock();
	return best;
}

309
310
/*
 * Socket lookup for a received skb.
 *
 * If skb_steal_sock() hands back a socket for this skb, that socket is
 * returned directly.  Otherwise a regular table lookup is done using the
 * addresses from the skb's IPv6 header, the namespace of the skb's dst
 * device and the incoming interface index.
 */
static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
					  __be16 sport, __be16 dport,
					  struct udp_table *udptable)
{
	struct ipv6hdr *iph;
	struct sock *sk;

	sk = skb_steal_sock(skb);
	if (unlikely(sk))
		return sk;

	iph = ipv6_hdr(skb);
	return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev),
				 &iph->saddr, sport,
				 &iph->daddr, dport,
				 inet6_iif(skb), udptable);
}

323
324
325
326
327
328
329
330
/*
 * Exported lookup against the global UDP table (udp_table).
 * A non-NULL result holds a reference taken by __udp6_lib_lookup().
 */
struct sock *udp6_lib_lookup(struct net *net,
			     const struct in6_addr *saddr, __be16 sport,
			     const struct in6_addr *daddr, __be16 dport,
			     int dif)
{
	struct udp_table *table = &udp_table;

	return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, table);
}
EXPORT_SYMBOL_GPL(udp6_lib_lookup);


Linus Torvalds's avatar
Linus Torvalds committed
331
332
333
334
335
/*
 * 	This should be easy, if there is something there we
 * 	return it, otherwise we block.
 */

336
int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
Linus Torvalds's avatar
Linus Torvalds committed
337
338
339
340
341
		  struct msghdr *msg, size_t len,
		  int noblock, int flags, int *addr_len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
342
	struct sk_buff *skb;
Gerrit Renker's avatar
Gerrit Renker committed
343
	unsigned int ulen;
344
	int peeked;
345
346
	int err;
	int is_udplite = IS_UDPLITE(sk);
347
	int is_udp4;
348
	bool slow;
Linus Torvalds's avatar
Linus Torvalds committed
349

350
351
352
	if (addr_len)
		*addr_len=sizeof(struct sockaddr_in6);

Linus Torvalds's avatar
Linus Torvalds committed
353
354
355
	if (flags & MSG_ERRQUEUE)
		return ipv6_recv_error(sk, msg, len);

356
357
358
	if (np->rxpmtu && np->rxopt.bits.rxpmtu)
		return ipv6_recv_rxpmtu(sk, msg, len);

Linus Torvalds's avatar
Linus Torvalds committed
359
try_again:
360
361
	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
				  &peeked, &err);
Linus Torvalds's avatar
Linus Torvalds committed
362
363
364
	if (!skb)
		goto out;

365
	ulen = skb->len - sizeof(struct udphdr);
Gerrit Renker's avatar
Gerrit Renker committed
366
367
368
	if (len > ulen)
		len = ulen;
	else if (len < ulen)
369
		msg->msg_flags |= MSG_TRUNC;
Linus Torvalds's avatar
Linus Torvalds committed
370

371
372
	is_udp4 = (skb->protocol == htons(ETH_P_IP));

373
	/*
374
375
376
	 * If checksum is needed at all, try to do it while copying the
	 * data.  If the data is truncated, or if we only want a partial
	 * coverage checksum (UDP-Lite), do it before the copy.
377
378
	 */

Gerrit Renker's avatar
Gerrit Renker committed
379
	if (len < ulen || UDP_SKB_CB(skb)->partial_cov) {
380
		if (udp_lib_checksum_complete(skb))
Linus Torvalds's avatar
Linus Torvalds committed
381
			goto csum_copy_err;
382
383
	}

384
	if (skb_csum_unnecessary(skb))
385
		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
Gerrit Renker's avatar
Gerrit Renker committed
386
					      msg->msg_iov,len);
387
	else {
Linus Torvalds's avatar
Linus Torvalds committed
388
389
390
391
392
393
394
		err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
		if (err == -EINVAL)
			goto csum_copy_err;
	}
	if (err)
		goto out_free;

395
396
397
398
399
400
401
402
	if (!peeked) {
		if (is_udp4)
			UDP_INC_STATS_USER(sock_net(sk),
					UDP_MIB_INDATAGRAMS, is_udplite);
		else
			UDP6_INC_STATS_USER(sock_net(sk),
					UDP_MIB_INDATAGRAMS, is_udplite);
	}
403

404
	sock_recv_ts_and_drops(msg, sk, skb);
Linus Torvalds's avatar
Linus Torvalds committed
405
406
407
408

	/* Copy the address. */
	if (msg->msg_name) {
		struct sockaddr_in6 *sin6;
409

Linus Torvalds's avatar
Linus Torvalds committed
410
411
		sin6 = (struct sockaddr_in6 *) msg->msg_name;
		sin6->sin6_family = AF_INET6;
412
		sin6->sin6_port = udp_hdr(skb)->source;
Linus Torvalds's avatar
Linus Torvalds committed
413
414
415
		sin6->sin6_flowinfo = 0;
		sin6->sin6_scope_id = 0;

416
		if (is_udp4)
417
418
			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
					       &sin6->sin6_addr);
Linus Torvalds's avatar
Linus Torvalds committed
419
		else {
420
421
			ipv6_addr_copy(&sin6->sin6_addr,
				       &ipv6_hdr(skb)->saddr);
Linus Torvalds's avatar
Linus Torvalds committed
422
423
424
425
426
			if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
				sin6->sin6_scope_id = IP6CB(skb)->iif;
		}

	}
427
	if (is_udp4) {
Linus Torvalds's avatar
Linus Torvalds committed
428
429
430
431
432
		if (inet->cmsg_flags)
			ip_cmsg_recv(msg, skb);
	} else {
		if (np->rxopt.all)
			datagram_recv_ctl(sk, msg, skb);
433
	}
Linus Torvalds's avatar
Linus Torvalds committed
434

Gerrit Renker's avatar
Gerrit Renker committed
435
	err = len;
Linus Torvalds's avatar
Linus Torvalds committed
436
	if (flags & MSG_TRUNC)
437
		err = ulen;
Linus Torvalds's avatar
Linus Torvalds committed
438
439

out_free:
440
	skb_free_datagram_locked(sk, skb);
Linus Torvalds's avatar
Linus Torvalds committed
441
442
443
444
out:
	return err;

csum_copy_err:
445
	slow = lock_sock_fast(sk);
446
447
448
449
450
451
452
453
	if (!skb_kill_datagram(sk, skb, flags)) {
		if (is_udp4)
			UDP_INC_STATS_USER(sock_net(sk),
					UDP_MIB_INERRORS, is_udplite);
		else
			UDP6_INC_STATS_USER(sock_net(sk),
					UDP_MIB_INERRORS, is_udplite);
	}
454
	unlock_sock_fast(sk, slow);
Linus Torvalds's avatar
Linus Torvalds committed
455

456
	if (flags & MSG_DONTWAIT)
Linus Torvalds's avatar
Linus Torvalds committed
457
458
459
460
		return -EAGAIN;
	goto try_again;
}

461
/*
 * ICMPv6 error handler shared by UDP and UDP-Lite.
 *
 * @skb->data points at an IPv6 header and @offset at the UDP header that
 * follows it.  The socket is looked up with source and destination swapped
 * relative to that header (presumably because the header is the packet we
 * originally sent - confirm against the ICMPv6 caller).
 *
 * Without IPV6_RECVERR the error is reported only when
 * icmpv6_err_convert() returns non-zero and the socket is in
 * TCP_ESTABLISHED state; with it, the error is additionally queued through
 * ipv6_icmp_error().
 */
void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		    u8 type, u8 code, int offset, __be32 info,
		    struct udp_table *udptable)
{
	struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data;
	struct udphdr *uh = (struct udphdr *)(skb->data + offset);
	struct in6_addr *saddr = &hdr->saddr;
	struct in6_addr *daddr = &hdr->daddr;
	struct ipv6_pinfo *np;
	struct sock *sk;
	int harderr;
	int err;

	sk = __udp6_lib_lookup(dev_net(skb->dev), daddr, uh->dest,
			       saddr, uh->source, inet6_iif(skb), udptable);
	if (!sk)
		return;

	np = inet6_sk(sk);

	/* always convert: this is what fills in err */
	harderr = icmpv6_err_convert(type, code, &err);

	if (!np->recverr) {
		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
			goto out;
	} else {
		ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1));
	}

	sk->sk_err = err;
	sk->sk_error_report(sk);
out:
	/* drop the reference taken by the lookup */
	sock_put(sk);
}

495
/* ICMPv6 error entry point for plain UDP: uses the global udp_table. */
static __inline__ void udpv6_err(struct sk_buff *skb,
				 struct inet6_skb_parm *opt, u8 type,
				 u8 code, int offset, __be32 info)
{
	struct udp_table *table = &udp_table;

	__udp6_lib_err(skb, opt, type, code, offset, info, table);
}

int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
503
{
504
	struct udp_sock *up = udp_sk(sk);
505
	int rc;
506
	int is_udplite = IS_UDPLITE(sk);
507

508
509
510
	if (!ipv6_addr_any(&inet6_sk(sk)->daddr))
		sock_rps_save_rxhash(sk, skb->rxhash);

511
512
	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto drop;
Linus Torvalds's avatar
Linus Torvalds committed
513

514
515
516
	/*
	 * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
	 */
517
	if ((is_udplite & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
518
519
520
521
522
523
524
525
526
527
528
529
530

		if (up->pcrlen == 0) {          /* full coverage was set  */
			LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage"
				" %d while full coverage %d requested\n",
				UDP_SKB_CB(skb)->cscov, skb->len);
			goto drop;
		}
		if (UDP_SKB_CB(skb)->cscov  <  up->pcrlen) {
			LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: coverage %d "
						    "too small, need min %d\n",
				       UDP_SKB_CB(skb)->cscov, up->pcrlen);
			goto drop;
		}
Linus Torvalds's avatar
Linus Torvalds committed
531
532
	}

533
	if (rcu_dereference_raw(sk->sk_filter)) {
534
535
536
		if (udp_lib_checksum_complete(skb))
			goto drop;
	}
537

Eric Dumazet's avatar
Eric Dumazet committed
538
	if ((rc = ip_queue_rcv_skb(sk, skb)) < 0) {
539
		/* Note that an ENOMEM error is charged twice */
Eric Dumazet's avatar
Eric Dumazet committed
540
		if (rc == -ENOMEM)
541
542
			UDP6_INC_STATS_BH(sock_net(sk),
					UDP_MIB_RCVBUFERRORS, is_udplite);
543
		goto drop_no_sk_drops_inc;
Linus Torvalds's avatar
Linus Torvalds committed
544
	}
545

Linus Torvalds's avatar
Linus Torvalds committed
546
	return 0;
547
drop:
548
549
	atomic_inc(&sk->sk_drops);
drop_no_sk_drops_inc:
550
	UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
551
552
	kfree_skb(skb);
	return -1;
Linus Torvalds's avatar
Linus Torvalds committed
553
554
}

555
/*
 * Starting at @sk (inclusive), walk the rest of its hash chain and return
 * the first socket that should receive a multicast datagram with the given
 * local/remote port and address on interface @dif, or NULL when the chain
 * is exhausted.
 */
static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
				      __be16 loc_port, struct in6_addr *loc_addr,
				      __be16 rmt_port, struct in6_addr *rmt_addr,
				      int dif)
{
	unsigned short num = ntohs(loc_port);
	struct hlist_nulls_node *node;
	struct sock *s = sk;

	sk_nulls_for_each_from(s, node) {
		struct inet_sock *inet = inet_sk(s);
		struct ipv6_pinfo *np;

		if (!net_eq(sock_net(s), net))
			continue;

		if (udp_sk(s)->udp_port_hash != num ||
		    s->sk_family != PF_INET6)
			continue;

		np = inet6_sk(s);

		/* a socket with a remote port/address only takes that peer */
		if (inet->inet_dport && inet->inet_dport != rmt_port)
			continue;
		if (!ipv6_addr_any(&np->daddr) &&
		    !ipv6_addr_equal(&np->daddr, rmt_addr))
			continue;

		if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)
			continue;

		/* bound to a specific address: it must be the group address */
		if (!ipv6_addr_any(&np->rcv_saddr) &&
		    !ipv6_addr_equal(&np->rcv_saddr, loc_addr))
			continue;

		/* finally consult the socket's multicast filter */
		if (!inet6_mc_check(s, loc_addr, rmt_addr))
			continue;

		return s;
	}
	return NULL;
}

596
597
598
599
600
601
602
603
604
605
static void flush_stack(struct sock **stack, unsigned int count,
			struct sk_buff *skb, unsigned int final)
{
	unsigned int i;
	struct sock *sk;
	struct sk_buff *skb1;

	for (i = 0; i < count; i++) {
		skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);

606
		sk = stack[i];
607
		if (skb1) {
608
			if (sk_rcvqueues_full(sk, skb1)) {
609
610
611
				kfree_skb(skb1);
				goto drop;
			}
612
613
614
			bh_lock_sock(sk);
			if (!sock_owned_by_user(sk))
				udpv6_queue_rcv_skb(sk, skb1);
Zhu Yi's avatar
Zhu Yi committed
615
			else if (sk_add_backlog(sk, skb1)) {
Zhu Yi's avatar
Zhu Yi committed
616
617
618
619
				kfree_skb(skb1);
				bh_unlock_sock(sk);
				goto drop;
			}
620
			bh_unlock_sock(sk);
Zhu Yi's avatar
Zhu Yi committed
621
			continue;
622
		}
Zhu Yi's avatar
Zhu Yi committed
623
624
625
626
627
628
drop:
		atomic_inc(&sk->sk_drops);
		UDP6_INC_STATS_BH(sock_net(sk),
				UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk));
		UDP6_INC_STATS_BH(sock_net(sk),
				UDP_MIB_INERRORS, IS_UDPLITE(sk));
629
630
	}
}
Linus Torvalds's avatar
Linus Torvalds committed
631
632
633
634
/*
 * Note: called only from the BH handler context,
 * so we don't need to lock the hashes.
 */
635
636
static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
		struct in6_addr *saddr, struct in6_addr *daddr,
637
		struct udp_table *udptable)
Linus Torvalds's avatar
Linus Torvalds committed
638
{
639
	struct sock *sk, *stack[256 / sizeof(struct sock *)];
640
	const struct udphdr *uh = udp_hdr(skb);
641
	struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
Linus Torvalds's avatar
Linus Torvalds committed
642
	int dif;
643
	unsigned int i, count = 0;
Linus Torvalds's avatar
Linus Torvalds committed
644

645
	spin_lock(&hslot->lock);
646
	sk = sk_nulls_head(&hslot->head);
647
	dif = inet6_iif(skb);
648
	sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
649
650
651
652
653
654
655
656
657
	while (sk) {
		stack[count++] = sk;
		sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
				       uh->source, saddr, dif);
		if (unlikely(count == ARRAY_SIZE(stack))) {
			if (!sk)
				break;
			flush_stack(stack, count, skb, ~0);
			count = 0;
Hideo Aoki's avatar
Hideo Aoki committed
658
		}
Linus Torvalds's avatar
Linus Torvalds committed
659
	}
660
661
662
663
664
665
	/*
	 * before releasing the lock, we must take reference on sockets
	 */
	for (i = 0; i < count; i++)
		sock_hold(stack[i]);

666
	spin_unlock(&hslot->lock);
667
668
669
670
671
672
673
674
675

	if (count) {
		flush_stack(stack, count, skb, count - 1);

		for (i = 0; i < count; i++)
			sock_put(stack[i]);
	} else {
		kfree_skb(skb);
	}
676
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
677
678
}

679
680
static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
				 int proto)
681
{
682
683
684
685
686
	int err;

	UDP_SKB_CB(skb)->partial_cov = 0;
	UDP_SKB_CB(skb)->cscov = skb->len;

687
	if (proto == IPPROTO_UDPLITE) {
688
689
690
691
692
		err = udplite_checksum_init(skb, uh);
		if (err)
			return err;
	}

693
694
695
696
697
698
699
700
	if (uh->check == 0) {
		/* RFC 2460 section 8.1 says that we SHOULD log
		   this error. Well, it is reasonable.
		 */
		LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n");
		return 1;
	}
	if (skb->ip_summed == CHECKSUM_COMPLETE &&
701
	    !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
702
			     skb->len, proto, skb->csum))
703
704
		skb->ip_summed = CHECKSUM_UNNECESSARY;

705
	if (!skb_csum_unnecessary(skb))
706
707
		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
							 &ipv6_hdr(skb)->daddr,
708
							 skb->len, proto, 0));
709

710
	return 0;
711
712
}

713
int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
714
		   int proto)
Linus Torvalds's avatar
Linus Torvalds committed
715
{
716
	struct net *net = dev_net(skb->dev);
Linus Torvalds's avatar
Linus Torvalds committed
717
	struct sock *sk;
718
	struct udphdr *uh;
Linus Torvalds's avatar
Linus Torvalds committed
719
720
721
722
	struct in6_addr *saddr, *daddr;
	u32 ulen = 0;

	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
723
		goto discard;
Linus Torvalds's avatar
Linus Torvalds committed
724

725
726
	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;
727
	uh = udp_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
728
729

	ulen = ntohs(uh->len);
730
731
	if (ulen > skb->len)
		goto short_packet;
Linus Torvalds's avatar
Linus Torvalds committed
732

733
734
	if (proto == IPPROTO_UDP) {
		/* UDP validates ulen. */
Linus Torvalds's avatar
Linus Torvalds committed
735

736
737
738
		/* Check for jumbo payload */
		if (ulen == 0)
			ulen = skb->len;
Linus Torvalds's avatar
Linus Torvalds committed
739

740
741
		if (ulen < sizeof(*uh))
			goto short_packet;
Linus Torvalds's avatar
Linus Torvalds committed
742

743
744
745
		if (ulen < skb->len) {
			if (pskb_trim_rcsum(skb, ulen))
				goto short_packet;
746
747
			saddr = &ipv6_hdr(skb)->saddr;
			daddr = &ipv6_hdr(skb)->daddr;
748
			uh = udp_hdr(skb);
749
750
		}
	}
Linus Torvalds's avatar
Linus Torvalds committed
751

752
753
754
	if (udp6_csum_init(skb, uh, proto))
		goto discard;

755
756
	/*
	 *	Multicast receive code
Linus Torvalds's avatar
Linus Torvalds committed
757
	 */
758
	if (ipv6_addr_is_multicast(daddr))
759
760
		return __udp6_lib_mcast_deliver(net, skb,
				saddr, daddr, udptable);
Linus Torvalds's avatar
Linus Torvalds committed
761
762
763

	/* Unicast */

764
	/*
Linus Torvalds's avatar
Linus Torvalds committed
765
766
767
	 * check socket cache ... must talk to Alan about his plans
	 * for sock caches... i'll skip this for now.
	 */
768
	sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
Linus Torvalds's avatar
Linus Torvalds committed
769
770
771
772
773

	if (sk == NULL) {
		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
			goto discard;

774
		if (udp_lib_checksum_complete(skb))
Linus Torvalds's avatar
Linus Torvalds committed
775
			goto discard;
776
777
		UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS,
				proto == IPPROTO_UDPLITE);
Linus Torvalds's avatar
Linus Torvalds committed
778

779
		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
Linus Torvalds's avatar
Linus Torvalds committed
780
781

		kfree_skb(skb);
Stephen Hemminger's avatar
Stephen Hemminger committed
782
		return 0;
Linus Torvalds's avatar
Linus Torvalds committed
783
	}
784

Linus Torvalds's avatar
Linus Torvalds committed
785
	/* deliver */
786

787
788
789
790
	if (sk_rcvqueues_full(sk, skb)) {
		sock_put(sk);
		goto discard;
	}
791
	bh_lock_sock(sk);
Hideo Aoki's avatar
Hideo Aoki committed
792
793
	if (!sock_owned_by_user(sk))
		udpv6_queue_rcv_skb(sk, skb);
Zhu Yi's avatar
Zhu Yi committed
794
	else if (sk_add_backlog(sk, skb)) {
Zhu Yi's avatar
Zhu Yi committed
795
796
797
798
799
		atomic_inc(&sk->sk_drops);
		bh_unlock_sock(sk);
		sock_put(sk);
		goto discard;
	}
Hideo Aoki's avatar
Hideo Aoki committed
800
	bh_unlock_sock(sk);
Linus Torvalds's avatar
Linus Torvalds committed
801
	sock_put(sk);
Stephen Hemminger's avatar
Stephen Hemminger committed
802
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
803

804
short_packet:
805
	LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n",
806
		       proto == IPPROTO_UDPLITE ? "-Lite" : "",
807
808
809
810
811
812
		       saddr,
		       ntohs(uh->source),
		       ulen,
		       skb->len,
		       daddr,
		       ntohs(uh->dest));
Linus Torvalds's avatar
Linus Torvalds committed
813
814

discard:
815
	UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
Linus Torvalds's avatar
Linus Torvalds committed
816
	kfree_skb(skb);
Stephen Hemminger's avatar
Stephen Hemminger committed
817
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
818
}
819

820
static __inline__ int udpv6_rcv(struct sk_buff *skb)
821
{
822
	return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP);
823
824
}

Linus Torvalds's avatar
Linus Torvalds committed
825
826
827
828
829
830
831
/*
 * Throw away all pending data and cancel the corking. Socket is locked.
 */
static void udp_v6_flush_pending_frames(struct sock *sk)
{
	struct udp_sock *up = udp_sk(sk);

832
833
834
	if (up->pending == AF_INET)
		udp_flush_pending_frames(sk);
	else if (up->pending) {
Linus Torvalds's avatar
Linus Torvalds committed
835
836
837
		up->len = 0;
		up->pending = 0;
		ip6_flush_pending_frames(sk);
838
	}
Linus Torvalds's avatar
Linus Torvalds committed
839
840
}

841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
/**
 * 	udp6_hwcsum_outgoing  -  handle outgoing HW checksumming
 * 	@sk: 	socket we are sending on
 * 	@skb: 	sk_buff containing the filled-in UDP header
 * 	        (checksum field must be zeroed out)
 * 	@saddr:	source address used for the pseudo-header
 * 	@daddr:	destination address used for the pseudo-header
 * 	@len:	length fed into the pseudo-header checksum
 *
 * 	With a single skb on the write queue only the pseudo-header sum is
 * 	stored and the checksum start/offset are recorded in the skb.
 * 	Otherwise the checksum is finished in software over all queued skbs.
 */
static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
				 const struct in6_addr *saddr,
				 const struct in6_addr *daddr, int len)
{
	struct udphdr *uh = udp_hdr(skb);
	struct sk_buff *frag;
	unsigned int offset;
	__wsum csum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		/* Only one fragment on the socket.  */
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct udphdr, check);
		uh->check = ~csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, 0);
		return;
	}

	/*
	 * HW-checksum won't work as there are two or more
	 * fragments on the socket so that all csums of sk_buffs
	 * should be together
	 */
	offset = skb_transport_offset(skb);
	skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
	skb->ip_summed = CHECKSUM_NONE;

	/* fold the per-skb sums of the whole write queue together */
	skb_queue_walk(&sk->sk_write_queue, frag) {
		csum = csum_add(csum, frag->csum);
	}

	uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, csum);
	/* a computed 0 is sent as CSUM_MANGLED_0 instead */
	if (uh->check == 0)
		uh->check = CSUM_MANGLED_0;
}

Linus Torvalds's avatar
Linus Torvalds committed
882
883
884
885
/*
 *	Sending
 */

886
static int udp_v6_push_pending_frames(struct sock *sk)
Linus Torvalds's avatar
Linus Torvalds committed
887
888
889
{
	struct sk_buff *skb;
	struct udphdr *uh;
890
	struct udp_sock  *up = udp_sk(sk);
Linus Torvalds's avatar
Linus Torvalds committed
891
	struct inet_sock *inet = inet_sk(sk);
892
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
Linus Torvalds's avatar
Linus Torvalds committed
893
	int err = 0;
894
	int is_udplite = IS_UDPLITE(sk);
895
	__wsum csum = 0;
Linus Torvalds's avatar
Linus Torvalds committed
896
897
898
899
900
901
902
903

	/* Grab the skbuff where UDP header space exists. */
	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
		goto out;

	/*
	 * Create a UDP header
	 */
904
	uh = udp_hdr(skb);
905
906
	uh->source = fl6->fl6_sport;
	uh->dest = fl6->fl6_dport;
Linus Torvalds's avatar
Linus Torvalds committed
907
908
909
	uh->len = htons(up->len);
	uh->check = 0;

910
	if (is_udplite)
911
		csum = udplite_csum_outgoing(sk, skb);
912
	else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
913
		udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr,
914
915
916
				     up->len);
		goto send;
	} else
917
		csum = udp_csum_outgoing(sk, skb);
Linus Torvalds's avatar
Linus Torvalds committed
918

919
	/* add protocol-dependent pseudo-header */
920
921
	uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr,
				    up->len, fl6->flowi6_proto, csum);
Linus Torvalds's avatar
Linus Torvalds committed
922
	if (uh->check == 0)
923
		uh->check = CSUM_MANGLED_0;
Linus Torvalds's avatar
Linus Torvalds committed
924

925
send:
Linus Torvalds's avatar
Linus Torvalds committed
926
	err = ip6_push_pending_frames(sk);
Eric Dumazet's avatar
Eric Dumazet committed
927
928
929
930
931
932
933
934
935
	if (err) {
		if (err == -ENOBUFS && !inet6_sk(sk)->recverr) {
			UDP6_INC_STATS_USER(sock_net(sk),
					    UDP_MIB_SNDBUFERRORS, is_udplite);
			err = 0;
		}
	} else
		UDP6_INC_STATS_USER(sock_net(sk),
				    UDP_MIB_OUTDATAGRAMS, is_udplite);
Linus Torvalds's avatar
Linus Torvalds committed
936
937
938
939
940
941
out:
	up->len = 0;
	up->pending = 0;
	return err;
}

942
int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
Linus Torvalds's avatar
Linus Torvalds committed
943
944
945
946
947
948
949
		  struct msghdr *msg, size_t len)
{
	struct ipv6_txoptions opt_space;
	struct udp_sock *up = udp_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
950
	struct in6_addr *daddr, *final_p, final;
Linus Torvalds's avatar
Linus Torvalds committed
951
952
	struct ipv6_txoptions *opt = NULL;
	struct ip6_flowlabel *flowlabel = NULL;
953
	struct flowi6 fl6;
Linus Torvalds's avatar
Linus Torvalds committed
954
955
956
957
	struct dst_entry *dst;
	int addr_len = msg->msg_namelen;
	int ulen = len;
	int hlimit = -1;
958
	int tclass = -1;
959
	int dontfrag = -1;
Linus Torvalds's avatar
Linus Torvalds committed
960
961
	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
	int err;
962
	int connected = 0;
963
	int is_udplite = IS_UDPLITE(sk);
964
	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
Linus Torvalds's avatar
Linus Torvalds committed
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990

	/* destination address check */
	if (sin6) {
		if (addr_len < offsetof(struct sockaddr, sa_data))
			return -EINVAL;

		switch (sin6->sin6_family) {
		case AF_INET6:
			if (addr_len < SIN6_LEN_RFC2133)
				return -EINVAL;
			daddr = &sin6->sin6_addr;
			break;
		case AF_INET:
			goto do_udp_sendmsg;
		case AF_UNSPEC:
			msg->msg_name = sin6 = NULL;
			msg->msg_namelen = addr_len = 0;
			daddr = NULL;
			break;
		default:
			return -EINVAL;
		}
	} else if (!up->pending) {
		if (sk->sk_state != TCP_ESTABLISHED)
			return -EDESTADDRREQ;
		daddr = &np->daddr;
991
	} else
Linus Torvalds's avatar
Linus Torvalds committed
992
993
994
		daddr = NULL;

	if (daddr) {
995
		if (ipv6_addr_v4mapped(daddr)) {
Linus Torvalds's avatar
Linus Torvalds committed
996
997
			struct sockaddr_in sin;
			sin.sin_family = AF_INET;
998
			sin.sin_port = sin6 ? sin6->sin6_port : inet->inet_dport;
Linus Torvalds's avatar
Linus Torvalds committed
999
1000
			sin.sin_addr.s_addr = daddr->s6_addr32[3];
			msg->msg_name = &sin;