/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 *	Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/sock_diag.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/if_arp.h>
#include <linux/gfp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/flow_dissector.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
#include <net/sch_generic.h>
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
#include <net/dst.h>
#include <net/sock_reuseport.h>
#include <net/busy_poll.h>
#include <net/tcp.h>

/**
 *	sk_filter_trim_cap - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *	@cap: limit on how short the eBPF program may trim the packet
 *
 * Run the eBPF program and then cut skb->data to the correct size returned
 * by the program. If pkt_len is 0 we toss the packet. If skb->len is smaller
 * than pkt_len we keep the whole skb->data. This is the socket-level
 * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 */
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
{
	int err;
	struct sk_filter *filter;

	/*
	 * If the skb was allocated from pfmemalloc reserves, only
	 * allow SOCK_MEMALLOC sockets to use it as this socket is
	 * helping free memory
	 */
	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
		return -ENOMEM;
	}
	err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
	if (err)
		return err;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		struct sock *save_sk = skb->sk;
		unsigned int pkt_len;

		skb->sk = sk;
		pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
		skb->sk = save_sk;
		err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
	}
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL(sk_filter_trim_cap);
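
/* Usage note (illustrative, describing the include/linux/filter.h wrapper as
 * the author understands it, not code that lives in this file): most callers
 * do not pass a cap themselves but go through the sk_filter() helper, which
 * is roughly equivalent to:
 *
 *	static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 *	{
 *		return sk_filter_trim_cap(sk, skb, 1);
 *	}
 *
 * i.e. the packet may be trimmed down to a single byte but never to zero,
 * while a zero return from the filter still drops it with -EPERM.
 */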

BPF_CALL_1(__skb_get_pay_offset, struct sk_buff *, skb)
{
	return skb_get_poff(skb);
}

BPF_CALL_3(__skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

BPF_CALL_3(__skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = (struct nlattr *) &skb->data[a];
	if (nla->nla_len > skb->len - a)
		return 0;

	nla = nla_find_nested(nla, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

BPF_CALL_0(__get_raw_cpu_id)
{
	return raw_smp_processor_id();
}

static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
	.func		= __get_raw_cpu_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
			      struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;

	switch (skb_field) {
	case SKF_AD_MARK:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);

		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
				      offsetof(struct sk_buff, mark));
		break;

	case SKF_AD_PKTTYPE:
		*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET());
		*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
#ifdef __BIG_ENDIAN_BITFIELD
		*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
#endif
		break;

	case SKF_AD_QUEUE:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);

		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				      offsetof(struct sk_buff, queue_mapping));
		break;

	case SKF_AD_VLAN_TAG:
	case SKF_AD_VLAN_TAG_PRESENT:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);

		/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				      offsetof(struct sk_buff, vlan_tci));
		if (skb_field == SKF_AD_VLAN_TAG) {
			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg,
						~VLAN_TAG_PRESENT);
		} else {
			/* dst_reg >>= 12 */
			*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 12);
			/* dst_reg &= 1 */
			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
		}
		break;
	}

	return insn - insn_buf;
}

static bool convert_bpf_extensions(struct sock_filter *fp,
				   struct bpf_insn **insnp)
{
	struct bpf_insn *insn = *insnp;
	u32 cnt;

	switch (fp->k) {
	case SKF_AD_OFF + SKF_AD_PROTOCOL:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);

		/* A = *(u16 *) (CTX + offsetof(protocol)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, protocol));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PKTTYPE:
		cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_IFINDEX:
	case SKF_AD_OFF + SKF_AD_HATYPE:
		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
				      BPF_REG_TMP, BPF_REG_CTX,
				      offsetof(struct sk_buff, dev));
		/* if (tmp != 0) goto pc + 1 */
		*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
		*insn++ = BPF_EXIT_INSN();
		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, ifindex));
		else
			*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, type));
		break;

	case SKF_AD_OFF + SKF_AD_MARK:
		cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_RXHASH:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);

		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
				    offsetof(struct sk_buff, hash));
		break;

	case SKF_AD_OFF + SKF_AD_QUEUE:
		cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
		cnt = convert_skb_access(SKF_AD_VLAN_TAG,
					 BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
		cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
					 BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TPID:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);

		/* A = *(u16 *) (CTX + offsetof(vlan_proto)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, vlan_proto));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
	case SKF_AD_OFF + SKF_AD_NLATTR:
	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
	case SKF_AD_OFF + SKF_AD_CPU:
	case SKF_AD_OFF + SKF_AD_RANDOM:
		/* arg1 = CTX */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
		/* arg2 = A */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
		/* arg3 = X */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
		/* Emit call(arg1=CTX, arg2=A, arg3=X) */
		switch (fp->k) {
		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
			*insn = BPF_EMIT_CALL(__skb_get_pay_offset);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR:
			*insn = BPF_EMIT_CALL(__skb_get_nlattr);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
			*insn = BPF_EMIT_CALL(__skb_get_nlattr_nest);
			break;
		case SKF_AD_OFF + SKF_AD_CPU:
			*insn = BPF_EMIT_CALL(__get_raw_cpu_id);
			break;
		case SKF_AD_OFF + SKF_AD_RANDOM:
			*insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
			bpf_user_rnd_init_once();
			break;
		}
		break;

	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
		/* A ^= X */
		*insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
		break;

	default:
		/* This is just a dummy call to avoid letting the compiler
		 * evict __bpf_call_base() as an optimization. Placed here
		 * where no-one bothers.
		 */
		BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
		return false;
	}

	*insnp = insn;
	return true;
}
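
/* Example (illustrative sketch, not part of the kernel sources): a classic
 * filter reads an ancillary field via an absolute load at a magic offset
 * >= SKF_AD_OFF, e.g. accepting only packets with skb->mark == 1:
 *
 *	struct sock_filter prog[] = {
 *		BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + SKF_AD_MARK),
 *		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 0, 1),
 *		BPF_STMT(BPF_RET | BPF_K, 0xffffffff),
 *		BPF_STMT(BPF_RET | BPF_K, 0),
 *	};
 *
 * convert_bpf_extensions() rewrites such loads into direct sk_buff field
 * accesses (via convert_skb_access()) or helper calls instead of letting
 * them reach the packet data path.
 */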

/**
 *	bpf_convert_filter - convert filter program
 *	@prog: the user passed filter program
 *	@len: the length of the user passed filter program
 *	@new_prog: allocated 'struct bpf_prog' or NULL
 *	@new_len: pointer to store length of converted program
 *
 * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
 * style extended BPF (eBPF).
 *
 * Conversion workflow:
 *
 * 1) First pass for calculating the new program length:
 *   bpf_convert_filter(old_prog, old_len, NULL, &new_len)
 *
 * 2) Second pass to do the remapping, itself run twice: the first run
 *    finds the new jump offsets, the second does the actual remapping:
 *   bpf_convert_filter(old_prog, old_len, new_prog, &new_len);
 */
static int bpf_convert_filter(struct sock_filter *prog, int len,
			      struct bpf_prog *new_prog, int *new_len)
{
	int new_flen = 0, pass = 0, target, i, stack_off;
	struct bpf_insn *new_insn, *first_insn = NULL;
	struct sock_filter *fp;
	int *addrs = NULL;
	u8 bpf_src;

	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);

	if (len <= 0 || len > BPF_MAXINSNS)
		return -EINVAL;

	if (new_prog) {
		first_insn = new_prog->insnsi;
		addrs = kcalloc(len, sizeof(*addrs),
				GFP_KERNEL | __GFP_NOWARN);
		if (!addrs)
			return -ENOMEM;
	}

do_pass:
	new_insn = first_insn;
	fp = prog;

	/* Classic BPF related prologue emission. */
	if (new_prog) {
		/* Classic BPF expects A and X to be reset first. These need
		 * to be guaranteed to be the first two instructions.
		 */
		*new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
		*new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);

		/* All programs must keep CTX in callee saved BPF_REG_CTX.
		 * In the eBPF case it's done by the compiler, here we need to
		 * do this ourselves. Initial CTX is present in BPF_REG_ARG1.
		 */
		*new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
	} else {
		new_insn += 3;
	}

	for (i = 0; i < len; fp++, i++) {
		struct bpf_insn tmp_insns[6] = { };
		struct bpf_insn *insn = tmp_insns;

		if (addrs)
			addrs[i] = new_insn - first_insn;

		switch (fp->code) {
		/* All arithmetic insns and skb loads map as-is. */
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_ABS | BPF_W:
		case BPF_LD | BPF_ABS | BPF_H:
		case BPF_LD | BPF_ABS | BPF_B:
		case BPF_LD | BPF_IND | BPF_W:
		case BPF_LD | BPF_IND | BPF_H:
		case BPF_LD | BPF_IND | BPF_B:
			/* Check for overloaded BPF extension and
			 * directly convert it if found, otherwise
			 * just move on with mapping.
			 */
			if (BPF_CLASS(fp->code) == BPF_LD &&
			    BPF_MODE(fp->code) == BPF_ABS &&
			    convert_bpf_extensions(fp, &insn))
				break;

			*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
			break;

		/* Jump transformation cannot use BPF block macros
		 * everywhere as offset calculation and target updates
		 * require a bit more work than the rest, i.e. jump
		 * opcodes map as-is, but offsets need adjustment.
		 */

#define BPF_EMIT_JMP							\
	do {								\
		if (target >= len || target < 0)			\
			goto err;					\
		insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0;	\
		/* Adjust pc relative offset for 2nd or 3rd insn. */	\
		insn->off -= insn - tmp_insns;				\
	} while (0)

		case BPF_JMP | BPF_JA:
			target = i + fp->k + 1;
			insn->code = fp->code;
			BPF_EMIT_JMP;
			break;

		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
			if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
				/* BPF immediates are signed, zero extend
				 * immediate into tmp register and use it
				 * in compare insn.
				 */
				*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);

				insn->dst_reg = BPF_REG_A;
				insn->src_reg = BPF_REG_TMP;
				bpf_src = BPF_X;
			} else {
				insn->dst_reg = BPF_REG_A;
				insn->imm = fp->k;
				bpf_src = BPF_SRC(fp->code);
				insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
			}

			/* Common case where 'jump_false' is next insn. */
			if (fp->jf == 0) {
				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
				target = i + fp->jt + 1;
				BPF_EMIT_JMP;
				break;
			}

			/* Convert JEQ into JNE when 'jump_true' is next insn. */
			if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
				insn->code = BPF_JMP | BPF_JNE | bpf_src;
				target = i + fp->jf + 1;
				BPF_EMIT_JMP;
				break;
			}

			/* Other jumps are mapped into two insns: Jxx and JA. */
			target = i + fp->jt + 1;
			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
			BPF_EMIT_JMP;
			insn++;

			insn->code = BPF_JMP | BPF_JA;
			target = i + fp->jf + 1;
			BPF_EMIT_JMP;
			break;

		/* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
		case BPF_LDX | BPF_MSH | BPF_B:
			/* tmp = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A);
			/* A = BPF_R0 = *(u8 *) (skb->data + K) */
			*insn++ = BPF_LD_ABS(BPF_B, fp->k);
			/* A &= 0xf */
			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
			/* A <<= 2 */
			*insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
			/* X = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			/* A = tmp */
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
			break;

		/* RET_K is remapped into 2 insns. RET_A case doesn't need an
		 * extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
		 */
		case BPF_RET | BPF_A:
		case BPF_RET | BPF_K:
			if (BPF_RVAL(fp->code) == BPF_K)
				*insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
							0, fp->k);
			*insn = BPF_EXIT_INSN();
			break;

		/* Store to stack. */
		case BPF_ST:
		case BPF_STX:
			stack_off = fp->k * 4  + 4;
			*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
					    BPF_ST ? BPF_REG_A : BPF_REG_X,
					    -stack_off);
			/* check_load_and_stores() verifies that classic BPF can
			 * load from stack only after write, so tracking
			 * stack_depth for ST|STX insns is enough
			 */
			if (new_prog && new_prog->aux->stack_depth < stack_off)
				new_prog->aux->stack_depth = stack_off;
			break;

		/* Load from stack. */
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			stack_off = fp->k * 4  + 4;
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD  ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_FP,
					    -stack_off);
			break;

		/* A = K or X = K */
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
			*insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
					      BPF_REG_A : BPF_REG_X, fp->k);
			break;

		/* X = A */
		case BPF_MISC | BPF_TAX:
			*insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			break;

		/* A = X */
		case BPF_MISC | BPF_TXA:
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
			break;

		/* A = skb->len or X = skb->len */
		case BPF_LD | BPF_W | BPF_LEN:
		case BPF_LDX | BPF_W | BPF_LEN:
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
					    offsetof(struct sk_buff, len));
			break;

		/* Access seccomp_data fields. */
		case BPF_LDX | BPF_ABS | BPF_W:
			/* A = *(u32 *) (ctx + K) */
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
			break;

		/* Unknown instruction. */
		default:
			goto err;
		}

		insn++;
		if (new_prog)
			memcpy(new_insn, tmp_insns,
			       sizeof(*insn) * (insn - tmp_insns));
		new_insn += insn - tmp_insns;
	}

	if (!new_prog) {
		/* Only calculating new length. */
		*new_len = new_insn - first_insn;
		return 0;
	}

	pass++;
	if (new_flen != new_insn - first_insn) {
		new_flen = new_insn - first_insn;
		if (pass > 2)
			goto err;
		goto do_pass;
	}

	kfree(addrs);
	BUG_ON(*new_len != new_flen);
	return 0;
err:
	kfree(addrs);
	return -EINVAL;
}

/* Security:
 *
 * As we don't want to clear the mem[] array for each packet going through
 * __bpf_prog_run(), we check that a filter loaded by the user never tries
 * to read a cell that was not previously written, and we check all branches
 * to be sure a malicious user doesn't try to abuse us.
 */
static int check_load_and_stores(const struct sock_filter *filter, int flen)
{
	u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
	int pc, ret = 0;

	BUILD_BUG_ON(BPF_MEMWORDS > 16);

	masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return -ENOMEM;

	memset(masks, 0xff, flen * sizeof(*masks));

	for (pc = 0; pc < flen; pc++) {
		memvalid &= masks[pc];

		switch (filter[pc].code) {
		case BPF_ST:
		case BPF_STX:
			memvalid |= (1 << filter[pc].k);
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			if (!(memvalid & (1 << filter[pc].k))) {
				ret = -EINVAL;
				goto error;
			}
			break;
		case BPF_JMP | BPF_JA:
			/* A jump must set masks on target */
			masks[pc + 1 + filter[pc].k] &= memvalid;
			memvalid = ~0;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* A jump must set masks on targets */
			masks[pc + 1 + filter[pc].jt] &= memvalid;
			masks[pc + 1 + filter[pc].jf] &= memvalid;
			memvalid = ~0;
			break;
		}
	}
error:
	kfree(masks);
	return ret;
}
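
/* Example (illustrative, not compiled here): check_load_and_stores() rejects
 * a filter that reads scratch memory before ever writing it, such as:
 *
 *	struct sock_filter bad[] = {
 *		BPF_STMT(BPF_LD | BPF_MEM, 0),
 *		BPF_STMT(BPF_RET | BPF_K, 0xffffffff),
 *	};
 *
 * because M[0] is loaded while its bit in memvalid was never set, so
 * bpf_check_classic() returns -EINVAL for such a program.
 */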

static bool chk_code_allowed(u16 code_to_probe)
{
	static const bool codes[] = {
		/* 32 bit ALU operations */
		[BPF_ALU | BPF_ADD | BPF_K] = true,
		[BPF_ALU | BPF_ADD | BPF_X] = true,
		[BPF_ALU | BPF_SUB | BPF_K] = true,
		[BPF_ALU | BPF_SUB | BPF_X] = true,
		[BPF_ALU | BPF_MUL | BPF_K] = true,
		[BPF_ALU | BPF_MUL | BPF_X] = true,
		[BPF_ALU | BPF_DIV | BPF_K] = true,
		[BPF_ALU | BPF_DIV | BPF_X] = true,
		[BPF_ALU | BPF_MOD | BPF_K] = true,
		[BPF_ALU | BPF_MOD | BPF_X] = true,
		[BPF_ALU | BPF_AND | BPF_K] = true,
		[BPF_ALU | BPF_AND | BPF_X] = true,
		[BPF_ALU | BPF_OR | BPF_K] = true,
		[BPF_ALU | BPF_OR | BPF_X] = true,
		[BPF_ALU | BPF_XOR | BPF_K] = true,
		[BPF_ALU | BPF_XOR | BPF_X] = true,
		[BPF_ALU | BPF_LSH | BPF_K] = true,
		[BPF_ALU | BPF_LSH | BPF_X] = true,
		[BPF_ALU | BPF_RSH | BPF_K] = true,
		[BPF_ALU | BPF_RSH | BPF_X] = true,
		[BPF_ALU | BPF_NEG] = true,
		/* Load instructions */
		[BPF_LD | BPF_W | BPF_ABS] = true,
		[BPF_LD | BPF_H | BPF_ABS] = true,
		[BPF_LD | BPF_B | BPF_ABS] = true,
		[BPF_LD | BPF_W | BPF_LEN] = true,
		[BPF_LD | BPF_W | BPF_IND] = true,
		[BPF_LD | BPF_H | BPF_IND] = true,
		[BPF_LD | BPF_B | BPF_IND] = true,
		[BPF_LD | BPF_IMM] = true,
		[BPF_LD | BPF_MEM] = true,
		[BPF_LDX | BPF_W | BPF_LEN] = true,
		[BPF_LDX | BPF_B | BPF_MSH] = true,
		[BPF_LDX | BPF_IMM] = true,
		[BPF_LDX | BPF_MEM] = true,
		/* Store instructions */
		[BPF_ST] = true,
		[BPF_STX] = true,
		/* Misc instructions */
		[BPF_MISC | BPF_TAX] = true,
		[BPF_MISC | BPF_TXA] = true,
		/* Return instructions */
		[BPF_RET | BPF_K] = true,
		[BPF_RET | BPF_A] = true,
		/* Jump instructions */
		[BPF_JMP | BPF_JA] = true,
		[BPF_JMP | BPF_JEQ | BPF_K] = true,
		[BPF_JMP | BPF_JEQ | BPF_X] = true,
		[BPF_JMP | BPF_JGE | BPF_K] = true,
		[BPF_JMP | BPF_JGE | BPF_X] = true,
		[BPF_JMP | BPF_JGT | BPF_K] = true,
		[BPF_JMP | BPF_JGT | BPF_X] = true,
		[BPF_JMP | BPF_JSET | BPF_K] = true,
		[BPF_JMP | BPF_JSET | BPF_X] = true,
	};

	if (code_to_probe >= ARRAY_SIZE(codes))
		return false;

	return codes[code_to_probe];
}

static bool bpf_check_basics_ok(const struct sock_filter *filter,
				unsigned int flen)
{
	if (filter == NULL)
		return false;
	if (flen == 0 || flen > BPF_MAXINSNS)
		return false;

	return true;
}

/**
 *	bpf_check_classic - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through, kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
static int bpf_check_classic(const struct sock_filter *filter,
			     unsigned int flen)
{
	bool anc_found;
	int pc;

	/* Check the filter code now */
	for (pc = 0; pc < flen; pc++) {
		const struct sock_filter *ftest = &filter[pc];

		/* May we actually operate on this code? */
		if (!chk_code_allowed(ftest->code))
			return -EINVAL;

		/* Some instructions need special checks */
		switch (ftest->code) {
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_K:
			/* Check for division by zero */
			if (ftest->k == 0)
				return -EINVAL;
			break;
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_K:
			if (ftest->k >= 32)
				return -EINVAL;
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
		case BPF_ST:
		case BPF_STX:
			/* Check for invalid memory addresses */
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			break;
		case BPF_JMP | BPF_JA:
			/* Note, the large ftest->k might cause loops.
			 * Compare this with conditional jumps below,
			 * where offsets are limited. --ANK (981016)
			 */
			if (ftest->k >= (unsigned int)(flen - pc - 1))
				return -EINVAL;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* Both conditionals must be safe */
			if (pc + ftest->jt + 1 >= flen ||
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
			break;
		case BPF_LD | BPF_W | BPF_ABS:
		case BPF_LD | BPF_H | BPF_ABS:
		case BPF_LD | BPF_B | BPF_ABS:
			anc_found = false;
			if (bpf_anc_helper(ftest) & BPF_ANC)
				anc_found = true;
			/* Ancillary operation unknown or unsupported */
			if (anc_found == false && ftest->k >= SKF_AD_OFF)
				return -EINVAL;
		}
	}

	/* Last instruction must be a RET code */
	switch (filter[flen - 1].code) {
	case BPF_RET | BPF_K:
	case BPF_RET | BPF_A:
		return check_load_and_stores(filter, flen);
	}

	return -EINVAL;
}
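
/* Example (illustrative): the smallest program that passes bpf_check_classic()
 * is a single unconditional return, i.e. the classic "accept everything"
 * filter:
 *
 *	struct sock_filter accept_all[] = {
 *		BPF_STMT(BPF_RET | BPF_K, 0xffffffff),
 *	};
 *
 * It uses only an allowed opcode, ends in BPF_RET and touches no scratch
 * memory, so check_load_and_stores() trivially succeeds as well.
 */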

static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
				      const struct sock_fprog *fprog)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct sock_fprog_kern *fkprog;

	fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
	if (!fp->orig_prog)
		return -ENOMEM;

	fkprog = fp->orig_prog;
	fkprog->len = fprog->len;

	fkprog->filter = kmemdup(fp->insns, fsize,
				 GFP_KERNEL | __GFP_NOWARN);
	if (!fkprog->filter) {
		kfree(fp->orig_prog);
		return -ENOMEM;
	}

	return 0;
}

static void bpf_release_orig_filter(struct bpf_prog *fp)
{
	struct sock_fprog_kern *fprog = fp->orig_prog;

	if (fprog) {
		kfree(fprog->filter);
		kfree(fprog);
	}
}

static void __bpf_prog_release(struct bpf_prog *prog)
{
	if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
		bpf_prog_put(prog);
	} else {
		bpf_release_orig_filter(prog);
		bpf_prog_free(prog);
	}
}

static void __sk_filter_release(struct sk_filter *fp)
{
	__bpf_prog_release(fp->prog);
	kfree(fp);
}

/**
 *	sk_filter_release_rcu - Release a socket filter by rcu_head
 *	@rcu: rcu_head that contains the sk_filter to free
 */
static void sk_filter_release_rcu(struct rcu_head *rcu)
{
	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

	__sk_filter_release(fp);
}

/**
 *	sk_filter_release - release a socket filter
 *	@fp: filter to remove
 *
 *	Remove a filter from a socket and release its resources.
 */
static void sk_filter_release(struct sk_filter *fp)
{
	if (refcount_dec_and_test(&fp->refcnt))
		call_rcu(&fp->rcu, sk_filter_release_rcu);
}

void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
{
	u32 filter_size = bpf_prog_size(fp->prog->len);

	atomic_sub(filter_size, &sk->sk_omem_alloc);
	sk_filter_release(fp);
}

/* try to charge the socket memory if there is space available
 * return true on success
 */
static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	u32 filter_size = bpf_prog_size(fp->prog->len);

	/* same check as in sock_kmalloc() */
	if (filter_size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
		atomic_add(filter_size, &sk->sk_omem_alloc);
		return true;
	}
	return false;
}

bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	bool ret = __sk_filter_charge(sk, fp);
	if (ret)
		refcount_inc(&fp->refcnt);
	return ret;
}

static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
{
	struct sock_filter *old_prog;
	struct bpf_prog *old_fp;
	int err, new_len, old_len = fp->len;

	/* We are free to overwrite insns et al right here as it
	 * won't be used at this point in time anymore internally
	 * after the migration to the internal BPF instruction
	 * representation.
	 */
	BUILD_BUG_ON(sizeof(struct sock_filter) !=
		     sizeof(struct bpf_insn));

	/* Conversion cannot happen on overlapping memory areas,
	 * so we need to keep the user BPF around until the 2nd
	 * pass. At this time, the user BPF is stored in fp->insns.
	 */
	old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
			   GFP_KERNEL | __GFP_NOWARN);
	if (!old_prog) {
		err = -ENOMEM;
		goto out_err;
	}

	/* 1st pass: calculate the new program length. */
	err = bpf_convert_filter(old_prog, old_len, NULL, &new_len);
	if (err)
		goto out_err_free;

	/* Expand fp for appending the new filter representation. */
	old_fp = fp;
	fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
	if (!fp) {
		/* The old_fp is still around in case we couldn't
		 * allocate new memory, so uncharge on that one.
		 */
		fp = old_fp;
		err = -ENOMEM;
		goto out_err_free;
	}

	fp->len = new_len;

	/* 2nd pass: remap sock_filter insns into bpf_insn insns. */
	err = bpf_convert_filter(old_prog, old_len, fp, &new_len);
	if (err)
		/* 2nd bpf_convert_filter() can fail only if it fails
		 * to allocate memory, remapping must succeed. Note,
		 * that at this time old_fp has already been released
		 * by krealloc().
		 */
		goto out_err_free;

	/* We are guaranteed to never error here with cBPF to eBPF
	 * transitions, since there's no issue with type compatibility
	 * checks on program arrays.
	 */
	fp = bpf_prog_select_runtime(fp, &err);

	kfree(old_prog);
	return fp;

out_err_free:
	kfree(old_prog);
out_err:
	__bpf_prog_release(fp);
	return ERR_PTR(err);
}

static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
					   bpf_aux_classic_check_t trans)
{
	int err;

	fp->bpf_func = NULL;
	fp->jited = 0;

	err = bpf_check_classic(fp->insns, fp->len);
	if (err) {
		__bpf_prog_release(fp);
		return ERR_PTR(err);
	}

	/* There might be additional checks and transformations
	 * needed on classic filters, f.e. in case of seccomp.
	 */
	if (trans) {
		err = trans(fp->insns, fp->len);
		if (err) {
			__bpf_prog_release(fp);
			return ERR_PTR(err);
		}
	}

	/* Probe if we can JIT compile the filter and if so, do
	 * the compilation of the filter.
	 */
	bpf_jit_compile(fp);

	/* JIT compiler couldn't process this filter, so do the
	 * internal BPF translation for the optimized interpreter.
	 */
	if (!fp->jited)
		fp = bpf_migrate_filter(fp);

	return fp;
}

/**
 *	bpf_prog_create - create an unattached filter
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *
 * Create a filter independent of any socket. We first run some
 * sanity checks on it to make sure it does not explode on us later.
 * If an error occurs or there is insufficient memory for the filter,
 * a negative errno code is returned. On success the return is zero.
 */
int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *fp;

	/* Make sure new filter is there and in the right amounts. */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return -EINVAL;

	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!fp)
		return -ENOMEM;

	memcpy(fp->insns, fprog->filter, fsize);

	fp->len = fprog->len;
	/* Since unattached filters are not copied back to user
	 * space through sk_get_filter(), we do not need to hold
	 * a copy here, and can spare us the work.
	 */
	fp->orig_prog = NULL;

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	fp = bpf_prepare_filter(fp, NULL);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	*pfp = fp;
	return 0;
}
EXPORT_SYMBOL_GPL(bpf_prog_create);
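
/* Usage sketch (illustrative, loosely mirroring in-kernel callers such as the
 * PPP driver as the author understands them, not code that lives here): an
 * unattached classic filter is built from a sock_fprog_kern and later freed
 * with bpf_prog_destroy(). bpf_prog_create() copies the insns, so the array
 * may live on the stack:
 *
 *	static struct bpf_prog *build_accept_all(void)
 *	{
 *		struct sock_filter insns[] = {
 *			BPF_STMT(BPF_RET | BPF_K, 0xffffffff),
 *		};
 *		struct sock_fprog_kern fprog = {
 *			.len	= ARRAY_SIZE(insns),
 *			.filter	= insns,
 *		};
 *		struct bpf_prog *fp;
 *
 *		if (bpf_prog_create(&fp, &fprog))
 *			return NULL;
 *		return fp;
 *	}
 */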