Commit 6a5ef90c authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'faster-soreuseport'



Craig Gallek says:

====================
Faster SO_REUSEPORT

This series contains two optimizations for the SO_REUSEPORT feature:
Faster lookup when selecting a socket for an incoming packet and
the ability to select the socket from the group using a BPF program.

This series only includes the UDP path.  I plan to submit a follow-up
including the TCP path if the implementation in this series is
acceptable.

Changes in v4:
- pskb_may_pull is unnecessary with pskb_pull (per Alexei Starovoitov)

Changes in v3:
- skb_pull_inline -> pskb_pull (per Alexei Starovoitov)
- reuseport_attach* -> sk_reuseport_attach* and simple return statement
  syntax change (per Daniel Borkmann)

Changes in v2:
- Fix ARM build; remove unnecessary include.
- Handle case where protocol header is not in linear section (per
  Alexei Starovoitov).
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents ebb3cf41 3ca8e402
......@@ -92,4 +92,7 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER
#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52
#endif /* _UAPI_ASM_SOCKET_H */
......@@ -85,4 +85,7 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER
#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52
#endif /* _UAPI__ASM_AVR32_SOCKET_H */
......@@ -85,5 +85,8 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER
#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52
#endif /* _ASM_SOCKET_H */
......@@ -94,4 +94,7 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER
#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52
#endif /* _ASM_IA64_SOCKET_H */
......@@ -85,4 +85,7 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER
#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52
#endif /* _ASM_M32R_SOCKET_H */
......@@ -103,4 +103,7 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER
#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52
#endif /* _UAPI_ASM_SOCKET_H */
......@@ -85,4 +85,7 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER
#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52
#endif /* _ASM_SOCKET_H */
......@@ -84,4 +84,7 @@
#define SO_ATTACH_BPF 0x402B
#define SO_DETACH_BPF SO_DETACH_FILTER
#define SO_ATTACH_REUSEPORT_CBPF 0x402C
#define SO_ATTACH_REUSEPORT_EBPF 0x402D
#endif /* _UAPI_ASM_SOCKET_H */
......@@ -92,4 +92,7 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER
#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52
#endif /* _ASM_POWERPC_SOCKET_H */
......@@ -91,4 +91,7 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER
#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52
#endif /* _ASM_SOCKET_H */
......@@ -81,6 +81,9 @@
#define SO_ATTACH_BPF 0x0034
#define SO_DETACH_BPF SO_DETACH_FILTER
#define SO_ATTACH_REUSEPORT_CBPF 0x0035
#define SO_ATTACH_REUSEPORT_EBPF 0x0036
/* Security levels - as per NRL IPv6 - don't actually do anything */
#define SO_SECURITY_AUTHENTICATION 0x5001
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
......
......@@ -96,4 +96,7 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER
#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52
#endif /* _XTENSA_SOCKET_H */
......@@ -447,6 +447,8 @@ void bpf_prog_destroy(struct bpf_prog *fp);
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
int sk_attach_bpf(u32 ufd, struct sock *sk);
int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk);
int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk);
int sk_detach_filter(struct sock *sk);
int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
unsigned int len);
......
......@@ -87,7 +87,8 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
u32 banned_flags);
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
u32 banned_flags);
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2);
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
bool match_wildcard);
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr);
......
......@@ -318,6 +318,7 @@ struct cg_proto;
* @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE)
* @sk_backlog_rcv: callback to process the backlog
* @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0
* @sk_reuseport_cb: reuseport group container
*/
struct sock {
/*
......@@ -453,6 +454,7 @@ struct sock {
int (*sk_backlog_rcv)(struct sock *sk,
struct sk_buff *skb);
void (*sk_destruct)(struct sock *sk);
struct sock_reuseport __rcu *sk_reuseport_cb;
};
#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
......
#ifndef _SOCK_REUSEPORT_H
#define _SOCK_REUSEPORT_H
#include <linux/filter.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <net/sock.h>
struct sock_reuseport {
struct rcu_head rcu;
u16 max_socks; /* length of socks */
u16 num_socks; /* elements in socks */
struct bpf_prog __rcu *prog; /* optional BPF sock selector */
struct sock *socks[0]; /* array of sock pointers */
};
extern int reuseport_alloc(struct sock *sk);
extern int reuseport_add_sock(struct sock *sk, const struct sock *sk2);
extern void reuseport_detach_sock(struct sock *sk);
extern struct sock *reuseport_select_sock(struct sock *sk,
u32 hash,
struct sk_buff *skb,
int hdr_len);
extern struct bpf_prog *reuseport_attach_prog(struct sock *sk,
struct bpf_prog *prog);
#endif /* _SOCK_REUSEPORT_H */
......@@ -191,7 +191,7 @@ static inline void udp_lib_close(struct sock *sk, long timeout)
}
int udp_lib_get_port(struct sock *sk, unsigned short snum,
int (*)(const struct sock *, const struct sock *),
int (*)(const struct sock *, const struct sock *, bool),
unsigned int hash2_nulladdr);
u32 udp_flow_hashrnd(void);
......@@ -258,7 +258,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif);
struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif,
struct udp_table *tbl);
struct udp_table *tbl, struct sk_buff *skb);
struct sock *udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
......@@ -266,7 +266,8 @@ struct sock *udp6_lib_lookup(struct net *net,
struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
int dif, struct udp_table *tbl);
int dif, struct udp_table *tbl,
struct sk_buff *skb);
/*
* SNMP statistics for UDP and UDP-Lite
......
......@@ -87,4 +87,7 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER
#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52
#endif /* __ASM_GENERIC_SOCKET_H */
......@@ -9,7 +9,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
sock_diag.o dev_ioctl.o tso.o
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o
obj-$(CONFIG_XFRM) += flow.o
obj-y += net-sysfs.o
......
......@@ -50,6 +50,7 @@
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
#include <net/dst.h>
#include <net/sock_reuseport.h>
/**
* sk_filter - run a packet through a socket filter
......@@ -1167,17 +1168,32 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
return 0;
}
/**
* sk_attach_filter - attach a socket filter
* @fprog: the filter program
* @sk: the socket to use
*
* Attach the user's filter code. We first run some sanity checks on
* it to make sure it does not explode on us later. If an error
* occurs or there is insufficient memory for the filter a negative
* errno code is returned. On success the return is zero.
*/
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
{
struct bpf_prog *old_prog;
int err;
if (bpf_prog_size(prog->len) > sysctl_optmem_max)
return -ENOMEM;
if (sk_unhashed(sk)) {
err = reuseport_alloc(sk);
if (err)
return err;
} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
/* The socket wasn't bound with SO_REUSEPORT */
return -EINVAL;
}
old_prog = reuseport_attach_prog(sk, prog);
if (old_prog)
bpf_prog_destroy(old_prog);
return 0;
}
static
struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
{
unsigned int fsize = bpf_classic_proglen(fprog);
unsigned int bpf_fsize = bpf_prog_size(fprog->len);
......@@ -1185,19 +1201,19 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
int err;
if (sock_flag(sk, SOCK_FILTER_LOCKED))
return -EPERM;
return ERR_PTR(-EPERM);
/* Make sure new filter is there and in the right amounts. */
if (fprog->filter == NULL)
return -EINVAL;
return ERR_PTR(-EINVAL);
prog = bpf_prog_alloc(bpf_fsize, 0);
if (!prog)
return -ENOMEM;
return ERR_PTR(-ENOMEM);
if (copy_from_user(prog->insns, fprog->filter, fsize)) {
__bpf_prog_free(prog);
return -EFAULT;
return ERR_PTR(-EFAULT);
}
prog->len = fprog->len;
......@@ -1205,13 +1221,30 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
err = bpf_prog_store_orig_filter(prog, fprog);
if (err) {
__bpf_prog_free(prog);
return -ENOMEM;
return ERR_PTR(-ENOMEM);
}
/* bpf_prepare_filter() already takes care of freeing
* memory in case something goes wrong.
*/
prog = bpf_prepare_filter(prog, NULL);
return bpf_prepare_filter(prog, NULL);
}
/**
* sk_attach_filter - attach a socket filter
* @fprog: the filter program
* @sk: the socket to use
*
* Attach the user's filter code. We first run some sanity checks on
* it to make sure it does not explode on us later. If an error
* occurs or there is insufficient memory for the filter a negative
* errno code is returned. On success the return is zero.
*/
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
struct bpf_prog *prog = __get_filter(fprog, sk);
int err;
if (IS_ERR(prog))
return PTR_ERR(prog);
......@@ -1225,23 +1258,50 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_attach_filter);
int sk_attach_bpf(u32 ufd, struct sock *sk)
int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
struct bpf_prog *prog;
struct bpf_prog *prog = __get_filter(fprog, sk);
int err;
if (IS_ERR(prog))
return PTR_ERR(prog);
err = __reuseport_attach_prog(prog, sk);
if (err < 0) {
__bpf_prog_release(prog);
return err;
}
return 0;
}
static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
{
struct bpf_prog *prog;
if (sock_flag(sk, SOCK_FILTER_LOCKED))
return -EPERM;
return ERR_PTR(-EPERM);
prog = bpf_prog_get(ufd);
if (IS_ERR(prog))
return PTR_ERR(prog);
return prog;
if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) {
bpf_prog_put(prog);
return -EINVAL;
return ERR_PTR(-EINVAL);
}
return prog;
}
int sk_attach_bpf(u32 ufd, struct sock *sk)
{
struct bpf_prog *prog = __get_bpf(ufd, sk);
int err;
if (IS_ERR(prog))
return PTR_ERR(prog);
err = __sk_attach_prog(prog, sk);
if (err < 0) {
bpf_prog_put(prog);
......@@ -1251,6 +1311,23 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
return 0;
}
int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
{
struct bpf_prog *prog = __get_bpf(ufd, sk);
int err;
if (IS_ERR(prog))
return PTR_ERR(prog);
err = __reuseport_attach_prog(prog, sk);
if (err < 0) {
bpf_prog_put(prog);
return err;
}
return 0;
}
#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1)
#define BPF_LDST_LEN 16U
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment