Commit a2e27255 authored by Arnaldo Carvalho de Melo's avatar Arnaldo Carvalho de Melo Committed by David S. Miller
Browse files

net: Introduce recvmmsg socket syscall



Meaning receive multiple messages, reducing the number of syscalls and
net stack entry/exit operations.

Next patches will introduce mechanisms where protocols that want to
optimize this operation will provide an unlocked_recvmsg operation.

This takes into account comments made by:

. Paul Moore: sock_recvmsg is called only for the first datagram,
  sock_recvmsg_nosec is used for the rest.

. Caitlin Bestler: recvmmsg now has a struct timespec timeout, that
  works in the same fashion as the ppoll one.

  If the underlying protocol returns a datagram with MSG_OOB set, this
  will make recvmmsg return right away with as many datagrams (+ the OOB
  one) it has received so far.

. Rémi Denis-Courmont & Steven Whitehouse: If we receive N < vlen
  datagrams and then recvmsg returns an error, recvmmsg will return
  the successfully received datagrams, store the error and return it
  in the next call.

This paves the way for a subsequent optimization, sk_prot->unlocked_recvmsg,
where we will be able to acquire the lock only at batch start and end, not at
every underlying recvmsg call.
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent c05e85a0
......@@ -497,6 +497,7 @@ sys_call_table:
.quad sys_signalfd
.quad sys_ni_syscall
.quad sys_eventfd
.quad sys_recvmmsg
.size sys_call_table, . - sys_call_table
.type sys_call_table, @object
......
......@@ -374,6 +374,7 @@
CALL(sys_pwritev)
CALL(sys_rt_tgsigqueueinfo)
CALL(sys_perf_event_open)
/* 365 */ CALL(sys_recvmmsg)
#ifndef syscalls_counted
.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
#define syscalls_counted
......
......@@ -295,4 +295,5 @@ sys_call_table:
.long sys_signalfd
.long sys_ni_syscall /* 280, was sys_timerfd */
.long sys_eventfd
.long sys_recvmmsg
.long sys_ni_syscall /* r8 is saturated at nr_syscalls */
......@@ -1621,6 +1621,7 @@ ENTRY(_sys_call_table)
.long _sys_pwritev
.long _sys_rt_tgsigqueueinfo
.long _sys_perf_event_open
.long _sys_recvmmsg /* 370 */
.rept NR_syscalls-(.-_sys_call_table)/4
.long _sys_ni_syscall
......
......@@ -1806,6 +1806,7 @@ sys_call_table:
data8 sys_preadv
data8 sys_pwritev // 1320
data8 sys_rt_tgsigqueueinfo
data8 sys_recvmmsg
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
......@@ -371,3 +371,4 @@ ENTRY(sys_call_table)
.long sys_ni_syscall
.long sys_rt_tgsigqueueinfo /* 365 */
.long sys_perf_event_open
.long sys_recvmmsg
......@@ -583,6 +583,7 @@ einval: li v0, -ENOSYS
sys sys_rt_tgsigqueueinfo 4
sys sys_perf_event_open 5
sys sys_accept4 4
sys sys_recvmmsg 5
.endm
/* We pre-compute the number of _instruction_ bytes needed to
......
......@@ -420,4 +420,5 @@ sys_call_table:
PTR sys_rt_tgsigqueueinfo
PTR sys_perf_event_open
PTR sys_accept4
PTR sys_recvmmsg
.size sys_call_table,.-sys_call_table
......@@ -418,4 +418,5 @@ EXPORT(sysn32_call_table)
PTR compat_sys_rt_tgsigqueueinfo /* 5295 */
PTR sys_perf_event_open
PTR sys_accept4
PTR compat_sys_recvmmsg
.size sysn32_call_table,.-sysn32_call_table
......@@ -538,4 +538,5 @@ sys_call_table:
PTR compat_sys_rt_tgsigqueueinfo
PTR sys_perf_event_open
PTR sys_accept4
PTR compat_sys_recvmmsg
.size sys_call_table,.-sys_call_table
......@@ -391,3 +391,4 @@ sys_call_table:
.long sys_pwritev
.long sys_rt_tgsigqueueinfo
.long sys_perf_event_open
.long sys_recvmmsg /* 365 */
......@@ -82,5 +82,5 @@ sys_call_table:
/*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
/*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open
/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg
......@@ -83,7 +83,7 @@ sys_call_table32:
/*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate
.word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1
/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv
.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open
.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg
#endif /* CONFIG_COMPAT */
......@@ -158,4 +158,4 @@ sys_call_table:
/*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
.word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open
.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg
......@@ -832,4 +832,5 @@ ia32_sys_call_table:
.quad compat_sys_pwritev
.quad compat_sys_rt_tgsigqueueinfo /* 335 */
.quad sys_perf_event_open
.quad compat_sys_recvmmsg
ia32_syscall_end:
......@@ -342,10 +342,11 @@
#define __NR_pwritev 334
#define __NR_rt_tgsigqueueinfo 335
#define __NR_perf_event_open 336
#define __NR_recvmmsg 337
#ifdef __KERNEL__
#define NR_syscalls 337
#define NR_syscalls 338
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
......
......@@ -661,6 +661,8 @@ __SYSCALL(__NR_pwritev, sys_pwritev)
__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
#define __NR_perf_event_open 298
__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
#define __NR_recvmmsg 299
__SYSCALL(__NR_recvmmsg, sys_recvmmsg)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
......
......@@ -336,3 +336,4 @@ ENTRY(sys_call_table)
.long sys_pwritev
.long sys_rt_tgsigqueueinfo /* 335 */
.long sys_perf_event_open
.long sys_recvmmsg
......@@ -681,8 +681,10 @@ __SYSCALL(304, sys_signalfd, 3)
__SYSCALL(305, sys_ni_syscall, 0)
#define __NR_eventfd 306
__SYSCALL(306, sys_eventfd, 1)
#define __NR_recvmmsg 307
__SYSCALL(307, sys_recvmmsg, 5)
#define __NR_syscall_count 307
#define __NR_syscall_count 308
/*
* sysxtensa syscall handler
......
......@@ -41,6 +41,7 @@
#define SYS_SENDMSG 16 /* sys_sendmsg(2) */
#define SYS_RECVMSG 17 /* sys_recvmsg(2) */
#define SYS_ACCEPT4 18 /* sys_accept4(2) */
#define SYS_RECVMMSG 19 /* sys_recvmmsg(2) */
typedef enum {
SS_FREE = 0, /* not allocated */
......
......@@ -65,6 +65,12 @@ struct msghdr {
unsigned msg_flags;
};
/* For recvmmsg/sendmmsg */
struct mmsghdr {
struct msghdr msg_hdr;
unsigned msg_len;
};
/*
* POSIX 1003.1g - ancillary data object information
* Ancillary data consits of a sequence of pairs of
......@@ -312,6 +318,10 @@ extern int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uadd
extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr);
extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
struct timespec;
extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
unsigned int flags, struct timespec *timeout);
#endif
#endif /* not kernel and not glibc */
#endif /* _LINUX_SOCKET_H */
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment