Commit 4f31d774 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'for-linus-4.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml

Pull UML updates from Richard Weinberger:
 "Besides random fixes/cleanups, this contains two bigger changes:

   - seccomp support by Mickaël Salaün

   - IRQ rework by Anton Ivanov"

* 'for-linus-4.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml:
  um: Use race-free temporary file creation
  um: Do not set unsecure permission for temporary file
  um: Fix build error and kconfig for i386
  um: Add seccomp support
  um: Add full asm/syscall.h support
  selftests/seccomp: Remove the need for HAVE_ARCH_TRACEHOOK
  um: Fix ptrace GETREGS/SETREGS bugs
  um: link with -lpthread
  um: Update UBD to use pread/pwrite family of functions
  um: Do not change hard IRQ flags in soft IRQ processing
  um: Prevent IRQ handler reentrancy
  uml: flush stdout before forking
  uml: fix hostfs mknod()
parents 1baa5efb 3e46b253
......@@ -33,7 +33,7 @@
| sh: | TODO |
| sparc: | TODO |
| tile: | ok |
| um: | TODO |
| um: | ok |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
......
......@@ -2,6 +2,7 @@ config UML
bool
default y
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_UID16
select HAVE_FUTEX_CMPXCHG if FUTEX
select GENERIC_IRQ_SHOW
......
......@@ -104,3 +104,19 @@ config PGTABLE_LEVELS
int
default 3 if 3_LEVEL_PGTABLES
default 2
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
---help---
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y.
......@@ -535,11 +535,7 @@ static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
int err;
err = os_seek_file(fd, offset);
if (err < 0)
return err;
err = os_read_file(fd, buf, len);
err = os_pread_file(fd, buf, len, offset);
if (err < 0)
return err;
......@@ -1377,14 +1373,8 @@ static int update_bitmap(struct io_thread_req *req)
if(req->cow_offset == -1)
return 0;
n = os_seek_file(req->fds[1], req->cow_offset);
if(n < 0){
printk("do_io - bitmap lseek failed : err = %d\n", -n);
return 1;
}
n = os_write_file(req->fds[1], &req->bitmap_words,
sizeof(req->bitmap_words));
n = os_pwrite_file(req->fds[1], &req->bitmap_words,
sizeof(req->bitmap_words), req->cow_offset);
if(n != sizeof(req->bitmap_words)){
printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
req->fds[1]);
......@@ -1399,7 +1389,6 @@ static void do_io(struct io_thread_req *req)
char *buf;
unsigned long len;
int n, nsectors, start, end, bit;
int err;
__u64 off;
if (req->op == UBD_FLUSH) {
......@@ -1428,18 +1417,12 @@ static void do_io(struct io_thread_req *req)
len = (end - start) * req->sectorsize;
buf = &req->buffer[start * req->sectorsize];
err = os_seek_file(req->fds[bit], off);
if(err < 0){
printk("do_io - lseek failed : err = %d\n", -err);
req->error = 1;
return;
}
if(req->op == UBD_READ){
n = 0;
do {
buf = &buf[n];
len -= n;
n = os_read_file(req->fds[bit], buf, len);
n = os_pread_file(req->fds[bit], buf, len, off);
if (n < 0) {
printk("do_io - read failed, err = %d "
"fd = %d\n", -n, req->fds[bit]);
......@@ -1449,7 +1432,7 @@ static void do_io(struct io_thread_req *req)
} while((n < len) && (n != 0));
if (n < len) memset(&buf[n], 0, len - n);
} else {
n = os_write_file(req->fds[bit], buf, len);
n = os_pwrite_file(req->fds[bit], buf, len, off);
if(n != len){
printk("do_io - write failed err = %d "
"fd = %d\n", -n, req->fds[bit]);
......
#ifndef __ASM_UM_HARDIRQ_H
#define __ASM_UM_HARDIRQ_H
#include <linux/cache.h>
#include <linux/threads.h>
/*
 * IRQ status record, marked ____cacheline_aligned. Its only member is the
 * __softirq_pending word.
 */
typedef struct {
unsigned int __softirq_pending;
} ____cacheline_aligned irq_cpustat_t;
#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
#include <linux/irq.h>
#ifndef ack_bad_irq
/*
 * Report an unexpected interrupt: log @irq as a two-digit hex vector at
 * KERN_CRIT level. Nothing else is done with the interrupt here.
 */
static inline void ack_bad_irq(unsigned int irq)
{
printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
}
#endif
#define __ARCH_IRQ_EXIT_IRQS_DISABLED 1
#endif /* __ASM_UM_HARDIRQ_H */
/*
* Access to user system call parameters and results
*
* See asm-generic/syscall.h for function descriptions.
*
* Copyright (C) 2015 Mickaël Salaün <mic@digikod.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef __UM_SYSCALL_GENERIC_H
#define __UM_SYSCALL_GENERIC_H
#include <asm/ptrace.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <sysdep/ptrace.h>
/*
 * Return the system call number held in @regs, read through
 * PT_REGS_SYSCALL_NR(). @task is not used.
 */
static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
{
return PT_REGS_SYSCALL_NR(regs);
}
/*
 * Intentionally empty in this header: neither @task nor @regs is touched.
 */
static inline void syscall_rollback(struct task_struct *task,
struct pt_regs *regs)
{
/* do nothing */
}
/*
 * Return the error code of the system call described by @regs.
 *
 * The raw return value is fetched with regs_return_value(); it is passed
 * back unchanged when IS_ERR_VALUE() classifies it as an error, otherwise
 * 0 is returned. @task is not used.
 */
static inline long syscall_get_error(struct task_struct *task,
				     struct pt_regs *regs)
{
	const long ret = regs_return_value(regs);

	if (IS_ERR_VALUE(ret))
		return ret;

	return 0;
}
/*
 * Return the system call return value stored in @regs, exactly as
 * regs_return_value() reports it. @task is not used.
 */
static inline long syscall_get_return_value(struct task_struct *task,
struct pt_regs *regs)
{
return regs_return_value(regs);
}
/*
 * Store a system call return value into @regs.
 *
 * A non-zero @error takes precedence and is stored (widened to long);
 * only when @error is 0 is @val stored instead. The store goes through
 * PT_REGS_SET_SYSCALL_RETURN(). @task is not used.
 */
static inline void syscall_set_return_value(struct task_struct *task,
					    struct pt_regs *regs,
					    int error, long val)
{
	long ret = val;

	if (error)
		ret = (long) error;

	PT_REGS_SET_SYSCALL_RETURN(regs, ret);
}
/*
 * Copy up to six system call arguments out of @regs.
 *
 * @i:    zero-based index of the first argument to fetch (0..6)
 * @n:    number of consecutive arguments to fetch
 * @args: destination array, filled from args[0] upward
 *
 * Arguments are read with UPT_SYSCALL_ARG1() .. UPT_SYSCALL_ARG6().
 * Asking for anything beyond the sixth argument (i > 6, or i + n > 6)
 * ends in BUG(); arguments that do exist in the requested range are
 * still copied before that happens. @task is not used.
 */
static inline void syscall_get_arguments(struct task_struct *task,
					 struct pt_regs *regs,
					 unsigned int i, unsigned int n,
					 unsigned long *args)
{
	const struct uml_pt_regs *r = &regs->regs;
	unsigned int idx;

	for (idx = i; n > 0 && idx < 6; idx++, n--) {
		switch (idx) {
		case 0:
			*args++ = UPT_SYSCALL_ARG1(r);
			break;
		case 1:
			*args++ = UPT_SYSCALL_ARG2(r);
			break;
		case 2:
			*args++ = UPT_SYSCALL_ARG3(r);
			break;
		case 3:
			*args++ = UPT_SYSCALL_ARG4(r);
			break;
		case 4:
			*args++ = UPT_SYSCALL_ARG5(r);
			break;
		case 5:
			*args++ = UPT_SYSCALL_ARG6(r);
			break;
		}
	}

	/* Leftover count or a start index past slot 6 is a caller bug. */
	if (n > 0 || i > 6)
		BUG();
}
/*
 * Write up to six system call arguments into @regs.
 *
 * @i:    zero-based index of the first argument to overwrite (0..6)
 * @n:    number of consecutive arguments to overwrite
 * @args: source array, consumed from args[0] upward
 *
 * Arguments are stored through UPT_SYSCALL_ARG1() .. UPT_SYSCALL_ARG6().
 * Addressing anything beyond the sixth argument (i > 6, or i + n > 6)
 * ends in BUG(); arguments that do exist in the requested range are
 * still written before that happens. @task is not used.
 */
static inline void syscall_set_arguments(struct task_struct *task,
					 struct pt_regs *regs,
					 unsigned int i, unsigned int n,
					 const unsigned long *args)
{
	struct uml_pt_regs *r = &regs->regs;
	unsigned int idx;

	for (idx = i; n > 0 && idx < 6; idx++, n--) {
		const unsigned long value = *args++;

		switch (idx) {
		case 0:
			UPT_SYSCALL_ARG1(r) = value;
			break;
		case 1:
			UPT_SYSCALL_ARG2(r) = value;
			break;
		case 2:
			UPT_SYSCALL_ARG3(r) = value;
			break;
		case 3:
			UPT_SYSCALL_ARG4(r) = value;
			break;
		case 4:
			UPT_SYSCALL_ARG5(r) = value;
			break;
		case 5:
			UPT_SYSCALL_ARG6(r) = value;
			break;
		}
	}

	/* Leftover count or a start index past slot 6 is a caller bug. */
	if (n > 0 || i > 6)
		BUG();
}
/* See arch/x86/um/asm/syscall.h for syscall_get_arch() definition. */
#endif /* __UM_SYSCALL_GENERIC_H */
......@@ -62,11 +62,13 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_SYSCALL_AUDIT 6
#define TIF_RESTORE_SIGMASK 7
#define TIF_NOTIFY_RESUME 8
#define TIF_SECCOMP 9 /* secure computing */
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_MEMDIE (1 << TIF_MEMDIE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#endif
......@@ -146,6 +146,8 @@ extern int os_read_file(int fd, void *buf, int len);
extern int os_write_file(int fd, const void *buf, int count);
extern int os_sync_file(int fd);
extern int os_file_size(const char *file, unsigned long long *size_out);
extern int os_pread_file(int fd, void *buf, int len, unsigned long long offset);
extern int os_pwrite_file(int fd, const void *buf, int count, unsigned long long offset);
extern int os_file_modtime(const char *file, unsigned long *modtime);
extern int os_pipe(int *fd, int stream, int close_on_exec);
extern int os_set_fd_async(int fd);
......@@ -282,7 +284,6 @@ extern void initial_thread_cb_skas(void (*proc)(void *),
void *arg);
extern void halt_skas(void);
extern void reboot_skas(void);
extern int get_syscall(struct uml_pt_regs *regs);
/* irq.c */
extern int os_waiting_for_events(struct irq_fd *active_fds);
......
......@@ -5,31 +5,38 @@
#include <linux/kernel.h>
#include <linux/ptrace.h>
#include <linux/seccomp.h>
#include <kern_util.h>
#include <sysdep/ptrace.h>
#include <sysdep/ptrace_user.h>
#include <sysdep/syscalls.h>
#include <os.h>
void handle_syscall(struct uml_pt_regs *r)
{
struct pt_regs *regs = container_of(r, struct pt_regs, regs);
long result;
int syscall;
if (syscall_trace_enter(regs)) {
result = -ENOSYS;
/* Initialize the syscall number and default return value. */
UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp);
PT_REGS_SET_SYSCALL_RETURN(regs, -ENOSYS);
/* Do the secure computing check first; failures should be fast. */
if (secure_computing() == -1)
return;
if (syscall_trace_enter(regs))
goto out;
}
syscall = get_syscall(r);
/* Update the syscall number after orig_ax has potentially been updated
* with ptrace.
*/
UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp);
syscall = UPT_SYSCALL_NR(r);
if ((syscall > __NR_syscall_max) || syscall < 0)
result = -ENOSYS;
else
result = EXECUTE_SYSCALL(syscall, regs);
if (syscall >= 0 && syscall <= __NR_syscall_max)
PT_REGS_SET_SYSCALL_RETURN(regs,
EXECUTE_SYSCALL(syscall, regs));
out:
PT_REGS_SET_SYSCALL_RETURN(regs, result);
syscall_trace_leave(regs);
}
......@@ -264,6 +264,15 @@ int os_read_file(int fd, void *buf, int len)
return n;
}
/*
 * Read up to @len bytes from @fd at absolute position @offset into @buf,
 * using pread().
 *
 * Returns the number of bytes read (0 at end of file, possibly fewer than
 * @len), or the negated errno value when pread() fails.
 */
int os_pread_file(int fd, void *buf, int len, unsigned long long offset)
{
	int nread;

	nread = pread(fd, buf, len, offset);

	return (nread < 0) ? -errno : nread;
}
int os_write_file(int fd, const void *buf, int len)
{
int n = write(fd, (void *) buf, len);
......@@ -282,6 +291,16 @@ int os_sync_file(int fd)
return n;
}
/*
 * Write @len bytes from @buf to @fd at absolute position @offset, using
 * pwrite().
 *
 * Returns the number of bytes written (possibly fewer than @len), or the
 * negated errno value when pwrite() fails.
 */
int os_pwrite_file(int fd, const void *buf, int len, unsigned long long offset)
{
	int nwritten;

	nwritten = pwrite(fd, buf, len, offset);

	return (nwritten < 0) ? -errno : nwritten;
}
int os_file_size(const char *file, unsigned long long *size_out)
{
struct uml_stat buf;
......
......@@ -106,6 +106,17 @@ static int __init make_tempfile(const char *template)
}
}
#ifdef O_TMPFILE
fd = open(tempdir, O_CLOEXEC | O_RDWR | O_EXCL | O_TMPFILE, 0700);
/*
* If the running system does not support O_TMPFILE flag then retry
* without it.
*/
if (fd != -1 || (errno != EINVAL && errno != EISDIR &&
errno != EOPNOTSUPP))
return fd;
#endif
tempname = malloc(strlen(tempdir) + strlen(template) + 1);
if (tempname == NULL)
return -1;
......@@ -142,12 +153,6 @@ static int __init create_tmp_file(unsigned long long len)
if (fd < 0)
exit(1);
err = fchmod(fd, 0777);
if (err < 0) {
perror("fchmod");
exit(1);
}
/*
* Seek to len - 1 because writing a character there will
* increase the file size by one byte, to the desired length.
......
......@@ -62,6 +62,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
static int signals_enabled;
static unsigned int signals_pending;
static unsigned int signals_active = 0;
void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
{
......@@ -101,7 +102,12 @@ void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc)
block_signals();
signals_active |= SIGALRM_MASK;
timer_real_alarm_handler(mc);
signals_active &= ~SIGALRM_MASK;
set_signals(enabled);
}
......@@ -286,8 +292,16 @@ void unblock_signals(void)
if (save_pending & SIGIO_MASK)
sig_handler_common(SIGIO, NULL, NULL);
if (save_pending & SIGALRM_MASK)
/* Do not reenter the handler */
if ((save_pending & SIGALRM_MASK) && (!(signals_active & SIGALRM_MASK)))
timer_real_alarm_handler(NULL);
/* Rerun the loop only if there is still pending SIGIO and not in TIMER handler */
if (!(signals_pending & SIGIO_MASK) && (signals_active & SIGALRM_MASK))
return;
}
}
......
......@@ -172,13 +172,6 @@ static void handle_trap(int pid, struct uml_pt_regs *regs,
handle_syscall(regs);
}
/*
 * Refresh UPT_SYSCALL_NR(regs) from PT_SYSCALL_NR(regs->gp) and return the
 * refreshed value.
 */
int get_syscall(struct uml_pt_regs *regs)
{
UPT_SYSCALL_NR(regs) = PT_SYSCALL_NR(regs->gp);
return UPT_SYSCALL_NR(regs);
}
extern char __syscall_stub_start[];
static int userspace_tramp(void *stack)
......
......@@ -94,6 +94,8 @@ static int start_ptraced_child(void)
{
int pid, n, status;
fflush(stdout);
pid = fork();
if (pid == 0)
ptrace_child();
......
......@@ -17,7 +17,7 @@ obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \
ifeq ($(CONFIG_X86_32),y)
obj-y += checksum_32.o
obj-$(CONFIG_BINFMT_ELF) += elfcore.o
obj-$(CONFIG_ELF_CORE) += elfcore.o
subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
subarch-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += ../lib/rwsem.o
......
#ifndef __UM_ASM_SYSCALL_H
#define __UM_ASM_SYSCALL_H
#include <asm/syscall-generic.h>
#include <uapi/linux/audit.h>
typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long,
......
......@@ -68,6 +68,7 @@ static const int reg_offsets[] = {
[EFL] = HOST_EFLAGS,
[UESP] = HOST_SP,
[SS] = HOST_SS,
[ORIG_EAX] = HOST_ORIG_AX,
};
int putreg(struct task_struct *child, int regno, unsigned long value)
......@@ -83,6 +84,7 @@ int putreg(struct task_struct *child, int regno, unsigned long value)
case EAX:
case EIP:
case UESP:
case ORIG_EAX:
break;
case FS:
if (value && (value & 3) != 3)
......@@ -108,9 +110,6 @@ int putreg(struct task_struct *child, int regno, unsigned long value)
value &= FLAG_MASK;
child->thread.regs.regs.gp[HOST_EFLAGS] |= value;
return 0;
case ORIG_EAX:
child->thread.regs.regs.syscall = value;
return 0;
default :
panic("Bad register in putreg() : %d\n", regno);
}
......@@ -143,8 +142,6 @@ unsigned long getreg(struct task_struct *child, int regno)
regno >>= 2;
switch (regno) {
case ORIG_EAX:
return child->thread.regs.regs.syscall;
case FS:
case GS:
case DS:
......@@ -163,6 +160,7 @@ unsigned long getreg(struct task_struct *child, int regno)
case EDI:
case EBP:
case EFL:
case ORIG_EAX:
break;
default:
panic("Bad register in getreg() : %d\n", regno);
......
......@@ -730,15 +730,13 @@ static int hostfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
init_special_inode(inode, mode, dev);
err = do_mknod(name, mode, MAJOR(dev), MINOR(dev));
if (!err)
if (err)
goto out_free;
err = read_name(inode, name);
__putname(name);
if (err)
goto out_put;
if (err)
goto out_put;
d_instantiate(dentry, inode);
return 0;
......
......@@ -62,7 +62,7 @@ vmlinux_link()
-Wl,--start-group \
${KBUILD_VMLINUX_MAIN} \
-Wl,--end-group \
-lutil -lrt ${1}
-lutil -lrt -lpthread ${1}
rm -f linux
fi
}
......
......@@ -1246,11 +1246,24 @@ TEST_F(TRACE_poke, getpid_runs_normally)
# error "Do not know how to find your architecture's registers and syscalls"
#endif
/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
* architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
*/
#if defined(__x86_64__) || defined(__i386__)
#define HAVE_GETREGS
#endif
/* Architecture-specific syscall fetching routine. */
int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
{
struct iovec iov;
ARCH_REGS regs;
#ifdef HAVE_GETREGS
EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
TH_LOG("PTRACE_GETREGS failed");
return -1;
}
#else
struct iovec iov;
iov.iov_base = &regs;
iov.iov_len = sizeof(regs);
......@@ -1258,6 +1271,7 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
TH_LOG("PTRACE_GETREGSET failed");
return -1;
}
#endif
return regs.SYSCALL_NUM;
}
......@@ -1266,13 +1280,16 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
void change_syscall(struct __test_metadata *_metadata,
pid_t tracee, int syscall)
{
struct iovec iov;
int ret;
ARCH_REGS regs;
#ifdef HAVE_GETREGS
ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);