fs: Better permission checking for submounts

To support unprivileged users mounting filesystems two permission
checks have to be performed: a test to see if the user is allowed to
create a mount in the mount namespace, and a test to see if
the user is allowed to access the specified filesystem.

The automount case is special in that mounting the original filesystem
grants permission to mount the sub-filesystems, to any user who
happens to stumble across their mountpoint and satisfies the
ordinary filesystem permission checks.

Attempting to handle the automount case by using override_creds
almost works.  It preserves the idea that permission to mount
the original filesystem is permission to mount the sub-filesystem.
Unfortunately using override_creds messes up the filesystem's
ordinary permission checks.

Solve this by being explicit that a mount is a submount by introducing
vfs_submount, and using it where appropriate.

vfs_submount uses a new internal mount flag, MS_SUBMOUNT, to let
sget and friends know that a mount is a submount so they can take
appropriate action.

sget and sget_userns are modified to not perform any permission checks
on submounts.

follow_automount is modified to stop using override_creds as that
has proven problematic.

do_mount is modified to always remove the new MS_SUBMOUNT flag so
that we know userspace will never be able to specify it.

autofs4 is modified to stop using current_real_cred that was put in
there to handle the previous version of submount permission checking.

cifs is modified to pass the mountpoint all of the way down to vfs_submount.

debugfs is modified to pass the mountpoint all of the way down to
trace_automount by adding a new parameter.  To make this change easier
a new typedef debugfs_automount_t is introduced to capture the type of
the debugfs automount function.

Fixes: 069d5ac9 ("autofs:  Fix automounts by using current_real_cred()->uid")
Fixes: aeaa4a79 ("fs: Call d_automount with the filesystems creds")
Reviewed-by: Trond Myklebust <>
Reviewed-by: Seth Forshee <>
Signed-off-by: "Eric W. Biederman" <>
......@@ -202,7 +202,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
/* try and do the mount */
_debug("--- attempting mount %s -o %s ---", devname, options);
mnt = vfs_kern_mount(&afs_fs_type, 0, devname, options);
mnt = vfs_submount(mntpt, &afs_fs_type, devname, options);
_debug("--- mount result %p ---", mnt);
free_page((unsigned long) devname);
......@@ -436,8 +436,8 @@ int autofs4_wait(struct autofs_sb_info *sbi,
memcpy(&wq->name, &qstr, sizeof(struct qstr));
wq->dev = autofs4_get_dev(sbi);
wq->ino = autofs4_get_ino(sbi);
wq->uid = current_real_cred()->uid;
wq->gid = current_real_cred()->gid;
wq->uid = current_cred()->uid;
wq->gid = current_cred()->gid;
wq->pid = pid;
wq->tgid = tgid;
wq->status = -EINTR; /* Status return if interrupted */
......@@ -245,7 +245,8 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
* @fullpath: full path in UNC format
* @ref: server's referral
static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb,
static struct vfsmount *cifs_dfs_do_refmount(struct dentry *mntpt,
struct cifs_sb_info *cifs_sb,
const char *fullpath, const struct dfs_info3_param *ref)
struct vfsmount *mnt;
......@@ -259,7 +260,7 @@ static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb,
if (IS_ERR(mountdata))
return (struct vfsmount *)mountdata;
mnt = vfs_kern_mount(&cifs_fs_type, 0, devname, mountdata);
mnt = vfs_submount(mntpt, &cifs_fs_type, devname, mountdata);
return mnt;
......@@ -334,7 +335,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
mnt = cifs_dfs_do_refmount(cifs_sb,
mnt = cifs_dfs_do_refmount(mntpt, cifs_sb,
full_path, referrals + i);
cifs_dbg(FYI, "%s: cifs_dfs_do_refmount:%s , mnt:%p\n",
__func__, referrals[i].node_name, mnt);
......@@ -187,9 +187,9 @@ static const struct super_operations debugfs_super_operations = {
static struct vfsmount *debugfs_automount(struct path *path)
struct vfsmount *(*f)(void *);
f = (struct vfsmount *(*)(void *))path->dentry->d_fsdata;
return f(d_inode(path->dentry)->i_private);
debugfs_automount_t f;
f = (debugfs_automount_t)path->dentry->d_fsdata;
return f(path->dentry, d_inode(path->dentry)->i_private);
static const struct dentry_operations debugfs_dops = {
......@@ -504,7 +504,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_dir);
struct dentry *debugfs_create_automount(const char *name,
struct dentry *parent,
struct vfsmount *(*f)(void *),
debugfs_automount_t f,
void *data)
struct dentry *dentry = start_creating(name, parent);
......@@ -1100,7 +1100,6 @@ static int follow_automount(struct path *path, struct nameidata *nd,
bool *need_mntput)
struct vfsmount *mnt;
const struct cred *old_cred;
int err;
if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
......@@ -1129,9 +1128,7 @@ static int follow_automount(struct path *path, struct nameidata *nd,
if (nd->total_link_count >= 40)
return -ELOOP;
old_cred = override_creds(&init_cred);
mnt = path->dentry->d_op->d_automount(path);
if (IS_ERR(mnt)) {
* The filesystem is allowed to return -EISDIR here to indicate
......@@ -989,6 +989,21 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
struct vfsmount *
vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
const char *name, void *data)
/* Until it is worked out how to pass the user namespace
* through from the parent mount to the submount don't support
* unprivileged mounts with submounts.
if (mountpoint->d_sb->s_user_ns != &init_user_ns)
return ERR_PTR(-EPERM);
return vfs_kern_mount(type, MS_SUBMOUNT, name, data);
static struct mount *clone_mnt(struct mount *old, struct dentry *root,
int flag)
......@@ -2794,7 +2809,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
if (flags & MS_REMOUNT)
retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
......@@ -226,7 +226,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
const char *devname,
struct nfs_clone_mount *mountdata)
return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
return vfs_submount(mountdata->dentry, &nfs_xdev_fs_type, devname, mountdata);
......@@ -279,7 +279,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, mountdata);
mnt = vfs_submount(mountdata->dentry, &nfs4_referral_fs_type, page, mountdata);
if (!IS_ERR(mnt))
......@@ -469,7 +469,7 @@ struct super_block *sget_userns(struct file_system_type *type,
struct super_block *old;
int err;
if (!(flags & MS_KERNMOUNT) &&
if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) &&
!(type->fs_flags & FS_USERNS_MOUNT) &&
return ERR_PTR(-EPERM);
......@@ -499,7 +499,7 @@ struct super_block *sget_userns(struct file_system_type *type,
if (!s) {
s = alloc_super(type, flags, user_ns);
s = alloc_super(type, (flags & ~MS_SUBMOUNT), user_ns);
if (!s)
return ERR_PTR(-ENOMEM);
goto retry;
......@@ -540,8 +540,15 @@ struct super_block *sget(struct file_system_type *type,
struct user_namespace *user_ns = current_user_ns();
/* We don't yet pass the user namespace of the parent
* mount through to here so always use &init_user_ns
* until that changes.
if (flags & MS_SUBMOUNT)
user_ns = &init_user_ns;
/* Ensure the requestor has permissions over the target filesystem */
if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
return sget_userns(type, test, set, flags, user_ns, data);
......@@ -97,9 +97,10 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent);
struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
const char *dest);
typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *);
struct dentry *debugfs_create_automount(const char *name,
struct dentry *parent,
struct vfsmount *(*f)(void *),
debugfs_automount_t f,
void *data);
void debugfs_remove(struct dentry *dentry);
......@@ -90,6 +90,9 @@ struct file_system_type;
extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
int flags, const char *name,
void *data);
extern struct vfsmount *vfs_submount(const struct dentry *mountpoint,
struct file_system_type *type,
const char *name, void *data);
extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list);
extern void mark_mounts_for_expiry(struct list_head *mounts);
......@@ -132,6 +132,7 @@ struct inodes_stat_t {
#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
/* These sb flags are internal to the kernel */
#define MS_SUBMOUNT (1<<26)
#define MS_NOREMOTELOCK (1<<27)
#define MS_NOSEC (1<<28)
#define MS_BORN (1<<29)
......@@ -7503,7 +7503,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
ftrace_init_tracefs(tr, d_tracer);
static struct vfsmount *trace_automount(void *ingore)
static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
struct vfsmount *mnt;
struct file_system_type *type;
......@@ -7516,7 +7516,7 @@ static struct vfsmount *trace_automount(void *ingore)
type = get_fs_type("tracefs");
if (!type)
return NULL;
mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
mnt = vfs_submount(mntpt, type, "tracefs", NULL);
if (IS_ERR(mnt))
return NULL;
