Commit 50652963 authored by Linus Torvalds
Browse files

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull misc VFS updates from Al Viro:
 "This cycle a lot of stuff sits on topical branches, so I'll be sending
  more or less one pull request per branch.

  This is the first pile; more to follow in a few.  In this one are
  several misc commits from early in the cycle (before I went for
  separate branches), plus the rework of mntput/dput ordering on umount,
  switching to use of fs_pin instead of convoluted games in
  namespace_unlock()"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  switch the IO-triggering parts of umount to fs_pin
  new fs_pin killing logics
  allow attaching fs_pin to a group not associated with some superblock
  get rid of the second argument of acct_kill()
  take count and rcu_head out of fs_pin
  dcache: let the dentry count go down to zero without taking d_lock
  pull bumping refcount into ->kill()
  kill pin_put()
  mode_t whack-a-mole: chelsio
  file->f_path.dentry is pinned down for as long as the file is open...
  get rid of lustre_dump_dentry()
  gut proc_register() a bit
  kill d_validate()
  ncpfs: get rid of d_validate() nonsense
  selinuxfs: don't open-code d_genocide()
parents e2b74f23 87b95ce0
......@@ -55,7 +55,7 @@ static const struct file_operations name##_debugfs_fops = { \
/*
* Descriptor holding a name, a file_operations pointer, a mode and one
* byte of data (presumably one debugfs file, going by the struct name).
*
* NOTE(review): `mode` is declared twice in a row (mode_t, then umode_t).
* This looks like a diff rendering showing the removed line followed by
* its replacement - confirm against the real tree.
*/
struct t4_debugfs_entry {
const char *name;
const struct file_operations *ops;
mode_t mode;
umode_t mode;
unsigned char data;
};
......
......@@ -263,14 +263,6 @@ void ll_invalidate_aliases(struct inode *inode)
dentry, dentry, dentry->d_parent,
dentry->d_inode, dentry->d_flags);
if (unlikely(dentry == dentry->d_sb->s_root)) {
CERROR("%s: called on root dentry=%p, fid="DFID"\n",
ll_get_fsname(dentry->d_sb, NULL, 0),
dentry, PFID(ll_inode2fid(inode)));
lustre_dump_dentry(dentry, 1);
dump_stack();
}
d_lustre_invalidate(dentry, 0);
}
ll_unlock_dcache(inode);
......
......@@ -816,7 +816,6 @@ int ll_show_options(struct seq_file *seq, struct dentry *dentry);
void ll_dirty_page_discard_warn(struct page *page, int ioret);
int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
struct super_block *, struct lookup_intent *);
void lustre_dump_dentry(struct dentry *, int recur);
int ll_obd_statfs(struct inode *inode, void *arg);
int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize);
int ll_get_default_mdsize(struct ll_sb_info *sbi, int *default_mdsize);
......
......@@ -665,48 +665,6 @@ int ll_get_default_cookiesize(struct ll_sb_info *sbi, int *lmmsize)
return rc;
}
static void ll_dump_inode(struct inode *inode)
{
struct ll_d_hlist_node *tmp;
int dentry_count = 0;
LASSERT(inode != NULL);
ll_d_hlist_for_each(tmp, &inode->i_dentry)
dentry_count++;
CERROR("inode %p dump: dev=%s ino=%lu mode=%o count=%u, %d dentries\n",
inode, ll_i2mdexp(inode)->exp_obd->obd_name, inode->i_ino,
inode->i_mode, atomic_read(&inode->i_count), dentry_count);
}
void lustre_dump_dentry(struct dentry *dentry, int recur)
{
struct list_head *tmp;
int subdirs = 0;
LASSERT(dentry != NULL);
list_for_each(tmp, &dentry->d_subdirs)
subdirs++;
CERROR("dentry %p dump: name=%pd parent=%pd (%p), inode=%p, count=%u, flags=0x%x, fsdata=%p, %d subdirs\n",
dentry, dentry, dentry->d_parent, dentry->d_parent,
dentry->d_inode, d_count(dentry),
dentry->d_flags, dentry->d_fsdata, subdirs);
if (dentry->d_inode != NULL)
ll_dump_inode(dentry->d_inode);
if (recur == 0)
return;
list_for_each(tmp, &dentry->d_subdirs) {
struct dentry *d = list_entry(tmp, struct dentry, d_child);
lustre_dump_dentry(d, recur - 1);
}
}
static void client_common_put_super(struct super_block *sb)
{
struct ll_sb_info *sbi = ll_s2sbi(sb);
......
......@@ -511,7 +511,7 @@ static void __dentry_kill(struct dentry *dentry)
* dentry_iput drops the locks, at which point nobody (except
* transient RCU lookups) can reach this dentry.
*/
BUG_ON((int)dentry->d_lockref.count > 0);
BUG_ON(dentry->d_lockref.count > 0);
this_cpu_dec(nr_dentry);
if (dentry->d_op && dentry->d_op->d_release)
dentry->d_op->d_release(dentry);
......@@ -564,7 +564,7 @@ static inline struct dentry *lock_parent(struct dentry *dentry)
struct dentry *parent = dentry->d_parent;
if (IS_ROOT(dentry))
return NULL;
if (unlikely((int)dentry->d_lockref.count < 0))
if (unlikely(dentry->d_lockref.count < 0))
return NULL;
if (likely(spin_trylock(&parent->d_lock)))
return parent;
......@@ -593,6 +593,110 @@ again:
return parent;
}
/*
* Try to do a lockless dput(), and return whether that was successful.
*
* If unsuccessful, we return false, having already taken the dentry lock.
*
* The caller needs to hold the RCU read lock, so that the dentry is
* guaranteed to stay around even if the refcount goes down to zero!
*
* On the two explicit "return 0" paths below, d_lock is held and the
* caller's reference is still accounted for in d_lockref.count (either
* never dropped, or restored to 1).
*/
static inline bool fast_dput(struct dentry *dentry)
{
int ret;
unsigned int d_flags;
/*
* If we have a d_op->d_delete() operation, we should not
* let the dentry count go to zero, so use "put_or_lock".
*/
if (unlikely(dentry->d_flags & DCACHE_OP_DELETE))
return lockref_put_or_lock(&dentry->d_lockref);
/*
* .. otherwise, we can try to just decrement the
* lockref optimistically.
*/
ret = lockref_put_return(&dentry->d_lockref);
/*
* If the lockref_put_return() failed due to the lock being held
* by somebody else, the fast path has failed. We will need to
* get the lock, and then check the count again.
*/
if (unlikely(ret < 0)) {
spin_lock(&dentry->d_lock);
/* More than one reference left: drop ours under the lock and finish. */
if (dentry->d_lockref.count > 1) {
dentry->d_lockref.count--;
spin_unlock(&dentry->d_lock);
return 1;
}
/* We hold the last reference: keep it and the lock for the slow path. */
return 0;
}
/*
* If we weren't the last ref, we're done.
*/
if (ret)
return 1;
/*
* Careful, careful. The reference count went down
* to zero, but we don't hold the dentry lock, so
* somebody else could get it again, and do another
* dput(), and we need to not race with that.
*
* However, there is a very special and common case
* where we don't care, because there is nothing to
* do: the dentry is still hashed, it does not have
* a 'delete' op, and it's referenced and already on
* the LRU list.
*
* NOTE! Since we aren't locked, these values are
* not "stable". However, it is sufficient that at
* some point after we dropped the reference the
* dentry was hashed and the flags had the proper
* value. Other dentry users may have re-gotten
* a reference to the dentry and change that, but
* our work is done - we can leave the dentry
* around with a zero refcount.
*/
smp_rmb();
d_flags = ACCESS_ONCE(dentry->d_flags);
/* Keep only the two bits that decide whether any work remains. */
d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST;
/* Nothing to do? Dropping the reference was all we needed? */
if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry))
return 1;
/*
* Not the fast normal case? Get the lock. We've already decremented
* the refcount, but we'll need to re-check the situation after
* getting the lock.
*/
spin_lock(&dentry->d_lock);
/*
* Did somebody else grab a reference to it in the meantime, and
* we're no longer the last user after all? Alternatively, somebody
* else could have killed it and marked it dead. Either way, we
* don't need to do anything else.
*/
if (dentry->d_lockref.count) {
spin_unlock(&dentry->d_lock);
return 1;
}
/*
* Re-get the reference we optimistically dropped. We hold the
* lock, and we just tested that it was zero, so we can just
* set it to 1.
*/
dentry->d_lockref.count = 1;
return 0;
}
/*
* This is dput
*
......@@ -625,8 +729,14 @@ void dput(struct dentry *dentry)
return;
repeat:
if (lockref_put_or_lock(&dentry->d_lockref))
rcu_read_lock();
if (likely(fast_dput(dentry))) {
rcu_read_unlock();
return;
}
/* Slow case: now with the dentry lock held */
rcu_read_unlock();
/* Unreachable? Get rid of it */
if (unlikely(d_unhashed(dentry)))
......@@ -813,7 +923,7 @@ static void shrink_dentry_list(struct list_head *list)
* We found an inuse dentry which was not removed from
* the LRU because of laziness during lookup. Do not free it.
*/
if ((int)dentry->d_lockref.count > 0) {
if (dentry->d_lockref.count > 0) {
spin_unlock(&dentry->d_lock);
if (parent)
spin_unlock(&parent->d_lock);
......@@ -2191,37 +2301,6 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
}
EXPORT_SYMBOL(d_hash_and_lookup);
/**
* d_validate - verify dentry provided from insecure source (deprecated)
* @dentry: The dentry alleged to be valid child of @dparent
* @dparent: The parent dentry (known to be valid)
*
* An insecure source has sent us a dentry, here we verify it and dget() it.
* This is used by ncpfs in its readdir implementation.
* Zero is returned in the dentry is invalid.
*
* This function is slow for big directories, and deprecated, do not use it.
*/
int d_validate(struct dentry *dentry, struct dentry *dparent)
{
struct dentry *child;
spin_lock(&dparent->d_lock);
list_for_each_entry(child, &dparent->d_subdirs, d_child) {
if (dentry == child) {
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
__dget_dlock(dentry);
spin_unlock(&dentry->d_lock);
spin_unlock(&dparent->d_lock);
return 1;
}
}
spin_unlock(&dparent->d_lock);
return 0;
}
EXPORT_SYMBOL(d_validate);
/*
* When a file is deleted, we have two options:
* - turn this dentry into a negative dentry
......
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/fs_pin.h>
#include "internal.h"
#include "mount.h"
/*
 * RCU callback (passed to call_rcu() by pin_put()): free the fs_pin that
 * embeds @head as its rcu member.
 */
static void pin_free_rcu(struct rcu_head *head)
{
	struct fs_pin *pin = container_of(head, struct fs_pin, rcu);

	kfree(pin);
}
static DEFINE_SPINLOCK(pin_lock);
/*
 * Drop one reference on @p. When the count reaches zero the pin is handed
 * to call_rcu() so that pin_free_rcu() frees it after a grace period.
 */
void pin_put(struct fs_pin *p)
{
	/* Not the last reference: nothing more to do. */
	if (!atomic_long_dec_and_test(&p->count))
		return;

	call_rcu(&p->rcu, pin_free_rcu);
}
/*
* Detach @pin and announce that it is finished.
*
* The pin is unlinked from both lists it sits on (m_list and s_list)
* under pin_lock. Then, under pin->wait.lock, done is set to 1 and the
* waiters queued on pin->wait are woken.
*/
void pin_remove(struct fs_pin *pin)
{
spin_lock(&pin_lock);
hlist_del(&pin->m_list);
hlist_del(&pin->s_list);
spin_unlock(&pin_lock);
/* done and the wakeup are both handled under the wait-queue lock. */
spin_lock_irq(&pin->wait.lock);
pin->done = 1;
wake_up_locked(&pin->wait);
spin_unlock_irq(&pin->wait.lock);
}
/*
* Link @pin onto the mount's mnt_pins list (via m_list) and onto a second
* list (via s_list), all under pin_lock.
*
* NOTE(review): this span carries two signature lines and both an
* unconditional insertion into m->mnt_sb->s_pins and a conditional
* insertion into @p. It looks like a diff rendering with the removed
* pin_insert() lines interleaved with the added pin_insert_group() lines -
* confirm against the real tree before relying on it.
*/
void pin_insert(struct fs_pin *pin, struct vfsmount *m)
void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p)
{
spin_lock(&pin_lock);
hlist_add_head(&pin->s_list, &m->mnt_sb->s_pins);
/* A NULL @p leaves s_list off any group list. */
if (p)
hlist_add_head(&pin->s_list, p);
hlist_add_head(&pin->m_list, &real_mount(m)->mnt_pins);
spin_unlock(&pin_lock);
}
/*
 * Attach @pin to mount @m, using the s_pins list of @m's superblock as
 * the group list. Thin wrapper around pin_insert_group().
 */
void pin_insert(struct fs_pin *pin, struct vfsmount *m)
{
	struct hlist_head *sb_pins = &m->mnt_sb->s_pins;

	pin_insert_group(pin, m, sb_pins);
}
/*
* Kill pin @p, or wait for a kill that is already under way.
*
* Every path out of this function calls rcu_read_unlock(), so the caller
* presumably enters with the RCU read lock held - confirm against callers.
* A NULL @p just drops the RCU read lock and returns.
*
* p->done is examined under p->wait.lock:
*   0    - set it to -1 and call p->kill(p) ourselves;
*   > 0  - nothing left to do;
*   else - sleep on p->wait until done turns positive or our wait entry
*          is no longer queued.
*/
void pin_kill(struct fs_pin *p)
{
wait_queue_t wait;
if (!p) {
rcu_read_unlock();
return;
}
init_wait(&wait);
spin_lock_irq(&p->wait.lock);
if (likely(!p->done)) {
/* We are the first: claim the kill, then run it with no locks held. */
p->done = -1;
spin_unlock_irq(&p->wait.lock);
rcu_read_unlock();
p->kill(p);
return;
}
if (p->done > 0) {
spin_unlock_irq(&p->wait.lock);
rcu_read_unlock();
return;
}
/* done is negative here: queue ourselves and wait for completion. */
__add_wait_queue(&p->wait, &wait);
while (1) {
/* p->wait.lock is held at the top of every iteration. */
set_current_state(TASK_UNINTERRUPTIBLE);
spin_unlock_irq(&p->wait.lock);
rcu_read_unlock();
schedule();
rcu_read_lock();
/* Wait entry already off the queue: leave without touching p again. */
if (likely(list_empty(&wait.task_list)))
break;
/* OK, we know p couldn't have been freed yet */
spin_lock_irq(&p->wait.lock);
if (p->done > 0) {
spin_unlock_irq(&p->wait.lock);
break;
}
}
rcu_read_unlock();
}
/*
* Repeatedly take the first entry of m->mnt_pins and kill it, until the
* list is observed empty. The list head is sampled with ACCESS_ONCE()
* under rcu_read_lock().
*
* NOTE(review): both a refcount-and-pin->kill() sequence and a pin_kill()
* call appear in the loop body. This looks like a diff rendering with
* removed and added lines interleaved - confirm against the real tree.
*/
void mnt_pin_kill(struct mount *m)
{
while (1) {
struct hlist_node *p;
struct fs_pin *pin;
rcu_read_lock();
p = ACCESS_ONCE(m->mnt_pins.first);
if (!p) {
/* No pins left on this mount. */
rcu_read_unlock();
break;
}
pin = hlist_entry(p, struct fs_pin, m_list);
if (!atomic_long_inc_not_zero(&pin->count)) {
/* Count already hit zero: back off and re-read the list head. */
rcu_read_unlock();
cpu_relax();
continue;
}
rcu_read_unlock();
pin->kill(pin);
pin_kill(hlist_entry(p, struct fs_pin, m_list));
}
}
/*
* Repeatedly take the first entry of a pin list (linked through s_list)
* and kill it, until the list is observed empty. The list head is sampled
* with ACCESS_ONCE() under rcu_read_lock().
*
* NOTE(review): this span carries two signature lines, two list-head reads
* (sb->s_pins.first and p->first) and unbalanced braces. It looks like a
* diff rendering with the removed sb_pin_kill() lines interleaved with the
* added group_pin_kill() lines - confirm against the real tree.
*/
void sb_pin_kill(struct super_block *sb)
void group_pin_kill(struct hlist_head *p)
{
while (1) {
struct hlist_node *p;
struct fs_pin *pin;
struct hlist_node *q;
rcu_read_lock();
p = ACCESS_ONCE(sb->s_pins.first);
if (!p) {
q = ACCESS_ONCE(p->first);
if (!q) {
/* List is empty: done. */
rcu_read_unlock();
break;
}
pin = hlist_entry(p, struct fs_pin, s_list);
if (!atomic_long_inc_not_zero(&pin->count)) {
/* Count already hit zero: back off and re-read the list head. */
rcu_read_unlock();
cpu_relax();
continue;
}
rcu_read_unlock();
pin->kill(pin);
pin_kill(hlist_entry(q, struct fs_pin, s_list));
}
}
......@@ -144,7 +144,7 @@ extern const struct file_operations pipefifo_fops;
/*
* fs_pin.c
*/
extern void sb_pin_kill(struct super_block *sb);
extern void group_pin_kill(struct hlist_head *p);
extern void mnt_pin_kill(struct mount *m);
/*
......
......@@ -2,6 +2,7 @@
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/ns_common.h>
#include <linux/fs_pin.h>
struct mnt_namespace {
atomic_t count;
......@@ -62,7 +63,8 @@ struct mount {
int mnt_group_id; /* peer group identifier */
int mnt_expiry_mark; /* true if marked for expiry */
struct hlist_head mnt_pins;
struct path mnt_ex_mountpoint;
struct fs_pin mnt_umount;
struct dentry *mnt_ex_mountpoint;
};
#define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
......
......@@ -190,6 +190,14 @@ unsigned int mnt_get_count(struct mount *mnt)
#endif
}
/*
 * Kill callback for a mount's embedded mnt_umount pin (installed by
 * init_fs_pin() in alloc_vfsmnt()).
 *
 * Drops the saved ex-mountpoint dentry, removes the pin, and finally puts
 * the mount that embeds it.
 */
static void drop_mountpoint(struct fs_pin *p)
{
	struct mount *mnt = container_of(p, struct mount, mnt_umount);

	dput(mnt->mnt_ex_mountpoint);
	pin_remove(p);
	mntput(&mnt->mnt);
}
static struct mount *alloc_vfsmnt(const char *name)
{
struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
......@@ -229,6 +237,7 @@ static struct mount *alloc_vfsmnt(const char *name)
#ifdef CONFIG_FSNOTIFY
INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
#endif
init_fs_pin(&mnt->mnt_umount, drop_mountpoint);
}
return mnt;
......@@ -1289,7 +1298,6 @@ static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static void namespace_unlock(void)
{
struct mount *mnt;
struct hlist_head head = unmounted;
if (likely(hlist_empty(&head))) {
......@@ -1299,23 +1307,11 @@ static void namespace_unlock(void)
head.first->pprev = &head.first;
INIT_HLIST_HEAD(&unmounted);
/* undo decrements we'd done in umount_tree() */
hlist_for_each_entry(mnt, &head, mnt_hash)
if (mnt->mnt_ex_mountpoint.mnt)
mntget(mnt->mnt_ex_mountpoint.mnt);
up_write(&namespace_sem);
synchronize_rcu();
while (!hlist_empty(&head)) {
mnt = hlist_entry(head.first, struct mount, mnt_hash);
hlist_del_init(&mnt->mnt_hash);
if (mnt->mnt_ex_mountpoint.mnt)
path_put(&mnt->mnt_ex_mountpoint);
mntput(&mnt->mnt);
}
group_pin_kill(&head);
}
static inline void namespace_lock(void)
......@@ -1334,7 +1330,6 @@ void umount_tree(struct mount *mnt, int how)
{
HLIST_HEAD(tmp_list);
struct mount *p;
struct mount *last = NULL;
for (p = mnt; p; p = next_mnt(p, mnt)) {
hlist_del_init_rcu(&p->mnt_hash);
......@@ -1347,33 +1342,28 @@ void umount_tree(struct mount *mnt, int how)
if (how)
propagate_umount(&tmp_list);
hlist_for_each_entry(p, &tmp_list, mnt_hash) {
while (!hlist_empty(&tmp_list)) {
p = hlist_entry(tmp_list.first, struct mount, mnt_hash);
hlist_del_init_rcu(&p->mnt_hash);
list_del_init(&p->mnt_expire);
list_del_init(&p->mnt_list);
__touch_mnt_namespace(p->mnt_ns);
p->mnt_ns = NULL;
if (how < 2)
p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, &unmounted);
if (mnt_has_parent(p)) {
hlist_del_init(&p->mnt_mp_list);
put_mountpoint(p->mnt_mp);
mnt_add_count(p->mnt_parent, -1);
/* move the reference to mountpoint into ->mnt_ex_mountpoint */
p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint;
p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt;
/* old mountpoint will be dropped when we can do that */
p->mnt_ex_mountpoint = p->mnt_mountpoint;
p->mnt_mountpoint = p->mnt.mnt_root;
p->mnt_parent = p;
p->mnt_mp = NULL;
}
change_mnt_propagation(p, MS_PRIVATE);
last = p;
}
if (last) {
last->mnt_hash.next = unmounted.first;
if (unmounted.first)
unmounted.first->pprev = &last->mnt_hash.next;
unmounted.first = tmp_list.first;
unmounted.first->pprev = &unmounted.first;
}
}
......
......@@ -77,6 +77,7 @@ static int ncp_hash_dentry(const struct dentry *, struct qstr *);
static int ncp_compare_dentry(const struct dentry *, const struct dentry *,
unsigned int, const char *, const struct qstr *);
static int ncp_delete_dentry(const struct dentry *);
static void ncp_d_prune(struct dentry *dentry);
const struct dentry_operations ncp_dentry_operations =
{
......@@ -84,6 +85,7 @@ const struct dentry_operations ncp_dentry_operations =
.d_hash = ncp_hash_dentry,
.d_compare = ncp_compare_dentry,
.d_delete = ncp_delete_dentry,
.d_prune = ncp_d_prune,
};
#define ncp_namespace(i) (NCP_SERVER(i)->name_space[NCP_FINFO(i)->volNumber])
......@@ -384,42 +386,6 @@ finished:
return val;
}
static struct dentry *
ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
{
struct dentry *dent = dentry;
if (d_validate(dent, parent)) {
if (dent->d_name.len <= NCP_MAXPATHLEN &&
(unsigned long)dent->d_fsdata == fpos) {
if (!dent->d_inode) {
dput(dent);
dent = NULL;
}
return dent;
}
dput(dent);
}
/* If a pointer is invalid, we search the dentry. */
spin_lock(&parent->d_lock);
list_for_each_entry(dent, &parent->d_subdirs, d_child) {
if ((unsigned long)dent->d_fsdata == fpos) {
if (dent->d_inode)
dget(dent);
else
dent = NULL;
spin_unlock(&parent->d_lock);
goto out;
}
}
spin_unlock(&parent->d_lock);
return NULL;
out:
return dent;
}
static time_t ncp_obtain_mtime(struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
......@@ -435,6 +401,20 @@ static time_t ncp_obtain_mtime(struct dentry *dentry)
return ncp_date_dos2unix(i.modifyTime, i.modifyDate);
}
static inline void
ncp_invalidate_dircache_entries(struct dentry *parent)
{
struct ncp_server *server = NCP_SERVER(parent->d_inode);
struct dentry *dentry;
spin_lock(&parent->d_lock);
list_for_each_entry(dentry, &parent->d_subdirs, d_child) {
dentry->d_fsdata = NULL;
ncp_age_dentry(server, dentry);
}
spin_unlock(&parent->d_lock);
}
static int ncp_readdir(struct file *file, struct dir_context *ctx)
{
struct dentry *dentry = file->f_path.dentry;
......@@ -500,10 +480,21 @@ static int ncp_readdir(struct file *file, struct dir_context *ctx)
struct dentry *dent;
bool over;