dcache.c 88.6 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * fs/dcache.c
 *
 * Complete reimplementation
 * (C) 1997 Thomas Schoebel-Theuer,
 * with heavy changes by Linus Torvalds
 */

/*
 * Notes on the allocation strategy:
 *
 * The dcache is a master of the icache - whenever a dcache entry
 * exists, the inode will always exist. "iput()" is done either when
 * the dcache entry is deleted or garbage collected.
 */

#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
21
#include <linux/fsnotify.h>
Linus Torvalds's avatar
Linus Torvalds committed
22
23
24
25
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/cache.h>
26
#include <linux/export.h>
Linus Torvalds's avatar
Linus Torvalds committed
27
28
29
30
31
32
33
#include <linux/mount.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <linux/security.h>
#include <linux/seqlock.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
34
#include <linux/fs_struct.h>
35
#include <linux/hardirq.h>
36
37
#include <linux/bit_spinlock.h>
#include <linux/rculist_bl.h>
38
#include <linux/prefetch.h>
39
#include <linux/ratelimit.h>
40
#include <linux/list_lru.h>
41
42
#include <linux/kasan.h>

43
#include "internal.h"
44
#include "mount.h"
Linus Torvalds's avatar
Linus Torvalds committed
45

Nick Piggin's avatar
Nick Piggin committed
46
47
/*
 * Usage:
 * dcache->d_inode->i_lock protects:
 *   - i_dentry, d_u.d_alias, d_inode of aliases
 * dcache_hash_bucket lock protects:
 *   - the dcache hash table
 * s_anon bl list spinlock protects:
 *   - the s_anon list (see __d_drop)
 * dentry->d_sb->s_dentry_lru_lock protects:
 *   - the dcache lru lists and counters
 * d_lock protects:
 *   - d_flags
 *   - d_name
 *   - d_lru
 *   - d_count
 *   - d_unhashed()
 *   - d_parent and d_subdirs
 *   - childrens' d_child and d_parent
 *   - d_u.d_alias, d_inode
 *
 * Ordering:
 * dentry->d_inode->i_lock
 *   dentry->d_lock
 *     dentry->d_sb->s_dentry_lru_lock
 *     dcache_hash_bucket lock
 *     s_anon lock
 *
 * If there is an ancestor relationship:
 * dentry->d_parent->...->d_parent->d_lock
 *   ...
 *     dentry->d_parent->d_lock
 *       dentry->d_lock
 *
 * If no ancestor relationship:
 * if (dentry1 < dentry2)
 *   dentry1->d_lock
 *     dentry2->d_lock
 */
84
int sysctl_vfs_cache_pressure __read_mostly = 100;
Linus Torvalds's avatar
Linus Torvalds committed
85
86
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);

Al Viro's avatar
Al Viro committed
87
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
Linus Torvalds's avatar
Linus Torvalds committed
88

89
EXPORT_SYMBOL(rename_lock);
Linus Torvalds's avatar
Linus Torvalds committed
90

91
static struct kmem_cache *dentry_cache __read_mostly;
Linus Torvalds's avatar
Linus Torvalds committed
92
93
94
95
96
97
98
99
100
101

/*
 * This is the single most critical data structure when it comes
 * to the dcache: the hashtable for lookups. Somebody should try
 * to make this good - I've just made it work.
 *
 * This hash-function tries to avoid losing too many bits of hash
 * information, yet avoid using a prime hash-size or similar.
 */

102
103
static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;
104

105
static struct hlist_bl_head *dentry_hashtable __read_mostly;
106

107
static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
108
					unsigned int hash)
109
{
110
	hash += (unsigned long) parent / L1_CACHE_BYTES;
111
	return dentry_hashtable + hash_32(hash, d_hash_shift);
112
113
}

Linus Torvalds's avatar
Linus Torvalds committed
114
115
116
117
118
/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
	.age_limit = 45,
};

119
static DEFINE_PER_CPU(long, nr_dentry);
120
static DEFINE_PER_CPU(long, nr_dentry_unused);
121
122

#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
123
124
125
126
127
128
129
130
131
132
133
134
135

/*
 * Here we resort to our own counters instead of using generic per-cpu counters
 * for consistency with what the vfs inode code does. We are expected to harvest
 * better code and performance by having our own specialized counters.
 *
 * Please note that the loop is done over all possible CPUs, not over all online
 * CPUs. The reason for this is that we don't want to play games with CPUs going
 * on and off. If one of them goes off, we will just keep their counters.
 *
 * glommer: See cffbc8a for details, and if you ever intend to change this,
 * please update all vfs counters to match.
 */
136
static long get_nr_dentry(void)
137
138
{
	int i;
139
	long sum = 0;
140
141
142
143
144
	for_each_possible_cpu(i)
		sum += per_cpu(nr_dentry, i);
	return sum < 0 ? 0 : sum;
}

145
146
147
148
149
150
151
152
153
static long get_nr_dentry_unused(void)
{
	int i;
	long sum = 0;
	for_each_possible_cpu(i)
		sum += per_cpu(nr_dentry_unused, i);
	return sum < 0 ? 0 : sum;
}

154
/*
 * Sysctl handler for /proc/sys/fs/dentry-state: refresh the exported
 * snapshot from the per-cpu counters, then hand off to the generic
 * unsigned-long-vector handler.
 */
int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer,
		   size_t *lenp, loff_t *ppos)
{
	dentry_stat.nr_dentry = get_nr_dentry();
	dentry_stat.nr_unused = get_nr_dentry_unused();
	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
#endif

163
164
165
166
/*
 * Compare 2 name strings, return 0 if they match, otherwise non-zero.
 * The strings are both count bytes long, and count is non-zero.
 */
167
168
169
170
171
172
173
174
175
176
177
178
#ifdef CONFIG_DCACHE_WORD_ACCESS

#include <asm/word-at-a-time.h>
/*
 * NOTE! 'cs' and 'scount' come from a dentry, so it has a
 * aligned allocation for this particular component. We don't
 * strictly need the load_unaligned_zeropad() safety, but it
 * doesn't hurt either.
 *
 * In contrast, 'ct' and 'tcount' can be from a pathname, and do
 * need the careful unaligned handling.
 */
179
/*
 * Word-at-a-time comparison; 'tcount' is non-zero on entry, returns 0 on
 * match.  Note that the final (partial) words are loaded *before* the
 * length check breaks out of the loop: 'cs' comes from a dentry, so the
 * over-read stays inside its allocation, and 'ct' is read with
 * load_unaligned_zeropad() which tolerates running off the end.
 */
static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
{
	unsigned long a,b,mask;

	for (;;) {
		a = *(unsigned long *)cs;
		b = load_unaligned_zeropad(ct);
		if (tcount < sizeof(unsigned long))
			break;
		if (unlikely(a != b))
			return 1;
		cs += sizeof(unsigned long);
		ct += sizeof(unsigned long);
		tcount -= sizeof(unsigned long);
		if (!tcount)
			return 0;
	}
	/* Compare only the 'tcount' low bytes of the final words. */
	mask = bytemask_from_count(tcount);
	return unlikely(!!((a ^ b) & mask));
}

200
#else
201

202
/*
 * Byte-at-a-time fallback comparison.  'tcount' is non-zero on entry
 * (see the caller's contract); returns 0 on match, 1 on mismatch.
 */
static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
{
	do {
		if (*cs++ != *ct++)
			return 1;
	} while (--tcount);
	return 0;
}

214
215
#endif

216
217
/*
 * Compare a dentry's name against the candidate (ct, tcount); returns 0
 * on match, non-zero otherwise.  Safe under RCU-walk (see below).
 */
static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
{
	const unsigned char *cs;
	/*
	 * Be careful about RCU walk racing with rename:
	 * use ACCESS_ONCE to fetch the name pointer.
	 *
	 * NOTE! Even if a rename will mean that the length
	 * was not loaded atomically, we don't care. The
	 * RCU walk will check the sequence count eventually,
	 * and catch it. And we won't overrun the buffer,
	 * because we're reading the name pointer atomically,
	 * and a dentry name is guaranteed to be properly
	 * terminated with a NUL byte.
	 *
	 * End result: even if 'len' is wrong, we'll exit
	 * early because the data cannot match (there can
	 * be no NUL in the ct/tcount data)
	 */
	cs = ACCESS_ONCE(dentry->d_name.name);
	smp_read_barrier_depends();
	return dentry_string_cmp(cs, ct, tcount);
}

240
241
242
243
244
245
246
247
248
249
250
251
252
/*
 * Out-of-line dentry name.  The refcount and the RCU head can share
 * storage: the count is only used while the name is live, the RCU head
 * only after the last reference is gone.
 */
struct external_name {
	union {
		atomic_t count;
		struct rcu_head head;
	} u;
	unsigned char name[];	/* flexible array holding the name bytes */
};

/* Map a dentry's d_name.name pointer back to its external_name wrapper. */
static inline struct external_name *external_name(struct dentry *dentry)
{
	return container_of(dentry->d_name.name, struct external_name, name[0]);
}

Christoph Hellwig's avatar
Christoph Hellwig committed
253
/* RCU callback: return a dentry (name stored inline) to the slab cache. */
static void __d_free(struct rcu_head *head)
{
	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);

	kmem_cache_free(dentry_cache, dentry); 
}

/* RCU callback: free the out-of-line name, then the dentry itself. */
static void __d_free_external(struct rcu_head *head)
{
	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
	kfree(external_name(dentry));
	kmem_cache_free(dentry_cache, dentry); 
}

267
268
269
270
271
/* True when the dentry's name is stored out of line (not in d_iname). */
static inline int dname_external(const struct dentry *dentry)
{
	return dentry->d_name.name != dentry->d_iname;
}

Al Viro's avatar
Al Viro committed
272
273
/*
 * Free a dead dentry, deferring through RCU when needed.  The dentry
 * must already be detached from its inode's alias list.
 */
static void dentry_free(struct dentry *dentry)
{
	WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
	if (unlikely(dname_external(dentry))) {
		struct external_name *p = external_name(dentry);
		/* Drop our reference on the shared name; last one frees both. */
		if (likely(atomic_dec_and_test(&p->u.count))) {
			call_rcu(&dentry->d_u.d_rcu, __d_free_external);
			return;
		}
	}
	/* if dentry was never visible to RCU, immediate free is OK */
	if (!(dentry->d_flags & DCACHE_RCUACCESS))
		__d_free(&dentry->d_u.d_rcu);
	else
		call_rcu(&dentry->d_u.d_rcu, __d_free);
}

Nick Piggin's avatar
Nick Piggin committed
289
290
/**
 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
291
 * @dentry: the target dentry
Nick Piggin's avatar
Nick Piggin committed
292
293
294
295
296
297
298
299
300
301
302
 * After this call, in-progress rcu-walk path lookup will fail. This
 * should be called after unhashing, and after changing d_inode (if
 * the dentry has not already been unhashed).
 */
static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
{
	assert_spin_locked(&dentry->d_lock);
	/* Go through a barrier: bump d_seq so concurrent rcu-walks retry */
	write_seqcount_barrier(&dentry->d_seq);
}

Linus Torvalds's avatar
Linus Torvalds committed
303
304
/*
 * Release the dentry's inode, using the filesystem
Nick Piggin's avatar
Nick Piggin committed
305
306
 * d_iput() operation if defined. Dentry has no refcount
 * and is unhashed.
Linus Torvalds's avatar
Linus Torvalds committed
307
 */
308
static void dentry_iput(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	if (inode) {
		/* Detach from the inode's alias list before dropping locks. */
		dentry->d_inode = NULL;
		hlist_del_init(&dentry->d_u.d_alias);
		spin_unlock(&dentry->d_lock);
		spin_unlock(&inode->i_lock);
		/* Last link gone?  Tell fsnotify watchers. */
		if (!inode->i_nlink)
			fsnotify_inoderemove(inode);
		/* Let the filesystem drop the inode reference its own way. */
		if (dentry->d_op && dentry->d_op->d_iput)
			dentry->d_op->d_iput(dentry, inode);
		else
			iput(inode);
	} else {
		spin_unlock(&dentry->d_lock);
	}
}

Nick Piggin's avatar
Nick Piggin committed
329
330
331
332
333
334
/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. dentry remains in-use.
 */
static void dentry_unlink_inode(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	/* Clear the type bits in d_flags: the dentry becomes negative. */
	__d_clear_type(dentry);
	dentry->d_inode = NULL;
	hlist_del_init(&dentry->d_u.d_alias);
	/* Invalidate in-flight rcu-walk lookups before unlocking. */
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&inode->i_lock);
	if (!inode->i_nlink)
		fsnotify_inoderemove(inode);
	/* Let the filesystem drop the inode reference its own way. */
	if (dentry->d_op && dentry->d_op->d_iput)
		dentry->d_op->d_iput(dentry, inode);
	else
		iput(inode);
}

352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
/*
 * The DCACHE_LRU_LIST bit is set whenever the 'd_lru' entry
 * is in use - which includes both the "real" per-superblock
 * LRU list _and_ the DCACHE_SHRINK_LIST use.
 *
 * The DCACHE_SHRINK_LIST bit is set whenever the dentry is
 * on the shrink list (ie not on the superblock LRU list).
 *
 * The per-cpu "nr_dentry_unused" counters are updated with
 * the DCACHE_LRU_LIST bit.
 *
 * These helper functions make sure we always follow the
 * rules. d_lock must be held by the caller.
 */
#define D_FLAG_VERIFY(dentry,x) WARN_ON_ONCE(((dentry)->d_flags & (DCACHE_LRU_LIST | DCACHE_SHRINK_LIST)) != (x))
/* Put an unreferenced dentry on its superblock's LRU list; d_lock held. */
static void d_lru_add(struct dentry *dentry)
{
	D_FLAG_VERIFY(dentry, 0);
	dentry->d_flags |= DCACHE_LRU_LIST;
	this_cpu_inc(nr_dentry_unused);
	WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
}

/* Remove a dentry from its superblock's LRU list; d_lock held. */
static void d_lru_del(struct dentry *dentry)
{
	D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
	dentry->d_flags &= ~DCACHE_LRU_LIST;
	this_cpu_dec(nr_dentry_unused);
	WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
}

/* Take a dentry off a private shrink list; d_lock held. */
static void d_shrink_del(struct dentry *dentry)
{
	D_FLAG_VERIFY(dentry, DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
	list_del_init(&dentry->d_lru);
	dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
	this_cpu_dec(nr_dentry_unused);
}

/* Put a dentry (currently on no list) on a private shrink list; d_lock held. */
static void d_shrink_add(struct dentry *dentry, struct list_head *list)
{
	D_FLAG_VERIFY(dentry, 0);
	list_add(&dentry->d_lru, list);
	dentry->d_flags |= DCACHE_SHRINK_LIST | DCACHE_LRU_LIST;
	this_cpu_inc(nr_dentry_unused);
}

/*
 * These can only be called under the global LRU lock, ie during the
 * callback for freeing the LRU list. "isolate" removes it from the
 * LRU lists entirely, while shrink_move moves it to the indicated
 * private list.
 */
405
/* LRU-walk callback helper: detach the dentry from the LRU entirely. */
static void d_lru_isolate(struct list_lru_one *lru, struct dentry *dentry)
{
	D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
	dentry->d_flags &= ~DCACHE_LRU_LIST;
	this_cpu_dec(nr_dentry_unused);
	list_lru_isolate(lru, &dentry->d_lru);
}

413
414
/* LRU-walk callback helper: move the dentry to a private shrink list. */
static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry,
			      struct list_head *list)
{
	D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
	dentry->d_flags |= DCACHE_SHRINK_LIST;
	list_lru_isolate_move(lru, &dentry->d_lru, list);
}

421
/*
422
 * dentry_lru_(add|del)_list) must be called with d_lock held.
423
424
425
 */
/* Add the dentry to the LRU only if it is not already there; d_lock held. */
static void dentry_lru_add(struct dentry *dentry)
{
	if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
		d_lru_add(dentry);
}

Nick Piggin's avatar
Nick Piggin committed
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
/**
 * d_drop - drop a dentry
 * @dentry: dentry to drop
 *
 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
 * be found through a VFS lookup any more. Note that this is different from
 * deleting the dentry - d_delete will try to mark the dentry negative if
 * possible, giving a successful _negative_ lookup, while d_drop will
 * just make the cache lookup fail.
 *
 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
 * reason (NFS timeouts or autofs deletes).
 *
 * __d_drop requires dentry->d_lock.
 */
void __d_drop(struct dentry *dentry)
{
	if (!d_unhashed(dentry)) {
		struct hlist_bl_head *b;
		/*
		 * Hashed dentries are normally on the dentry hashtable,
		 * with the exception of those newly allocated by
		 * d_obtain_alias, which are always IS_ROOT:
		 */
		if (unlikely(IS_ROOT(dentry)))
			b = &dentry->d_sb->s_anon;
		else
			b = d_hash(dentry->d_parent, dentry->d_name.hash);

		hlist_bl_lock(b);
		__hlist_bl_del(&dentry->d_hash);
		/* NULL pprev is what makes d_unhashed() true. */
		dentry->d_hash.pprev = NULL;
		hlist_bl_unlock(b);
		/* Force concurrent rcu-walk lookups to notice and retry. */
		dentry_rcuwalk_barrier(dentry);
	}
}
EXPORT_SYMBOL(__d_drop);

/* Locked wrapper around __d_drop(). */
void d_drop(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(d_drop);

Al Viro's avatar
Al Viro committed
476
/*
 * Tear down a dentry whose refcount has hit zero.  Called with the
 * dentry's d_lock, its parent's d_lock (if any) and the inode's i_lock
 * (if any) held; all of these are dropped on the way out (via
 * dentry_iput()).
 */
static void __dentry_kill(struct dentry *dentry)
{
	struct dentry *parent = NULL;
	bool can_free = true;
	if (!IS_ROOT(dentry))
		parent = dentry->d_parent;

	/*
	 * The dentry is now unrecoverably dead to the world.
	 */
	lockref_mark_dead(&dentry->d_lockref);

	/*
	 * inform the fs via d_prune that this dentry is about to be
	 * unhashed and destroyed.
	 */
	if (dentry->d_flags & DCACHE_OP_PRUNE)
		dentry->d_op->d_prune(dentry);

	/* Drop off the LRU unless a shrink-list walker owns the d_lru entry. */
	if (dentry->d_flags & DCACHE_LRU_LIST) {
		if (!(dentry->d_flags & DCACHE_SHRINK_LIST))
			d_lru_del(dentry);
	}
	/* if it was on the hash then remove it */
	__d_drop(dentry);
	__list_del_entry(&dentry->d_child);
	/*
	 * Inform d_walk() that we are no longer attached to the
	 * dentry tree
	 */
	dentry->d_flags |= DCACHE_DENTRY_KILLED;
	if (parent)
		spin_unlock(&parent->d_lock);
	dentry_iput(dentry);
	/*
	 * dentry_iput drops the locks, at which point nobody (except
	 * transient RCU lookups) can reach this dentry.
	 */
	BUG_ON(dentry->d_lockref.count > 0);
	this_cpu_dec(nr_dentry);
	if (dentry->d_op && dentry->d_op->d_release)
		dentry->d_op->d_release(dentry);

	/*
	 * If a shrink-list walker still references d_lru, defer the final
	 * free to it by setting DCACHE_MAY_FREE.
	 */
	spin_lock(&dentry->d_lock);
	if (dentry->d_flags & DCACHE_SHRINK_LIST) {
		dentry->d_flags |= DCACHE_MAY_FREE;
		can_free = false;
	}
	spin_unlock(&dentry->d_lock);
	if (likely(can_free))
		dentry_free(dentry);
}

/*
 * Finish off a dentry we've decided to kill.
 * dentry->d_lock must be held, returns with it unlocked.
 * If ref is non-zero, then decrement the refcount too.
 * Returns dentry requiring refcount drop, or NULL if we're done.
 */
535
static struct dentry *dentry_kill(struct dentry *dentry)
	__releases(dentry->d_lock)
{
	struct inode *inode = dentry->d_inode;
	struct dentry *parent = NULL;

	/*
	 * We hold the child's d_lock, but i_lock and the parent's d_lock
	 * nest outside it, so they can only be trylocked here.
	 */
	if (inode && unlikely(!spin_trylock(&inode->i_lock)))
		goto failed;

	if (!IS_ROOT(dentry)) {
		parent = dentry->d_parent;
		if (unlikely(!spin_trylock(&parent->d_lock))) {
			if (inode)
				spin_unlock(&inode->i_lock);
			goto failed;
		}
	}

	__dentry_kill(dentry);
	return parent;

failed:
	/* Back off completely and let the caller retry from scratch. */
	spin_unlock(&dentry->d_lock);
	cpu_relax();
	return dentry; /* try again with same dentry */
}

562
563
564
565
566
/*
 * Lock the parent of a zero-refcount dentry, respecting the lock order
 * (parent's d_lock before the child's).  Returns the locked parent, or
 * NULL for a root dentry or one already being killed.  NOTE(review): on
 * the slow path the dentry's own d_lock is dropped and re-taken, so the
 * caller must re-check the dentry's state afterwards (as
 * d_prune_aliases() does).
 */
static inline struct dentry *lock_parent(struct dentry *dentry)
{
	struct dentry *parent = dentry->d_parent;
	if (IS_ROOT(dentry))
		return NULL;
	/* A negative count means the dentry is already being killed. */
	if (unlikely(dentry->d_lockref.count < 0))
		return NULL;
	if (likely(spin_trylock(&parent->d_lock)))
		return parent;
	rcu_read_lock();
	spin_unlock(&dentry->d_lock);
again:
	parent = ACCESS_ONCE(dentry->d_parent);
	spin_lock(&parent->d_lock);
	/*
	 * We can't blindly lock dentry until we are sure
	 * that we won't violate the locking order.
	 * Any changes of dentry->d_parent must have
	 * been done with parent->d_lock held, so
	 * spin_lock() above is enough of a barrier
	 * for checking if it's still our child.
	 */
	if (unlikely(parent != dentry->d_parent)) {
		spin_unlock(&parent->d_lock);
		goto again;
	}
	rcu_read_unlock();
	if (parent != dentry)
		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
	else
		parent = NULL;
	return parent;
}

596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
/*
 * Try to do a lockless dput(), and return whether that was successful.
 *
 * If unsuccessful, we return false, having already taken the dentry lock.
 *
 * The caller needs to hold the RCU read lock, so that the dentry is
 * guaranteed to stay around even if the refcount goes down to zero!
 */
static inline bool fast_dput(struct dentry *dentry)
{
	int ret;
	unsigned int d_flags;

	/*
	 * If we have a d_op->d_delete() operation, we sould not
	 * let the dentry count go to zero, so use "put__or_lock".
	 */
	if (unlikely(dentry->d_flags & DCACHE_OP_DELETE))
		return lockref_put_or_lock(&dentry->d_lockref);

	/*
	 * .. otherwise, we can try to just decrement the
	 * lockref optimistically.
	 */
	ret = lockref_put_return(&dentry->d_lockref);

	/*
	 * If the lockref_put_return() failed due to the lock being held
	 * by somebody else, the fast path has failed. We will need to
	 * get the lock, and then check the count again.
	 */
	if (unlikely(ret < 0)) {
		spin_lock(&dentry->d_lock);
		if (dentry->d_lockref.count > 1) {
			dentry->d_lockref.count--;
			spin_unlock(&dentry->d_lock);
			return 1;
		}
		return 0;
	}

	/*
	 * If we weren't the last ref, we're done.
	 */
	if (ret)
		return 1;

	/*
	 * Careful, careful. The reference count went down
	 * to zero, but we don't hold the dentry lock, so
	 * somebody else could get it again, and do another
	 * dput(), and we need to not race with that.
	 *
	 * However, there is a very special and common case
	 * where we don't care, because there is nothing to
	 * do: the dentry is still hashed, it does not have
	 * a 'delete' op, and it's referenced and already on
	 * the LRU list.
	 *
	 * NOTE! Since we aren't locked, these values are
	 * not "stable". However, it is sufficient that at
	 * some point after we dropped the reference the
	 * dentry was hashed and the flags had the proper
	 * value. Other dentry users may have re-gotten
	 * a reference to the dentry and change that, but
	 * our work is done - we can leave the dentry
	 * around with a zero refcount.
	 */
	smp_rmb();
	d_flags = ACCESS_ONCE(dentry->d_flags);
	d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST;

	/* Nothing to do? Dropping the reference was all we needed? */
	if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry))
		return 1;

	/*
	 * Not the fast normal case? Get the lock. We've already decremented
	 * the refcount, but we'll need to re-check the situation after
	 * getting the lock.
	 */
	spin_lock(&dentry->d_lock);

	/*
	 * Did somebody else grab a reference to it in the meantime, and
	 * we're no longer the last user after all? Alternatively, somebody
	 * else could have killed it and marked it dead. Either way, we
	 * don't need to do anything else.
	 */
	if (dentry->d_lockref.count) {
		spin_unlock(&dentry->d_lock);
		return 1;
	}

	/*
	 * Re-get the reference we optimistically dropped. We hold the
	 * lock, and we just tested that it was zero, so we can just
	 * set it to 1.
	 */
	dentry->d_lockref.count = 1;
	return 0;
}


Linus Torvalds's avatar
Linus Torvalds committed
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
/* 
 * This is dput
 *
 * This is complicated by the fact that we do not want to put
 * dentries that are no longer on any hash chain on the unused
 * list: we'd much rather just get rid of them immediately.
 *
 * However, that implies that we have to traverse the dentry
 * tree upwards to the parents which might _also_ now be
 * scheduled for deletion (it may have been only waiting for
 * its last child to go away).
 *
 * This tail recursion is done by hand as we don't want to depend
 * on the compiler to always get this right (gcc generally doesn't).
 * Real recursion would eat up our stack space.
 */

/*
 * dput - release a dentry
 * @dentry: dentry to release 
 *
 * Release a dentry. This will drop the usage count and if appropriate
 * call the dentry unlink method as well as removing it from the queues and
 * releasing its resources. If the parent dentries were scheduled for release
 * they too may now get deleted.
 */
void dput(struct dentry *dentry)
{
	if (unlikely(!dentry))
		return;

repeat:
	/* RCU keeps the dentry alive across the lockless fast path. */
	rcu_read_lock();
	if (likely(fast_dput(dentry))) {
		rcu_read_unlock();
		return;
	}

	/* Slow case: now with the dentry lock held */
	rcu_read_unlock();

	/* Unreachable? Get rid of it */
	if (unlikely(d_unhashed(dentry)))
		goto kill_it;

	/* Let the filesystem veto caching this dentry. */
	if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
		if (dentry->d_op->d_delete(dentry))
			goto kill_it;
	}

	if (!(dentry->d_flags & DCACHE_REFERENCED))
		dentry->d_flags |= DCACHE_REFERENCED;
	dentry_lru_add(dentry);

	dentry->d_lockref.count--;
	spin_unlock(&dentry->d_lock);
	return;

kill_it:
	/* dentry_kill() hands back the parent that now needs a ref drop. */
	dentry = dentry_kill(dentry);
	if (dentry)
		goto repeat;
}
EXPORT_SYMBOL(dput);
Linus Torvalds's avatar
Linus Torvalds committed
764
765


Nick Piggin's avatar
Nick Piggin committed
766
/* This must be called with d_lock held */
static inline void __dget_dlock(struct dentry *dentry)
{
	/* d_lock serializes us, so a plain increment is safe. */
	dentry->d_lockref.count++;
}

772
/* Take a reference without d_lock held; lockref does the locking. */
static inline void __dget(struct dentry *dentry)
{
	lockref_get(&dentry->d_lockref);
}

Nick Piggin's avatar
Nick Piggin committed
777
778
/* Return a referenced copy of dentry's parent. */
struct dentry *dget_parent(struct dentry *dentry)
{
	int gotref;
	struct dentry *ret;

	/*
	 * Do optimistic parent lookup without any
	 * locking.
	 */
	rcu_read_lock();
	ret = ACCESS_ONCE(dentry->d_parent);
	gotref = lockref_get_not_zero(&ret->d_lockref);
	rcu_read_unlock();
	if (likely(gotref)) {
		/* Still the parent?  Then the optimistic grab was good. */
		if (likely(ret == ACCESS_ONCE(dentry->d_parent)))
			return ret;
		dput(ret);
	}

repeat:
	/*
	 * Don't need rcu_dereference because we re-check it was correct under
	 * the lock.
	 */
	rcu_read_lock();
	ret = dentry->d_parent;
	spin_lock(&ret->d_lock);
	if (unlikely(ret != dentry->d_parent)) {
		/* Raced with a rename: retry against the new parent. */
		spin_unlock(&ret->d_lock);
		rcu_read_unlock();
		goto repeat;
	}
	rcu_read_unlock();
	BUG_ON(!ret->d_lockref.count);
	ret->d_lockref.count++;
	spin_unlock(&ret->d_lock);
	return ret;
}
EXPORT_SYMBOL(dget_parent);

Linus Torvalds's avatar
Linus Torvalds committed
817
818
819
820
821
822
823
824
/**
 * d_find_alias - grab a hashed alias of inode
 * @inode: inode in question
 *
 * If inode has a hashed alias, or is a directory and has any alias,
 * acquire the reference to alias and return it. Otherwise return NULL.
 * Notice that if inode is a directory there can be only one alias and
 * it can be unhashed only if it has no children, or if it is the root
825
826
 * of a filesystem, or if the directory was renamed and d_revalidate
 * was the first vfs operation to notice.
Linus Torvalds's avatar
Linus Torvalds committed
827
 *
828
 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
829
 * any other hashed alias over that one.
Linus Torvalds's avatar
Linus Torvalds committed
830
 */
831
static struct dentry *__d_find_alias(struct inode *inode)
{
	struct dentry *alias, *discon_alias;

again:
	discon_alias = NULL;
	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
		spin_lock(&alias->d_lock);
 		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				/* Remember it, but keep looking for a better alias. */
				discon_alias = alias;
			} else {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
	}
	if (discon_alias) {
		/* Only a disconnected alias was found: re-validate it locked. */
		alias = discon_alias;
		spin_lock(&alias->d_lock);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			__dget_dlock(alias);
			spin_unlock(&alias->d_lock);
			return alias;
		}
		/* It got unhashed while we were unlocked; rescan. */
		spin_unlock(&alias->d_lock);
		goto again;
	}
	return NULL;
}

Nick Piggin's avatar
Nick Piggin committed
865
/*
 * Locked wrapper around __d_find_alias(): returns a referenced hashed
 * alias of the inode, or NULL.
 */
struct dentry *d_find_alias(struct inode *inode)
{
	struct dentry *de = NULL;

	/* Unlocked emptiness check avoids i_lock for alias-free inodes. */
	if (!hlist_empty(&inode->i_dentry)) {
		spin_lock(&inode->i_lock);
		de = __d_find_alias(inode);
		spin_unlock(&inode->i_lock);
	}
	return de;
}
EXPORT_SYMBOL(d_find_alias);
Linus Torvalds's avatar
Linus Torvalds committed
877
878
879
880
881
882
883

/*
 *	Try to kill dentries associated with this inode.
 * WARNING: you must own a reference to inode.
 */
void d_prune_aliases(struct inode *inode)
{
	struct dentry *dentry;
restart:
	spin_lock(&inode->i_lock);
	hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
		spin_lock(&dentry->d_lock);
		if (!dentry->d_lockref.count) {
			/*
			 * Unreferenced alias: take the parent lock and,
			 * if still unreferenced (lock_parent may have
			 * dropped d_lock), kill it.  __dentry_kill drops
			 * i_lock, so restart the walk afterwards.
			 */
			struct dentry *parent = lock_parent(dentry);
			if (likely(!dentry->d_lockref.count)) {
				__dentry_kill(dentry);
				dput(parent);
				goto restart;
			}
			if (parent)
				spin_unlock(&parent->d_lock);
		}
		spin_unlock(&dentry->d_lock);
	}
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_prune_aliases);
Linus Torvalds's avatar
Linus Torvalds committed
904

905
/*
 * Kill every dentry on the (already isolated) shrink list @list.
 *
 * The list is private to the caller, so entries can be unlinked without
 * LRU accounting.  Dentries that turn out to be busy, already killed by
 * somebody else, or whose inode lock cannot be taken without inverting
 * lock order are skipped or re-queued rather than freed.  After killing
 * a dentry, unreferenced ancestors are killed as well.
 */
static void shrink_dentry_list(struct list_head *list)
{
	struct dentry *dentry, *parent;

	while (!list_empty(list)) {
		struct inode *inode;
		dentry = list_entry(list->prev, struct dentry, d_lru);
		spin_lock(&dentry->d_lock);
		parent = lock_parent(dentry);

		/*
		 * The dispose list is isolated and dentries are not accounted
		 * to the LRU here, so we can simply remove it from the list
		 * here regardless of whether it is referenced or not.
		 */
		d_shrink_del(dentry);

		/*
		 * We found an inuse dentry which was not removed from
		 * the LRU because of laziness during lookup. Do not free it.
		 */
		if (dentry->d_lockref.count > 0) {
			spin_unlock(&dentry->d_lock);
			if (parent)
				spin_unlock(&parent->d_lock);
			continue;
		}

		/*
		 * Raced with another killer: let whoever set DCACHE_MAY_FREE
		 * decide; free here only if that flag was left for us.
		 */
		if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) {
			bool can_free = dentry->d_flags & DCACHE_MAY_FREE;
			spin_unlock(&dentry->d_lock);
			if (parent)
				spin_unlock(&parent->d_lock);
			if (can_free)
				dentry_free(dentry);
			continue;
		}

		/*
		 * Taking i_lock after d_lock inverts the usual order, so
		 * trylock; on failure put the dentry back on the shrink
		 * list and retry it on a later iteration.
		 */
		inode = dentry->d_inode;
		if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
			d_shrink_add(dentry, list);
			spin_unlock(&dentry->d_lock);
			if (parent)
				spin_unlock(&parent->d_lock);
			continue;
		}

		__dentry_kill(dentry);

		/*
		 * We need to prune ancestors too. This is necessary to prevent
		 * quadratic behavior of shrink_dcache_parent(), but is also
		 * expected to be beneficial in reducing dentry cache
		 * fragmentation.
		 */
		dentry = parent;
		while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) {
			/* we hold d_lock with count == last reference */
			parent = lock_parent(dentry);
			if (dentry->d_lockref.count != 1) {
				/* not the last ref after all - just drop ours */
				dentry->d_lockref.count--;
				spin_unlock(&dentry->d_lock);
				if (parent)
					spin_unlock(&parent->d_lock);
				break;
			}
			inode = dentry->d_inode;	/* can't be NULL */
			if (unlikely(!spin_trylock(&inode->i_lock))) {
				/* lock-order inversion again: drop and retry */
				spin_unlock(&dentry->d_lock);
				if (parent)
					spin_unlock(&parent->d_lock);
				cpu_relax();
				continue;
			}
			__dentry_kill(dentry);
			dentry = parent;
		}
	}
}

985
986
/*
 * list_lru walk callback used by prune_dcache_sb().
 *
 * Decides the fate of one LRU dentry: drop in-use entries from the LRU,
 * give recently referenced ones a second pass, and move the rest onto
 * the caller-supplied dispose list (@arg) for killing.
 */
static enum lru_status dentry_lru_isolate(struct list_head *item,
		struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
{
	struct list_head *freeable = arg;
	struct dentry	*dentry = container_of(item, struct dentry, d_lru);


	/*
	 * we are inverting the lru lock/dentry->d_lock here,
	 * so use a trylock. If we fail to get the lock, just skip
	 * it
	 */
	if (!spin_trylock(&dentry->d_lock))
		return LRU_SKIP;

	/*
	 * Referenced dentries are still in use. If they have active
	 * counts, just remove them from the LRU. Otherwise give them
	 * another pass through the LRU.
	 */
	if (dentry->d_lockref.count) {
		d_lru_isolate(lru, dentry);
		spin_unlock(&dentry->d_lock);
		return LRU_REMOVED;
	}

	if (dentry->d_flags & DCACHE_REFERENCED) {
		dentry->d_flags &= ~DCACHE_REFERENCED;
		spin_unlock(&dentry->d_lock);

		/*
		 * The list move itself will be made by the common LRU code. At
		 * this point, we've dropped the dentry->d_lock but keep the
		 * lru lock. This is safe to do, since every list movement is
		 * protected by the lru lock even if both locks are held.
		 *
		 * This is guaranteed by the fact that all LRU management
		 * functions are intermediated by the LRU API calls like
		 * list_lru_add and list_lru_del. List movement in this file
		 * only ever occur through this functions or through callbacks
		 * like this one, that are called from the LRU API.
		 *
		 * The only exceptions to this are functions like
		 * shrink_dentry_list, and code that first checks for the
		 * DCACHE_SHRINK_LIST flag.  Those are guaranteed to be
		 * operating only with stack provided lists after they are
		 * properly isolated from the main list.  It is thus, always a
		 * local access.
		 */
		return LRU_ROTATE;
	}

	/* unreferenced and idle: hand it to the dispose list */
	d_lru_shrink_move(lru, dentry, freeable);
	spin_unlock(&dentry->d_lock);

	return LRU_REMOVED;
}

1043
/**
1044
1045
 * prune_dcache_sb - shrink the dcache
 * @sb: superblock
1046
 * @sc: shrink control, passed to list_lru_shrink_walk()
1047
 *
1048
1049
 * Attempt to shrink the superblock dcache LRU by @sc->nr_to_scan entries. This
 * is done when we need more memory and called from the superblock shrinker
1050
 * function.
1051
 *
1052
1053
 * This function may fail to free any resources if all the dentries are in
 * use.
1054
 */
1055
long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc)
1056
{
1057
1058
	LIST_HEAD(dispose);
	long freed;
1059

1060
1061
	freed = list_lru_shrink_walk(&sb->s_dentry_lru, sc,
				     dentry_lru_isolate, &dispose);
1062
	shrink_dentry_list(&dispose);
1063
	return freed;
1064
}
Nick Piggin's avatar
Nick Piggin committed
1065

1066
/*
 * list_lru walk callback used by shrink_dcache_sb(): unconditionally move
 * every dentry we can lock onto the dispose list (@arg).
 */
static enum lru_status dentry_lru_isolate_shrink(struct list_head *item,
		struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
{
	struct dentry	*dentry = container_of(item, struct dentry, d_lru);
	struct list_head *freeable = arg;

	/*
	 * we are inverting the lru lock/dentry->d_lock here,
	 * so use a trylock. If we fail to get the lock, just skip
	 * it
	 */
	if (spin_trylock(&dentry->d_lock)) {
		d_lru_shrink_move(lru, dentry, freeable);
		spin_unlock(&dentry->d_lock);
		return LRU_REMOVED;
	}

	return LRU_SKIP;
}

1086

Linus Torvalds's avatar
Linus Torvalds committed
1087
1088
1089
1090
/**
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
 *
1091
1092
 * Shrink the dcache for the specified super block. This is used to free
 * the dcache before unmounting a file system.
Linus Torvalds's avatar
Linus Torvalds committed
1093
 */
1094
void shrink_dcache_sb(struct super_block *sb)
Linus Torvalds's avatar
Linus Torvalds committed
1095
{
1096
1097
1098
1099
1100
1101
1102
	long freed;

	do {
		LIST_HEAD(dispose);

		freed = list_lru_walk(&sb->s_dentry_lru,
			dentry_lru_isolate_shrink, &dispose, UINT_MAX);
1103

1104
1105
1106
		this_cpu_sub(nr_dentry_unused, freed);
		shrink_dentry_list(&dispose);
	} while (freed > 0);
Linus Torvalds's avatar
Linus Torvalds committed
1107
}
1108
EXPORT_SYMBOL(shrink_dcache_sb);
Linus Torvalds's avatar
Linus Torvalds committed
1109

Miklos Szeredi's avatar
Miklos Szeredi committed
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
/**
 * enum d_walk_ret - action to take during tree walk
 * @D_WALK_CONTINUE:	continue walk
 * @D_WALK_QUIT:	quit walk
 * @D_WALK_NORETRY:	quit when retry is needed
 * @D_WALK_SKIP:	skip this dentry and its children
 */
enum d_walk_ret {
	D_WALK_CONTINUE,
	D_WALK_QUIT,
	D_WALK_NORETRY,
	D_WALK_SKIP,
};
1123

Linus Torvalds's avatar
Linus Torvalds committed
1124
/**
 * d_walk - walk the dentry tree
 * @parent:	start of walk
 * @data:	data passed to @enter() and @finish()
 * @enter:	callback when first entering the dentry
 * @finish:	callback when successfully finished the walk
 *
 * The @enter() and @finish() callbacks are called with d_lock held.
 *
 * Depth-first walk of @parent's subtree.  Concurrent renames are detected
 * via rename_lock (seqlock): the first pass runs locklessly and, on a
 * detected rename, the walk restarts with the write side of rename_lock
 * held (unless a D_WALK_NORETRY callback forbade retrying).
 */
static void d_walk(struct dentry *parent, void *data,
		   enum d_walk_ret (*enter)(void *, struct dentry *),
		   void (*finish)(void *))
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq = 0;
	enum d_walk_ret ret;
	bool retry = true;	/* cleared by D_WALK_NORETRY */

again:
	read_seqbegin_or_lock(&rename_lock, &seq);
	this_parent = parent;
	spin_lock(&this_parent->d_lock);

	/* the root of the walk is visited first, outside the child loop */
	ret = enter(data, this_parent);
	switch (ret) {
	case D_WALK_CONTINUE:
		break;
	case D_WALK_QUIT:
	case D_WALK_SKIP:
		goto out_unlock;
	case D_WALK_NORETRY:
		retry = false;
		break;
	}
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
		next = tmp->next;

		/* child d_lock nests inside the parent's d_lock */
		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);

		ret = enter(data, dentry);
		switch (ret) {
		case D_WALK_CONTINUE:
			break;
		case D_WALK_QUIT:
			spin_unlock(&dentry->d_lock);
			goto out_unlock;
		case D_WALK_NORETRY:
			retry = false;
			break;
		case D_WALK_SKIP:
			spin_unlock(&dentry->d_lock);
			continue;
		}

		if (!list_empty(&dentry->d_subdirs)) {
			/*
			 * Descend: the child's d_lock becomes the new
			 * "parent" lock.  The dep_map release/acquire pair
			 * re-classifies the already-held lock for lockdep
			 * without actually dropping it.
			 */
			spin_unlock(&this_parent->d_lock);
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}
		spin_unlock(&dentry->d_lock);
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	rcu_read_lock();
ascend:
	if (this_parent != parent) {
		struct dentry *child = this_parent;
		this_parent = child->d_parent;

		spin_unlock(&child->d_lock);
		spin_lock(&this_parent->d_lock);

		/* might go back up the wrong parent if we have had a rename. */
		if (need_seqretry(&rename_lock, seq))
			goto rename_retry;
		/* go into the first sibling still alive */
		next = child->d_child.next;
		while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) {
			if (next == &this_parent->d_subdirs)
				goto ascend;
			child = list_entry(next, struct dentry, d_child);
			next = next->next;
		}
		rcu_read_unlock();
		goto resume;
	}
	if (need_seqretry(&rename_lock, seq))
		goto rename_retry;
	rcu_read_unlock();
	if (finish)
		finish(data);

out_unlock:
	spin_unlock(&this_parent->d_lock);
	done_seqretry(&rename_lock, seq);
	return;

rename_retry:
	spin_unlock(&this_parent->d_lock);
	rcu_read_unlock();
	BUG_ON(seq & 1);	/* must not already hold the write side */
	if (!retry)
		return;
	seq = 1;	/* odd seq => take rename_lock for writing this time */
	goto again;
}
Miklos Szeredi's avatar
Miklos Szeredi committed
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254

/*
 * Search for at least 1 mount point in the dentry's subdirs.
 * We descend to the next level whenever the d_subdirs
 * list is non-empty and continue searching.
 */

/* d_walk() callback: flag the first mount point found and stop walking. */
static enum d_walk_ret check_mount(void *data, struct dentry *dentry)
{
	int *found = data;

	if (!d_mountpoint(dentry))
		return D_WALK_CONTINUE;

	*found = 1;
	return D_WALK_QUIT;
}

1255
1256
1257
1258
1259
1260
1261
/**
 * have_submounts - check for mounts over a dentry
 * @parent: dentry to check.
 *
 * Return true if the parent or its subdirectories contain
 * a mount point
 */
Miklos Szeredi's avatar
Miklos Szeredi committed
1262
1263
1264
1265
1266
1267
1268
1269
int have_submounts(struct dentry *parent)
{
	int ret = 0;

	d_walk(parent, &ret, check_mount, NULL);