/*
 * fs/dcache.c
 *
 * Complete reimplementation
 * (C) 1997 Thomas Schoebel-Theuer,
 * with heavy changes by Linus Torvalds
 */

/*
 * Notes on the allocation strategy:
 *
 * The dcache is a master of the icache - whenever a dcache entry
 * exists, the inode will always exist. "iput()" is done either when
 * the dcache entry is deleted or garbage collected.
 */

#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/cache.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <linux/security.h>
#include <linux/seqlock.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
#include <linux/fs_struct.h>
#include <linux/hardirq.h>
#include <linux/bit_spinlock.h>
#include <linux/rculist_bl.h>
#include <linux/prefetch.h>
#include <linux/ratelimit.h>
#include <linux/list_lru.h>
#include "internal.h"
#include "mount.h"

/*
 * Usage:
 * dcache->d_inode->i_lock protects:
 *   - i_dentry, d_alias, d_inode of aliases
 * dcache_hash_bucket lock protects:
 *   - the dcache hash table
 * s_anon bl list spinlock protects:
 *   - the s_anon list (see __d_drop)
 * dentry->d_sb->s_dentry_lru_lock protects:
 *   - the dcache lru lists and counters
 * d_lock protects:
 *   - d_flags
 *   - d_name
 *   - d_lru
 *   - d_count
 *   - d_unhashed()
 *   - d_parent and d_subdirs
 *   - childrens' d_child and d_parent
 *   - d_alias, d_inode
 *
 * Ordering:
 * dentry->d_inode->i_lock
 *   dentry->d_lock
 *     dentry->d_sb->s_dentry_lru_lock
 *     dcache_hash_bucket lock
 *     s_anon lock
 *
 * If there is an ancestor relationship:
 * dentry->d_parent->...->d_parent->d_lock
 *   ...
 *     dentry->d_parent->d_lock
 *       dentry->d_lock
 *
 * If no ancestor relationship:
 * if (dentry1 < dentry2)
 *   dentry1->d_lock
 *     dentry2->d_lock
 */
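
/*
 * Illustrative sketch only (a hypothetical helper, not used anywhere in
 * this file): one way to honour the "no ancestor relationship" rule above
 * is to take the two d_locks in address order, marking the second
 * acquisition as nested for lockdep.
 */
static inline void d_lock_two_sketch(struct dentry *dentry1,
				     struct dentry *dentry2)
{
	if (dentry1 < dentry2) {
		spin_lock(&dentry1->d_lock);
		spin_lock_nested(&dentry2->d_lock, DENTRY_D_LOCK_NESTED);
	} else {
		spin_lock(&dentry2->d_lock);
		spin_lock_nested(&dentry1->d_lock, DENTRY_D_LOCK_NESTED);
	}
}
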
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);

__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);

EXPORT_SYMBOL(rename_lock);

static struct kmem_cache *dentry_cache __read_mostly;

/**
 * read_seqbegin_or_lock - begin a sequence number check or locking block
 * @lock: sequence lock
 * @seq : sequence number to be checked
 *
 * First try it once optimistically without taking the lock. If that fails,
 * take the lock. The sequence number is also used as a marker for deciding
 * whether to be a reader (even) or writer (odd).
 * N.B. seq must be initialized to an even number to begin with.
 */
static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
{
	if (!(*seq & 1))	/* Even */
		*seq = read_seqbegin(lock);
	else			/* Odd */
		read_seqlock_excl(lock);
}

static inline int need_seqretry(seqlock_t *lock, int seq)
{
	return !(seq & 1) && read_seqretry(lock, seq);
}

static inline void done_seqretry(seqlock_t *lock, int seq)
{
	if (seq & 1)
		read_sequnlock_excl(lock);
}

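/*
 * Usage sketch (for illustration only; this mirrors the pattern d_walk()
 * below relies on, and the helper itself is hypothetical): start with an
 * even seq so the first pass is lockless, and fall back to taking the
 * lock (odd seq) if the lockless pass raced with a writer.
 */
static inline void read_seqbegin_or_lock_sketch(seqlock_t *lock)
{
	int seq = 0;
again:
	read_seqbegin_or_lock(lock, &seq);
	/* ... walk the data protected by @lock ... */
	if (need_seqretry(lock, seq)) {
		seq = 1;	/* retry in the locked (odd) mode */
		goto again;
	}
	done_seqretry(lock, seq);
}
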
/*
 * This is the single most critical data structure when it comes
 * to the dcache: the hashtable for lookups. Somebody should try
 * to make this good - I've just made it work.
 *
 * This hash-function tries to avoid losing too many bits of hash
 * information, yet avoid using a prime hash-size or similar.
 */
#define D_HASHBITS     d_hash_shift
#define D_HASHMASK     d_hash_mask

static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;

static struct hlist_bl_head *dentry_hashtable __read_mostly;

static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
					unsigned int hash)
{
	hash += (unsigned long) parent / L1_CACHE_BYTES;
	hash = hash + (hash >> D_HASHBITS);
	return dentry_hashtable + (hash & D_HASHMASK);
}
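
/*
 * Illustrative sketch only (hypothetical helper, not used in this file):
 * a caller that already has a struct qstr, whose ->hash field caches the
 * name hash, would pick the hash chain for a child of @parent like this.
 */
static inline struct hlist_bl_head *d_hash_for_qstr_sketch(
		const struct dentry *parent, const struct qstr *name)
{
	return d_hash(parent, name->hash);
}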

/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
	.age_limit = 45,
};

static DEFINE_PER_CPU(long, nr_dentry);
static DEFINE_PER_CPU(long, nr_dentry_unused);

#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)

/*
 * Here we resort to our own counters instead of using generic per-cpu counters
 * for consistency with what the vfs inode code does. We are expected to harvest
 * better code and performance by having our own specialized counters.
 *
 * Please note that the loop is done over all possible CPUs, not over all online
 * CPUs. The reason for this is that we don't want to play games with CPUs going
 * on and off. If one of them goes off, we will just keep their counters.
 *
 * glommer: See cffbc8a for details, and if you ever intend to change this,
 * please update all vfs counters to match.
 */
static long get_nr_dentry(void)
{
	int i;
	long sum = 0;
	for_each_possible_cpu(i)
		sum += per_cpu(nr_dentry, i);
	return sum < 0 ? 0 : sum;
}

static long get_nr_dentry_unused(void)
{
	int i;
	long sum = 0;
	for_each_possible_cpu(i)
		sum += per_cpu(nr_dentry_unused, i);
	return sum < 0 ? 0 : sum;
}

int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
		   size_t *lenp, loff_t *ppos)
{
	dentry_stat.nr_dentry = get_nr_dentry();
	dentry_stat.nr_unused = get_nr_dentry_unused();
	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
#endif

/*
 * Compare 2 name strings, return 0 if they match, otherwise non-zero.
 * The strings are both count bytes long, and count is non-zero.
 */
#ifdef CONFIG_DCACHE_WORD_ACCESS

#include <asm/word-at-a-time.h>
/*
 * NOTE! 'cs' and 'scount' come from a dentry, so it has an
 * aligned allocation for this particular component. We don't
 * strictly need the load_unaligned_zeropad() safety, but it
 * doesn't hurt either.
 *
 * In contrast, 'ct' and 'tcount' can be from a pathname, and do
 * need the careful unaligned handling.
 */
static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
{
	unsigned long a,b,mask;

	for (;;) {
		a = *(unsigned long *)cs;
		b = load_unaligned_zeropad(ct);
		if (tcount < sizeof(unsigned long))
			break;
		if (unlikely(a != b))
			return 1;
		cs += sizeof(unsigned long);
		ct += sizeof(unsigned long);
		tcount -= sizeof(unsigned long);
		if (!tcount)
			return 0;
	}
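	/*
	 * Fewer than sizeof(unsigned long) bytes remain: mask off everything
	 * beyond the remaining 'tcount' bytes and compare only those bits.
	 * For example, with tcount == 3 on a 64-bit machine the mask below
	 * is 0x0000000000ffffff.
	 */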
	mask = ~(~0ul << tcount*8);
	return unlikely(!!((a ^ b) & mask));
}

#else

static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
{
	do {
		if (*cs != *ct)
			return 1;
		cs++;
		ct++;
		tcount--;
	} while (tcount);
	return 0;
}

#endif

static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
{
	const unsigned char *cs;
	/*
	 * Be careful about RCU walk racing with rename:
	 * use ACCESS_ONCE to fetch the name pointer.
	 *
	 * NOTE! Even if a rename will mean that the length
	 * was not loaded atomically, we don't care. The
	 * RCU walk will check the sequence count eventually,
	 * and catch it. And we won't overrun the buffer,
	 * because we're reading the name pointer atomically,
	 * and a dentry name is guaranteed to be properly
	 * terminated with a NUL byte.
	 *
	 * End result: even if 'len' is wrong, we'll exit
	 * early because the data cannot match (there can
	 * be no NUL in the ct/tcount data)
	 */
	cs = ACCESS_ONCE(dentry->d_name.name);
	smp_read_barrier_depends();
	return dentry_string_cmp(cs, ct, tcount);
}

static void __d_free(struct rcu_head *head)
{
	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);

	WARN_ON(!hlist_unhashed(&dentry->d_alias));
	if (dname_external(dentry))
		kfree(dentry->d_name.name);
	kmem_cache_free(dentry_cache, dentry);
}

/*
 * no locks, please.
 */
static void d_free(struct dentry *dentry)
{
	BUG_ON((int)dentry->d_lockref.count > 0);
	this_cpu_dec(nr_dentry);
	if (dentry->d_op && dentry->d_op->d_release)
		dentry->d_op->d_release(dentry);

	/* if dentry was never visible to RCU, immediate free is OK */
	if (!(dentry->d_flags & DCACHE_RCUACCESS))
		__d_free(&dentry->d_u.d_rcu);
	else
		call_rcu(&dentry->d_u.d_rcu, __d_free);
}

/**
 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
 * @dentry: the target dentry
 * After this call, in-progress rcu-walk path lookup will fail. This
 * should be called after unhashing, and after changing d_inode (if
 * the dentry has not already been unhashed).
 */
static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
{
	assert_spin_locked(&dentry->d_lock);
	/* Go through a barrier */
	write_seqcount_barrier(&dentry->d_seq);
}

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. Dentry has no refcount
 * and is unhashed.
 */
static void dentry_iput(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	if (inode) {
		dentry->d_inode = NULL;
		hlist_del_init(&dentry->d_alias);
		spin_unlock(&dentry->d_lock);
		spin_unlock(&inode->i_lock);
		if (!inode->i_nlink)
			fsnotify_inoderemove(inode);
		if (dentry->d_op && dentry->d_op->d_iput)
			dentry->d_op->d_iput(dentry, inode);
		else
			iput(inode);
	} else {
		spin_unlock(&dentry->d_lock);
	}
}

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. dentry remains in-use.
 */
static void dentry_unlink_inode(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	dentry->d_inode = NULL;
	hlist_del_init(&dentry->d_alias);
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&inode->i_lock);
	if (!inode->i_nlink)
		fsnotify_inoderemove(inode);
	if (dentry->d_op && dentry->d_op->d_iput)
		dentry->d_op->d_iput(dentry, inode);
	else
		iput(inode);
}

/*
 * The DCACHE_LRU_LIST bit is set whenever the 'd_lru' entry
 * is in use - which includes both the "real" per-superblock
 * LRU list _and_ the DCACHE_SHRINK_LIST use.
 *
 * The DCACHE_SHRINK_LIST bit is set whenever the dentry is
 * on the shrink list (ie not on the superblock LRU list).
 *
 * The per-cpu "nr_dentry_unused" counters are updated with
 * the DCACHE_LRU_LIST bit.
 *
 * These helper functions make sure we always follow the
 * rules. d_lock must be held by the caller.
 */
#define D_FLAG_VERIFY(dentry,x) WARN_ON_ONCE(((dentry)->d_flags & (DCACHE_LRU_LIST | DCACHE_SHRINK_LIST)) != (x))
static void d_lru_add(struct dentry *dentry)
{
	D_FLAG_VERIFY(dentry, 0);
	dentry->d_flags |= DCACHE_LRU_LIST;
	this_cpu_inc(nr_dentry_unused);
	WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
}

static void d_lru_del(struct dentry *dentry)
{
	D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
	dentry->d_flags &= ~DCACHE_LRU_LIST;
	this_cpu_dec(nr_dentry_unused);
	WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
}

static void d_shrink_del(struct dentry *dentry)
{
	D_FLAG_VERIFY(dentry, DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
	list_del_init(&dentry->d_lru);
	dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
	this_cpu_dec(nr_dentry_unused);
}

static void d_shrink_add(struct dentry *dentry, struct list_head *list)
{
	D_FLAG_VERIFY(dentry, 0);
	list_add(&dentry->d_lru, list);
	dentry->d_flags |= DCACHE_SHRINK_LIST | DCACHE_LRU_LIST;
	this_cpu_inc(nr_dentry_unused);
}

/*
 * These can only be called under the global LRU lock, ie during the
 * callback for freeing the LRU list. "isolate" removes it from the
 * LRU lists entirely, while shrink_move moves it to the indicated
 * private list.
 */
static void d_lru_isolate(struct dentry *dentry)
{
	D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
	dentry->d_flags &= ~DCACHE_LRU_LIST;
	this_cpu_dec(nr_dentry_unused);
	list_del_init(&dentry->d_lru);
}

static void d_lru_shrink_move(struct dentry *dentry, struct list_head *list)
{
	D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
	dentry->d_flags |= DCACHE_SHRINK_LIST;
	list_move_tail(&dentry->d_lru, list);
}

/*
 * dentry_lru_(add|del) must be called with d_lock held.
 */
static void dentry_lru_add(struct dentry *dentry)
{
	if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
		d_lru_add(dentry);
}

/*
 * Remove a dentry with references from the LRU.
 *
 * If we are on the shrink list, then we can get to try_prune_one_dentry() and
 * lose our last reference through the parent walk. In this case, we need to
 * remove ourselves from the shrink list, not the LRU.
 */
static void dentry_lru_del(struct dentry *dentry)
{
	if (dentry->d_flags & DCACHE_LRU_LIST) {
		if (dentry->d_flags & DCACHE_SHRINK_LIST)
			return d_shrink_del(dentry);
		d_lru_del(dentry);
	}
}

/**
 * d_kill - kill dentry and return parent
 * @dentry: dentry to kill
 * @parent: parent dentry
 *
 * The dentry must already be unhashed and removed from the LRU.
 *
 * If this is the root of the dentry tree, return NULL.
 *
 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
 * d_kill.
 */
static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
	__releases(dentry->d_lock)
	__releases(parent->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	list_del(&dentry->d_u.d_child);
	/*
	 * Inform try_to_ascend() that we are no longer attached to the
	 * dentry tree
	 */
	dentry->d_flags |= DCACHE_DENTRY_KILLED;
	if (parent)
		spin_unlock(&parent->d_lock);
	dentry_iput(dentry);
	/*
	 * dentry_iput drops the locks, at which point nobody (except
	 * transient RCU lookups) can reach this dentry.
	 */
	d_free(dentry);
	return parent;
}

/**
 * d_drop - drop a dentry
 * @dentry: dentry to drop
 *
 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
 * be found through a VFS lookup any more. Note that this is different from
 * deleting the dentry - d_delete will try to mark the dentry negative if
 * possible, giving a successful _negative_ lookup, while d_drop will
 * just make the cache lookup fail.
 *
 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
 * reason (NFS timeouts or autofs deletes).
 *
 * __d_drop requires dentry->d_lock.
 */
void __d_drop(struct dentry *dentry)
{
	if (!d_unhashed(dentry)) {
		struct hlist_bl_head *b;
		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
			b = &dentry->d_sb->s_anon;
		else
			b = d_hash(dentry->d_parent, dentry->d_name.hash);

		hlist_bl_lock(b);
		__hlist_bl_del(&dentry->d_hash);
		dentry->d_hash.pprev = NULL;
		hlist_bl_unlock(b);
		dentry_rcuwalk_barrier(dentry);
	}
}
EXPORT_SYMBOL(__d_drop);

void d_drop(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(d_drop);

/*
 * Finish off a dentry we've decided to kill.
 * dentry->d_lock must be held, returns with it unlocked.
 * If unlock_on_failure is set, d_lock is dropped again before returning
 * when a needed trylock fails.
 * Returns dentry requiring refcount drop, or NULL if we're done.
 */
static inline struct dentry *
dentry_kill(struct dentry *dentry, int unlock_on_failure)
	__releases(dentry->d_lock)
{
	struct inode *inode;
	struct dentry *parent;

	inode = dentry->d_inode;
	if (inode && !spin_trylock(&inode->i_lock)) {
relock:
		if (unlock_on_failure) {
			spin_unlock(&dentry->d_lock);
			cpu_relax();
		}
		return dentry; /* try again with same dentry */
	}
	if (IS_ROOT(dentry))
		parent = NULL;
	else
		parent = dentry->d_parent;
	if (parent && !spin_trylock(&parent->d_lock)) {
		if (inode)
			spin_unlock(&inode->i_lock);
		goto relock;
	}

	/*
	 * The dentry is now unrecoverably dead to the world.
	 */
	lockref_mark_dead(&dentry->d_lockref);

	/*
	 * inform the fs via d_prune that this dentry is about to be
	 * unhashed and destroyed.
	 */
	if ((dentry->d_flags & DCACHE_OP_PRUNE) && !d_unhashed(dentry))
		dentry->d_op->d_prune(dentry);

	dentry_lru_del(dentry);
	/* if it was on the hash then remove it */
	__d_drop(dentry);
	return d_kill(dentry, parent);
}

/* 
 * This is dput
 *
 * This is complicated by the fact that we do not want to put
 * dentries that are no longer on any hash chain on the unused
 * list: we'd much rather just get rid of them immediately.
 *
 * However, that implies that we have to traverse the dentry
 * tree upwards to the parents which might _also_ now be
 * scheduled for deletion (it may have been only waiting for
 * its last child to go away).
 *
 * This tail recursion is done by hand as we don't want to depend
 * on the compiler to always get this right (gcc generally doesn't).
 * Real recursion would eat up our stack space.
 */

/*
 * dput - release a dentry
 * @dentry: dentry to release 
 *
 * Release a dentry. This will drop the usage count and if appropriate
 * call the dentry unlink method as well as removing it from the queues and
 * releasing its resources. If the parent dentries were scheduled for release
 * they too may now get deleted.
 */
void dput(struct dentry *dentry)
{
	if (unlikely(!dentry))
		return;

repeat:
	if (lockref_put_or_lock(&dentry->d_lockref))
		return;

	/* Unreachable? Get rid of it */
	if (unlikely(d_unhashed(dentry)))
		goto kill_it;

	if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
		if (dentry->d_op->d_delete(dentry))
			goto kill_it;
	}

	dentry->d_flags |= DCACHE_REFERENCED;
	dentry_lru_add(dentry);

	dentry->d_lockref.count--;
	spin_unlock(&dentry->d_lock);
	return;

kill_it:
	dentry = dentry_kill(dentry, 1);
	if (dentry)
		goto repeat;
}
EXPORT_SYMBOL(dput);
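
/*
 * Usage sketch (hypothetical caller, for illustration only): every
 * reference obtained through dget(), dget_parent() or a successful lookup
 * must eventually be balanced by a dput(), roughly like this.
 */
static inline void dput_usage_sketch(struct dentry *dentry)
{
	struct dentry *parent = dget_parent(dentry);

	/* ... use @parent knowing it cannot be freed under us ... */
	dput(parent);
}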

/**
 * d_invalidate - invalidate a dentry
 * @dentry: dentry to invalidate
 *
 * Try to invalidate the dentry if it turns out to be
 * possible. If there are other dentries that can be
 * reached through this one we can't delete it and we
 * return -EBUSY. On success we return 0.
 *
 * no dcache lock.
 */
 
int d_invalidate(struct dentry * dentry)
{
	/*
	 * If it's already been dropped, return OK.
	 */
	spin_lock(&dentry->d_lock);
	if (d_unhashed(dentry)) {
		spin_unlock(&dentry->d_lock);
		return 0;
	}
	/*
	 * Check whether to do a partial shrink_dcache
	 * to get rid of unused child entries.
	 */
	if (!list_empty(&dentry->d_subdirs)) {
		spin_unlock(&dentry->d_lock);
		shrink_dcache_parent(dentry);
		spin_lock(&dentry->d_lock);
	}

	/*
	 * Somebody else still using it?
	 *
	 * If it's a directory, we can't drop it
	 * for fear of somebody re-populating it
	 * with children (even though dropping it
	 * would make it unreachable from the root,
	 * we might still populate it if it was a
	 * working directory or similar).
	 * We also need to leave mountpoints alone,
	 * directory or not.
	 */
	if (dentry->d_lockref.count > 1 && dentry->d_inode) {
		if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) {
			spin_unlock(&dentry->d_lock);
			return -EBUSY;
		}
	}

	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
	return 0;
}
EXPORT_SYMBOL(d_invalidate);

/* This must be called with d_lock held */
static inline void __dget_dlock(struct dentry *dentry)
{
	dentry->d_lockref.count++;
}

static inline void __dget(struct dentry *dentry)
{
	lockref_get(&dentry->d_lockref);
}

struct dentry *dget_parent(struct dentry *dentry)
{
	int gotref;
	struct dentry *ret;

	/*
	 * Do optimistic parent lookup without any
	 * locking.
	 */
	rcu_read_lock();
	ret = ACCESS_ONCE(dentry->d_parent);
	gotref = lockref_get_not_zero(&ret->d_lockref);
	rcu_read_unlock();
	if (likely(gotref)) {
		if (likely(ret == ACCESS_ONCE(dentry->d_parent)))
			return ret;
		dput(ret);
	}

repeat:
	/*
	 * Don't need rcu_dereference because we re-check it was correct under
	 * the lock.
	 */
	rcu_read_lock();
	ret = dentry->d_parent;
	spin_lock(&ret->d_lock);
	if (unlikely(ret != dentry->d_parent)) {
		spin_unlock(&ret->d_lock);
		rcu_read_unlock();
		goto repeat;
	}
	rcu_read_unlock();
	BUG_ON(!ret->d_lockref.count);
	ret->d_lockref.count++;
	spin_unlock(&ret->d_lock);
	return ret;
}
EXPORT_SYMBOL(dget_parent);

/**
 * d_find_alias - grab a hashed alias of inode
 * @inode: inode in question
 * @want_discon:  flag, used by d_splice_alias, to request
 *          that only a DISCONNECTED alias be returned.
 *
 * If inode has a hashed alias, or is a directory and has any alias,
 * acquire the reference to alias and return it. Otherwise return NULL.
 * Notice that if inode is a directory there can be only one alias and
 * it can be unhashed only if it has no children, or if it is the root
 * of a filesystem.
 *
 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 * any other hashed alias over that one unless @want_discon is set,
 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
 */
static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
{
	struct dentry *alias, *discon_alias;

again:
	discon_alias = NULL;
	hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
		spin_lock(&alias->d_lock);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				discon_alias = alias;
			} else if (!want_discon) {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
	}
	if (discon_alias) {
		alias = discon_alias;
		spin_lock(&alias->d_lock);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
		goto again;
	}
	return NULL;
}

struct dentry *d_find_alias(struct inode *inode)
{
	struct dentry *de = NULL;

	if (!hlist_empty(&inode->i_dentry)) {
		spin_lock(&inode->i_lock);
		de = __d_find_alias(inode, 0);
		spin_unlock(&inode->i_lock);
	}
	return de;
}
EXPORT_SYMBOL(d_find_alias);

/*
 *	Try to kill dentries associated with this inode.
 * WARNING: you must own a reference to inode.
 */
void d_prune_aliases(struct inode *inode)
{
	struct dentry *dentry;
restart:
	spin_lock(&inode->i_lock);
	hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
		spin_lock(&dentry->d_lock);
		if (!dentry->d_lockref.count) {
			/*
			 * inform the fs via d_prune that this dentry
			 * is about to be unhashed and destroyed.
			 */
			if ((dentry->d_flags & DCACHE_OP_PRUNE) &&
			    !d_unhashed(dentry))
				dentry->d_op->d_prune(dentry);

			__dget_dlock(dentry);
			__d_drop(dentry);
			spin_unlock(&dentry->d_lock);
			spin_unlock(&inode->i_lock);
			dput(dentry);
			goto restart;
		}
		spin_unlock(&dentry->d_lock);
	}
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_prune_aliases);

/*
 * Try to throw away a dentry - free the inode, dput the parent.
 * Requires dentry->d_lock is held, and dentry->d_count == 0.
 * Releases dentry->d_lock.
 *
 * This may fail if locks cannot be acquired - no problem, just try again.
 */
static struct dentry * try_prune_one_dentry(struct dentry *dentry)
	__releases(dentry->d_lock)
{
	struct dentry *parent;

	parent = dentry_kill(dentry, 0);
	/*
	 * If dentry_kill returns NULL, we have nothing more to do.
	 * If it returns the same dentry, trylocks failed. In either
	 * case, just loop again.
	 *
	 * Otherwise, we need to prune ancestors too. This is necessary
	 * to prevent quadratic behavior of shrink_dcache_parent(), but
	 * is also expected to be beneficial in reducing dentry cache
	 * fragmentation.
	 */
	if (!parent)
		return NULL;
	if (parent == dentry)
		return dentry;

	/* Prune ancestors. */
	dentry = parent;
	while (dentry) {
		if (lockref_put_or_lock(&dentry->d_lockref))
			return NULL;
		dentry = dentry_kill(dentry, 1);
	}
	return NULL;
}

static void shrink_dentry_list(struct list_head *list)
{
	struct dentry *dentry;

	rcu_read_lock();
	for (;;) {
		dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
		if (&dentry->d_lru == list)
			break; /* empty */

		/*
		 * Get the dentry lock, and re-verify that the dentry is
		 * still on the shrink list. If it is, we know that
		 * DCACHE_SHRINK_LIST and DCACHE_LRU_LIST are set.
		 */
		spin_lock(&dentry->d_lock);
		if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
			spin_unlock(&dentry->d_lock);
			continue;
		}

		/*
		 * The dispose list is isolated and dentries are not accounted
		 * to the LRU here, so we can simply remove it from the list
		 * here regardless of whether it is referenced or not.
		 */
		d_shrink_del(dentry);

		/*
		 * We found an inuse dentry which was not removed from
		 * the LRU because of laziness during lookup. Do not free it.
		 */
		if (dentry->d_lockref.count) {
			spin_unlock(&dentry->d_lock);
			continue;
		}
		rcu_read_unlock();

		/*
		 * If try_prune_one_dentry() returns a dentry, it will
		 * be the same one we passed in, and d_lock will
		 * have been held the whole time, so it will not
		 * have been added to any other lists. We failed
		 * to get the inode lock.
		 *
		 * We just add it back to the shrink list.
		 */
		dentry = try_prune_one_dentry(dentry);

		rcu_read_lock();
		if (dentry) {
			d_shrink_add(dentry, list);
			spin_unlock(&dentry->d_lock);
		}
	}
	rcu_read_unlock();
}

static enum lru_status
dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg)
{
	struct list_head *freeable = arg;
	struct dentry	*dentry = container_of(item, struct dentry, d_lru);


	/*
	 * we are inverting the lru lock/dentry->d_lock here,
	 * so use a trylock. If we fail to get the lock, just skip
	 * it
	 */
	if (!spin_trylock(&dentry->d_lock))
		return LRU_SKIP;

	/*
	 * Referenced dentries are still in use. If they have active
	 * counts, just remove them from the LRU. Otherwise give them
	 * another pass through the LRU.
	 */
	if (dentry->d_lockref.count) {
		d_lru_isolate(dentry);
		spin_unlock(&dentry->d_lock);
		return LRU_REMOVED;
	}

	if (dentry->d_flags & DCACHE_REFERENCED) {
		dentry->d_flags &= ~DCACHE_REFERENCED;
		spin_unlock(&dentry->d_lock);

		/*
		 * The list move itself will be made by the common LRU code. At
		 * this point, we've dropped the dentry->d_lock but keep the
		 * lru lock. This is safe to do, since every list movement is
		 * protected by the lru lock even if both locks are held.
		 *
		 * This is guaranteed by the fact that all LRU management
		 * functions are intermediated by the LRU API calls like
		 * list_lru_add and list_lru_del. List movement in this file
		 * only ever occur through this functions or through callbacks
		 * like this one, that are called from the LRU API.
		 *
		 * The only exceptions to this are functions like
		 * shrink_dentry_list, and code that first checks for the
		 * DCACHE_SHRINK_LIST flag.  Those are guaranteed to be
		 * operating only with stack provided lists after they are
		 * properly isolated from the main list.  It is thus, always a
		 * local access.
		 */
		return LRU_ROTATE;
	}

	d_lru_shrink_move(dentry, freeable);
	spin_unlock(&dentry->d_lock);

	return LRU_REMOVED;
}

/**
 * prune_dcache_sb - shrink the dcache
 * @sb: superblock
 * @nr_to_scan : number of entries to try to free
 * @nid: which node to scan for freeable entities
 *
 * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is
 * done when we need more memory and called from the superblock shrinker
 * function.
 *
 * This function may fail to free any resources if all the dentries are in
 * use.
 */
long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan,
		     int nid)
{
	LIST_HEAD(dispose);
	long freed;

	freed = list_lru_walk_node(&sb->s_dentry_lru, nid, dentry_lru_isolate,
				       &dispose, &nr_to_scan);
	shrink_dentry_list(&dispose);
	return freed;
}

static enum lru_status dentry_lru_isolate_shrink(struct list_head *item,
						spinlock_t *lru_lock, void *arg)
{
	struct list_head *freeable = arg;
	struct dentry	*dentry = container_of(item, struct dentry, d_lru);

	/*
	 * we are inverting the lru lock/dentry->d_lock here,
	 * so use a trylock. If we fail to get the lock, just skip
	 * it
	 */
	if (!spin_trylock(&dentry->d_lock))
		return LRU_SKIP;

	d_lru_shrink_move(dentry, freeable);
	spin_unlock(&dentry->d_lock);

	return LRU_REMOVED;
}

/**
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
 *
 * Shrink the dcache for the specified super block. This is used to free
 * the dcache before unmounting a file system.
 */
void shrink_dcache_sb(struct super_block *sb)
{
	long freed;

	do {
		LIST_HEAD(dispose);

		freed = list_lru_walk(&sb->s_dentry_lru,
			dentry_lru_isolate_shrink, &dispose, UINT_MAX);

		this_cpu_sub(nr_dentry_unused, freed);
		shrink_dentry_list(&dispose);
	} while (freed > 0);
}
EXPORT_SYMBOL(shrink_dcache_sb);

/*
 * This tries to ascend one level of parenthood, but
 * we can race with renaming, so we need to re-check
 * the parenthood after dropping the lock and check
 * that the sequence number still matches.
 */
static struct dentry *try_to_ascend(struct dentry *old, unsigned seq)
{
	struct dentry *new = old->d_parent;

	rcu_read_lock();
	spin_unlock(&old->d_lock);
	spin_lock(&new->d_lock);

	/*
	 * might go back up the wrong parent if we have had a rename
	 * or deletion
	 */
	if (new != old->d_parent ||
		 (old->d_flags & DCACHE_DENTRY_KILLED) ||
		 need_seqretry(&rename_lock, seq)) {
		spin_unlock(&new->d_lock);
		new = NULL;
	}
	rcu_read_unlock();
	return new;
}

/**
 * enum d_walk_ret - action to take during tree walk
 * @D_WALK_CONTINUE:	continue walk
 * @D_WALK_QUIT:	quit walk
 * @D_WALK_NORETRY:	quit when retry is needed
 * @D_WALK_SKIP:	skip this dentry and its children
 */
enum d_walk_ret {
	D_WALK_CONTINUE,
	D_WALK_QUIT,
	D_WALK_NORETRY,
	D_WALK_SKIP,
};
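
/*
 * Illustrative sketch only (hypothetical callback, not used below): an
 * @enter callback for d_walk() that counts every dentry it visits and
 * never aborts the walk.
 */
static inline enum d_walk_ret count_dentries_sketch(void *data,
						    struct dentry *dentry)
{
	unsigned long *count = data;

	(*count)++;
	return D_WALK_CONTINUE;
}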

/**
 * d_walk - walk the dentry tree
 * @parent:	start of walk
 * @data:	data passed to @enter() and @finish()
 * @enter:	callback when first entering the dentry
 * @finish:	callback when successfully finished the walk
 *
 * The @enter() and @finish() callbacks are called with d_lock held.
 */
static void d_walk(struct dentry *parent, void *data,
		   enum d_walk_ret (*enter)(void *, struct dentry *),
		   void (*finish)(void *))
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq = 0;
	enum d_walk_ret ret;
	bool retry = true;

again:
	read_seqbegin_or_lock(&rename_lock, &seq);
	this_parent = parent;
	spin_lock(&this_parent->d_lock);

	ret = enter(data, this_parent);
	switch (ret) {
	case D_WALK_CONTINUE:
		break;
	case D_WALK_QUIT:
	case D_WALK_SKIP:
		goto out_unlock;
	case D_WALK_NORETRY:
		retry = false;
		break;
	}
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);

		ret = enter(data, dentry);
		switch (ret) {
		case D_WALK_CONTINUE:
			break;
		case D_WALK_QUIT:
			spin_unlock(&dentry->d_lock);
			goto out_unlock;
		case D_WALK_NORETRY:
			retry = false;
			break;
		case D_WALK_SKIP:
			spin_unlock(&dentry->d_lock);
			continue;
		}

		if (!list_empty(&dentry->d_subdirs)) {
			spin_unlock(&this_parent->d_lock);
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}
		spin_unlock(&dentry->d_lock);