namei.c 91.6 KB
Newer Older
1
/*
2
 *  linux/fs/ext4/namei.c
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/namei.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 *  Directory entry file type support and forward compatibility hooks
 *	for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
 *  Hash Tree Directory indexing (c)
 *	Daniel Phillips, 2001
 *  Hash Tree Directory indexing porting
 *	Christopher Li, 2002
 *  Hash Tree Directory indexing cleanup
 *	Theodore Ts'o, 2002
 */

#include <linux/fs.h>
#include <linux/pagemap.h>
29
#include <linux/jbd2.h>
30
31
32
33
34
35
36
#include <linux/time.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/bio.h>
37
38
#include "ext4.h"
#include "ext4_jbd2.h"
39
40
41
42

#include "xattr.h"
#include "acl.h"

43
#include <trace/events/ext4.h>
44
45
46
47
48
/*
 * define how far ahead to read directories while searching them.
 */
#define NAMEI_RA_CHUNKS  2
#define NAMEI_RA_BLOCKS  4
Dave Kleikamp's avatar
Dave Kleikamp committed
49
#define NAMEI_RA_SIZE	     (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
50

51
static struct buffer_head *ext4_append(handle_t *handle,
52
					struct inode *inode,
53
					ext4_lblk_t *block)
54
55
{
	struct buffer_head *bh;
56
	int err = 0;
57

58
59
	if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
		     ((inode->i_size >> 10) >=
60
61
		      EXT4_SB(inode->i_sb)->s_max_dir_size_kb)))
		return ERR_PTR(-ENOSPC);
62

63
64
	*block = inode->i_size >> inode->i_sb->s_blocksize_bits;

65
66
67
68
69
	bh = ext4_bread(handle, inode, *block, 1, &err);
	if (!bh)
		return ERR_PTR(err);
	inode->i_size += inode->i_sb->s_blocksize;
	EXT4_I(inode)->i_disksize = inode->i_size;
70
	BUFFER_TRACE(bh, "get_write_access");
71
72
73
74
75
	err = ext4_journal_get_write_access(handle, bh);
	if (err) {
		brelse(bh);
		ext4_std_error(inode->i_sb, err);
		return ERR_PTR(err);
Carlos Maiolino's avatar
Carlos Maiolino committed
76
	}
77
78
79
	return bh;
}

80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
static int ext4_dx_csum_verify(struct inode *inode,
			       struct ext4_dir_entry *dirent);

typedef enum {
	EITHER, INDEX, DIRENT
} dirblock_type_t;

#define ext4_read_dirblock(inode, block, type) \
	__ext4_read_dirblock((inode), (block), (type), __LINE__)

static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
					      ext4_lblk_t block,
					      dirblock_type_t type,
					      unsigned int line)
{
	struct buffer_head *bh;
	struct ext4_dir_entry *dirent;
	int err = 0, is_dx_block = 0;

	bh = ext4_bread(NULL, inode, block, 0, &err);
	if (!bh) {
		if (err == 0) {
			ext4_error_inode(inode, __func__, line, block,
					       "Directory hole found");
			return ERR_PTR(-EIO);
		}
		__ext4_warning(inode->i_sb, __func__, line,
			       "error reading directory block "
			       "(ino %lu, block %lu)", inode->i_ino,
			       (unsigned long) block);
		return ERR_PTR(err);
	}
	dirent = (struct ext4_dir_entry *) bh->b_data;
	/* Determine whether or not we have an index block */
	if (is_dx(inode)) {
		if (block == 0)
			is_dx_block = 1;
		else if (ext4_rec_len_from_disk(dirent->rec_len,
						inode->i_sb->s_blocksize) ==
			 inode->i_sb->s_blocksize)
			is_dx_block = 1;
	}
	if (!is_dx_block && type == INDEX) {
		ext4_error_inode(inode, __func__, line, block,
		       "directory leaf block found instead of index block");
		return ERR_PTR(-EIO);
	}
	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) ||
	    buffer_verified(bh))
		return bh;

	/*
	 * An empty leaf block can get mistaken for a index block; for
	 * this reason, we can only check the index checksum when the
	 * caller is sure it should be an index block.
	 */
	if (is_dx_block && type == INDEX) {
		if (ext4_dx_csum_verify(inode, dirent))
			set_buffer_verified(bh);
		else {
			ext4_error_inode(inode, __func__, line, block,
				"Directory index failed checksum");
143
			brelse(bh);
144
			return ERR_PTR(-EIO);
145
		}
146
	}
147
148
149
150
151
152
153
154
155
	if (!is_dx_block) {
		if (ext4_dirent_csum_verify(inode, dirent))
			set_buffer_verified(bh);
		else {
			ext4_error_inode(inode, __func__, line, block,
				"Directory block failed checksum");
			brelse(bh);
			return ERR_PTR(-EIO);
		}
Carlos Maiolino's avatar
Carlos Maiolino committed
156
	}
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
	return bh;
}

#ifndef assert
#define assert(test) J_ASSERT(test)
#endif

#ifdef DX_DEBUG
#define dxtrace(command) command
#else
#define dxtrace(command)
#endif

struct fake_dirent
{
	__le32 inode;
	__le16 rec_len;
	u8 name_len;
	u8 file_type;
};

struct dx_countlimit
{
	__le16 limit;
	__le16 count;
};

struct dx_entry
{
	__le32 hash;
	__le32 block;
};

/*
 * dx_root_info is laid out so that if it should somehow get overlaid by a
 * dirent the two low bits of the hash version will be zero.  Therefore, the
 * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
 */

struct dx_root
{
	struct fake_dirent dot;
	char dot_name[4];
	struct fake_dirent dotdot;
	char dotdot_name[4];
	struct dx_root_info
	{
		__le32 reserved_zero;
		u8 hash_version;
		u8 info_length; /* 8 */
		u8 indirect_levels;
		u8 unused_flags;
	}
	info;
	struct dx_entry	entries[0];
};

struct dx_node
{
	struct fake_dirent fake;
	struct dx_entry	entries[0];
};


struct dx_frame
{
	struct buffer_head *bh;
	struct dx_entry *entries;
	struct dx_entry *at;
};

struct dx_map_entry
{
	u32 hash;
231
232
	u16 offs;
	u16 size;
233
234
};

235
236
237
238
239
240
241
242
/*
 * This goes at the end of each htree block.
 */
struct dx_tail {
	u32 dt_reserved;
	__le32 dt_checksum;	/* crc32c(uuid+inum+dirblock) */
};

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
243
244
static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
245
246
247
248
249
250
251
252
static inline unsigned dx_get_hash(struct dx_entry *entry);
static void dx_set_hash(struct dx_entry *entry, unsigned value);
static unsigned dx_get_count(struct dx_entry *entries);
static unsigned dx_get_limit(struct dx_entry *entries);
static void dx_set_count(struct dx_entry *entries, unsigned value);
static void dx_set_limit(struct dx_entry *entries, unsigned value);
static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
static unsigned dx_node_limit(struct inode *dir);
253
static struct dx_frame *dx_probe(const struct qstr *d_name,
254
255
256
257
				 struct inode *dir,
				 struct dx_hash_info *hinfo,
				 struct dx_frame *frame,
				 int *err);
258
static void dx_release(struct dx_frame *frames);
259
static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
260
		       struct dx_hash_info *hinfo, struct dx_map_entry map[]);
261
static void dx_sort_map(struct dx_map_entry *map, unsigned count);
262
static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
263
		struct dx_map_entry *offsets, int count, unsigned blocksize);
264
static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize);
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
265
266
static void dx_insert_block(struct dx_frame *frame,
					u32 hash, ext4_lblk_t block);
267
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
268
269
270
				 struct dx_frame *frame,
				 struct dx_frame *frames,
				 __u32 *start_hash);
271
272
273
274
static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
		const struct qstr *d_name,
		struct ext4_dir_entry_2 **res_dir,
		int *err);
275
static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
276
277
			     struct inode *inode);

278
/* checksumming functions */
279
280
void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
			    unsigned int blocksize)
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
{
	memset(t, 0, sizeof(struct ext4_dir_entry_tail));
	t->det_rec_len = ext4_rec_len_to_disk(
			sizeof(struct ext4_dir_entry_tail), blocksize);
	t->det_reserved_ft = EXT4_FT_DIR_CSUM;
}

/* Walk through a dirent block to find a checksum "dirent" at the tail */
static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
						   struct ext4_dir_entry *de)
{
	struct ext4_dir_entry_tail *t;

#ifdef PARANOID
	struct ext4_dir_entry *d, *top;

	d = de;
	top = (struct ext4_dir_entry *)(((void *)de) +
		(EXT4_BLOCK_SIZE(inode->i_sb) -
		sizeof(struct ext4_dir_entry_tail)));
	while (d < top && d->rec_len)
		d = (struct ext4_dir_entry *)(((void *)d) +
		    le16_to_cpu(d->rec_len));

	if (d != top)
		return NULL;

	t = (struct ext4_dir_entry_tail *)d;
#else
	t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb));
#endif

	if (t->det_reserved_zero1 ||
	    le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) ||
	    t->det_reserved_zero2 ||
	    t->det_reserved_ft != EXT4_FT_DIR_CSUM)
		return NULL;

	return t;
}

static __le32 ext4_dirent_csum(struct inode *inode,
			       struct ext4_dir_entry *dirent, int size)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
	__u32 csum;

	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
	return cpu_to_le32(csum);
}

333
334
335
336
337
338
static void warn_no_space_for_csum(struct inode *inode)
{
	ext4_warning(inode->i_sb, "no space in directory inode %lu leaf for "
		     "checksum.  Please run e2fsck -D.", inode->i_ino);
}

339
340
341
342
343
344
345
346
347
348
int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
{
	struct ext4_dir_entry_tail *t;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return 1;

	t = get_dirent_tail(inode, dirent);
	if (!t) {
349
		warn_no_space_for_csum(inode);
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
		return 0;
	}

	if (t->det_checksum != ext4_dirent_csum(inode, dirent,
						(void *)t - (void *)dirent))
		return 0;

	return 1;
}

static void ext4_dirent_csum_set(struct inode *inode,
				 struct ext4_dir_entry *dirent)
{
	struct ext4_dir_entry_tail *t;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return;

	t = get_dirent_tail(inode, dirent);
	if (!t) {
371
		warn_no_space_for_csum(inode);
372
373
374
375
376
377
378
		return;
	}

	t->det_checksum = ext4_dirent_csum(inode, dirent,
					   (void *)t - (void *)dirent);
}

379
380
381
int ext4_handle_dirty_dirent_node(handle_t *handle,
				  struct inode *inode,
				  struct buffer_head *bh)
382
383
384
385
386
{
	ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
	return ext4_handle_dirty_metadata(handle, inode, bh);
}

387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
					       struct ext4_dir_entry *dirent,
					       int *offset)
{
	struct ext4_dir_entry *dp;
	struct dx_root_info *root;
	int count_offset;

	if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb))
		count_offset = 8;
	else if (le16_to_cpu(dirent->rec_len) == 12) {
		dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
		if (le16_to_cpu(dp->rec_len) !=
		    EXT4_BLOCK_SIZE(inode->i_sb) - 12)
			return NULL;
		root = (struct dx_root_info *)(((void *)dp + 12));
		if (root->reserved_zero ||
		    root->info_length != sizeof(struct dx_root_info))
			return NULL;
		count_offset = 32;
	} else
		return NULL;

	if (offset)
		*offset = count_offset;
	return (struct dx_countlimit *)(((void *)dirent) + count_offset);
}

static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
			   int count_offset, int count, struct dx_tail *t)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
420
421
	__u32 csum;
	__le32 save_csum;
422
423
424
	int size;

	size = count_offset + (count * sizeof(struct dx_entry));
425
	save_csum = t->dt_checksum;
426
427
428
	t->dt_checksum = 0;
	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
	csum = ext4_chksum(sbi, csum, (__u8 *)t, sizeof(struct dx_tail));
429
	t->dt_checksum = save_csum;
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453

	return cpu_to_le32(csum);
}

static int ext4_dx_csum_verify(struct inode *inode,
			       struct ext4_dir_entry *dirent)
{
	struct dx_countlimit *c;
	struct dx_tail *t;
	int count_offset, limit, count;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return 1;

	c = get_dx_countlimit(inode, dirent, &count_offset);
	if (!c) {
		EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
		return 1;
	}
	limit = le16_to_cpu(c->limit);
	count = le16_to_cpu(c->count);
	if (count_offset + (limit * sizeof(struct dx_entry)) >
	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
454
		warn_no_space_for_csum(inode);
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
		return 1;
	}
	t = (struct dx_tail *)(((struct dx_entry *)c) + limit);

	if (t->dt_checksum != ext4_dx_csum(inode, dirent, count_offset,
					    count, t))
		return 0;
	return 1;
}

static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
{
	struct dx_countlimit *c;
	struct dx_tail *t;
	int count_offset, limit, count;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return;

	c = get_dx_countlimit(inode, dirent, &count_offset);
	if (!c) {
		EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
		return;
	}
	limit = le16_to_cpu(c->limit);
	count = le16_to_cpu(c->count);
	if (count_offset + (limit * sizeof(struct dx_entry)) >
	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
484
		warn_no_space_for_csum(inode);
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
		return;
	}
	t = (struct dx_tail *)(((struct dx_entry *)c) + limit);

	t->dt_checksum = ext4_dx_csum(inode, dirent, count_offset, count, t);
}

static inline int ext4_handle_dirty_dx_node(handle_t *handle,
					    struct inode *inode,
					    struct buffer_head *bh)
{
	ext4_dx_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
	return ext4_handle_dirty_metadata(handle, inode, bh);
}

500
501
502
503
/*
 * p is at least 6 bytes before the end of page
 */
static inline struct ext4_dir_entry_2 *
504
ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
505
506
{
	return (struct ext4_dir_entry_2 *)((char *)p +
507
		ext4_rec_len_from_disk(p->rec_len, blocksize));
508
509
}

510
511
512
513
514
/*
 * Future: use high four bits of block for coalesce-on-delete flags
 * Mask them off for now.
 */

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
515
static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
516
517
518
519
{
	return le32_to_cpu(entry->block) & 0x00ffffff;
}

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
520
static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
521
522
523
524
{
	entry->block = cpu_to_le32(value);
}

525
static inline unsigned dx_get_hash(struct dx_entry *entry)
526
527
528
529
{
	return le32_to_cpu(entry->hash);
}

530
static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
531
532
533
534
{
	entry->hash = cpu_to_le32(value);
}

535
static inline unsigned dx_get_count(struct dx_entry *entries)
536
537
538
539
{
	return le16_to_cpu(((struct dx_countlimit *) entries)->count);
}

540
static inline unsigned dx_get_limit(struct dx_entry *entries)
541
542
543
544
{
	return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
}

545
static inline void dx_set_count(struct dx_entry *entries, unsigned value)
546
547
548
549
{
	((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
}

550
static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
551
552
553
554
{
	((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
}

555
static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
556
{
557
558
	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
		EXT4_DIR_REC_LEN(2) - infosize;
559
560
561
562

	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		entry_space -= sizeof(struct dx_tail);
563
	return entry_space / sizeof(struct dx_entry);
564
565
}

566
static inline unsigned dx_node_limit(struct inode *dir)
567
{
568
	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
569
570
571
572

	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		entry_space -= sizeof(struct dx_tail);
573
	return entry_space / sizeof(struct dx_entry);
574
575
576
577
578
579
}

/*
 * Debug
 */
#ifdef DX_DEBUG
580
static void dx_show_index(char * label, struct dx_entry *entries)
581
{
582
	int i, n = dx_get_count (entries);
583
	printk(KERN_DEBUG "%s index ", label);
584
	for (i = 0; i < n; i++) {
585
		printk("%x->%lu ", i ? dx_get_hash(entries + i) :
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
586
				0, (unsigned long)dx_get_block(entries + i));
587
588
	}
	printk("\n");
589
590
591
592
593
594
595
596
597
}

struct stats
{
	unsigned names;
	unsigned space;
	unsigned bcount;
};

598
static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_entry_2 *de,
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
				 int size, int show_names)
{
	unsigned names = 0, space = 0;
	char *base = (char *) de;
	struct dx_hash_info h = *hinfo;

	printk("names: ");
	while ((char *) de < base + size)
	{
		if (de->inode)
		{
			if (show_names)
			{
				int len = de->name_len;
				char *name = de->name;
				while (len--) printk("%c", *name++);
615
				ext4fs_dirhash(de->name, de->name_len, &h);
616
				printk(":%x.%u ", h.hash,
617
				       (unsigned) ((char *) de - base));
618
			}
619
			space += EXT4_DIR_REC_LEN(de->name_len);
620
621
			names++;
		}
622
		de = ext4_next_entry(de, size);
623
624
625
626
627
628
629
630
631
	}
	printk("(%i)\n", names);
	return (struct stats) { names, space, 1 };
}

struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
			     struct dx_entry *entries, int levels)
{
	unsigned blocksize = dir->i_sb->s_blocksize;
632
	unsigned count = dx_get_count(entries), names = 0, space = 0, i;
633
634
635
636
637
638
	unsigned bcount = 0;
	struct buffer_head *bh;
	int err;
	printk("%i indexed blocks...\n", count);
	for (i = 0; i < count; i++, entries++)
	{
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
639
640
		ext4_lblk_t block = dx_get_block(entries);
		ext4_lblk_t hash  = i ? dx_get_hash(entries): 0;
641
642
643
		u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
		struct stats stats;
		printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
644
		if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue;
645
646
		stats = levels?
		   dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
647
		   dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0);
648
649
650
		names += stats.names;
		space += stats.space;
		bcount += stats.bcount;
651
		brelse(bh);
652
653
	}
	if (bcount)
654
		printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
655
656
		       levels ? "" : "   ", names, space/bcount,
		       (space/bcount)*100/blocksize);
657
658
659
660
661
662
663
664
665
666
667
668
669
670
	return (struct stats) { names, space, bcount};
}
#endif /* DX_DEBUG */

/*
 * Probe for a directory leaf block to search.
 *
 * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
 * error in the directory index, and the caller should fall back to
 * searching the directory normally.  The callers of dx_probe **MUST**
 * check for this error code, and make sure it never gets reflected
 * back to userspace.
 */
static struct dx_frame *
671
dx_probe(const struct qstr *d_name, struct inode *dir,
672
673
674
675
676
677
678
679
680
681
	 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
{
	unsigned count, indirect;
	struct dx_entry *at, *entries, *p, *q, *m;
	struct dx_root *root;
	struct buffer_head *bh;
	struct dx_frame *frame = frame_in;
	u32 hash;

	frame->bh = NULL;
682
683
684
	bh = ext4_read_dirblock(dir, 0, INDEX);
	if (IS_ERR(bh)) {
		*err = PTR_ERR(bh);
685
		goto fail;
Carlos Maiolino's avatar
Carlos Maiolino committed
686
	}
687
688
689
690
	root = (struct dx_root *) bh->b_data;
	if (root->info.hash_version != DX_HASH_TEA &&
	    root->info.hash_version != DX_HASH_HALF_MD4 &&
	    root->info.hash_version != DX_HASH_LEGACY) {
691
		ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
692
693
694
695
696
697
			     root->info.hash_version);
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}
	hinfo->hash_version = root->info.hash_version;
698
699
	if (hinfo->hash_version <= DX_HASH_TEA)
		hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
700
	hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
701
702
	if (d_name)
		ext4fs_dirhash(d_name->name, d_name->len, hinfo);
703
704
705
	hash = hinfo->hash;

	if (root->info.unused_flags & 1) {
706
		ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
707
708
709
710
711
712
713
			     root->info.unused_flags);
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}

	if ((indirect = root->info.indirect_levels) > 1) {
714
		ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
715
716
717
718
719
720
721
722
			     root->info.indirect_levels);
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}

	entries = (struct dx_entry *) (((char *)&root->info) +
				       root->info.info_length);
723
724
725

	if (dx_get_limit(entries) != dx_root_limit(dir,
						   root->info.info_length)) {
726
		ext4_warning(dir->i_sb, "dx entry: limit != root limit");
727
728
729
730
731
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}

732
	dxtrace(printk("Look up %x", hash));
733
734
735
	while (1)
	{
		count = dx_get_count(entries);
736
		if (!count || count > dx_get_limit(entries)) {
737
			ext4_warning(dir->i_sb,
738
739
740
741
742
743
				     "dx entry: no count or count > limit");
			brelse(bh);
			*err = ERR_BAD_DX_DIR;
			goto fail2;
		}

744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
		p = entries + 1;
		q = entries + count - 1;
		while (p <= q)
		{
			m = p + (q - p)/2;
			dxtrace(printk("."));
			if (dx_get_hash(m) > hash)
				q = m - 1;
			else
				p = m + 1;
		}

		if (0) // linear search cross check
		{
			unsigned n = count - 1;
			at = entries;
			while (n--)
			{
				dxtrace(printk(","));
				if (dx_get_hash(++at) > hash)
				{
					at--;
					break;
				}
			}
			assert (at == p - 1);
		}

		at = p - 1;
		dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
		frame->bh = bh;
		frame->entries = entries;
		frame->at = at;
		if (!indirect--) return frame;
778
779
780
		bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
		if (IS_ERR(bh)) {
			*err = PTR_ERR(bh);
781
			goto fail2;
782
		}
783
		entries = ((struct dx_node *) bh->b_data)->entries;
784

785
		if (dx_get_limit(entries) != dx_node_limit (dir)) {
786
			ext4_warning(dir->i_sb,
787
788
789
790
791
				     "dx entry: limit != node limit");
			brelse(bh);
			*err = ERR_BAD_DX_DIR;
			goto fail2;
		}
792
		frame++;
793
		frame->bh = NULL;
794
795
796
797
798
799
800
	}
fail2:
	while (frame >= frame_in) {
		brelse(frame->bh);
		frame--;
	}
fail:
801
	if (*err == ERR_BAD_DX_DIR)
802
		ext4_warning(dir->i_sb,
Zheng Liu's avatar
Zheng Liu committed
803
			     "Corrupt dir inode %lu, running e2fsck is "
804
			     "recommended.", dir->i_ino);
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
	return NULL;
}

static void dx_release (struct dx_frame *frames)
{
	if (frames[0].bh == NULL)
		return;

	if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
		brelse(frames[1].bh);
	brelse(frames[0].bh);
}

/*
 * This function increments the frame pointer to search the next leaf
 * block, and reads in the necessary intervening nodes if the search
 * should be necessary.  Whether or not the search is necessary is
 * controlled by the hash parameter.  If the hash value is even, then
 * the search is only continued if the next block starts with that
 * hash value.  This is used if we are searching for a specific file.
 *
 * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
 *
 * This function returns 1 if the caller should continue to search,
 * or 0 if it should not.  If there is an error reading one of the
 * index blocks, it will a negative error code.
 *
 * If start_hash is non-null, it will be filled in with the starting
 * hash of the next page.
 */
835
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
836
837
838
839
840
841
				 struct dx_frame *frame,
				 struct dx_frame *frames,
				 __u32 *start_hash)
{
	struct dx_frame *p;
	struct buffer_head *bh;
842
	int num_frames = 0;
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
	__u32 bhash;

	p = frame;
	/*
	 * Find the next leaf page by incrementing the frame pointer.
	 * If we run out of entries in the interior node, loop around and
	 * increment pointer in the parent node.  When we break out of
	 * this loop, num_frames indicates the number of interior
	 * nodes need to be read.
	 */
	while (1) {
		if (++(p->at) < p->entries + dx_get_count(p->entries))
			break;
		if (p == frames)
			return 0;
		num_frames++;
		p--;
	}

	/*
	 * If the hash is 1, then continue only if the next page has a
	 * continuation hash of any value.  This is used for readdir
	 * handling.  Otherwise, check to see if the hash matches the
	 * desired contiuation hash.  If it doesn't, return since
	 * there's no point to read in the successive index pages.
	 */
	bhash = dx_get_hash(p->at);
	if (start_hash)
		*start_hash = bhash;
	if ((hash & 1) == 0) {
		if ((bhash & ~1) != hash)
			return 0;
	}
	/*
	 * If the hash is HASH_NB_ALWAYS, we always go to the next
	 * block so no check is necessary
	 */
	while (num_frames--) {
881
882
883
		bh = ext4_read_dirblock(dir, dx_get_block(p->at), INDEX);
		if (IS_ERR(bh))
			return PTR_ERR(bh);
884
		p++;
885
		brelse(p->bh);
886
887
888
889
890
891
892
893
894
895
896
897
898
		p->bh = bh;
		p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
	}
	return 1;
}


/*
 * This function fills a red-black tree with information from a
 * directory block.  It returns the number directory entries loaded
 * into the tree.  If there is an error it is returned in err.
 */
static int htree_dirblock_to_tree(struct file *dir_file,
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
899
				  struct inode *dir, ext4_lblk_t block,
900
901
902
903
				  struct dx_hash_info *hinfo,
				  __u32 start_hash, __u32 start_minor_hash)
{
	struct buffer_head *bh;
904
	struct ext4_dir_entry_2 *de, *top;
905
	int err = 0, count = 0;
906

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
907
908
	dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
							(unsigned long)block));
909
910
911
	bh = ext4_read_dirblock(dir, block, DIRENT);
	if (IS_ERR(bh))
		return PTR_ERR(bh);
912

913
914
	de = (struct ext4_dir_entry_2 *) bh->b_data;
	top = (struct ext4_dir_entry_2 *) ((char *) de +
915
					   dir->i_sb->s_blocksize -
916
					   EXT4_DIR_REC_LEN(0));
917
	for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
918
		if (ext4_check_dir_entry(dir, NULL, de, bh,
919
				bh->b_data, bh->b_size,
920
921
				(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
					 + ((char *)de - bh->b_data))) {
922
923
			/* silently ignore the rest of the block */
			break;
924
		}
925
		ext4fs_dirhash(de->name, de->name_len, hinfo);
926
927
928
929
930
931
		if ((hinfo->hash < start_hash) ||
		    ((hinfo->hash == start_hash) &&
		     (hinfo->minor_hash < start_minor_hash)))
			continue;
		if (de->inode == 0)
			continue;
932
		if ((err = ext4_htree_store_dirent(dir_file,
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
				   hinfo->hash, hinfo->minor_hash, de)) != 0) {
			brelse(bh);
			return err;
		}
		count++;
	}
	brelse(bh);
	return count;
}


/*
 * This function fills a red-black tree with information from a
 * directory.  We start scanning the directory in hash order, starting
 * at start_hash and start_minor_hash.
 *
 * This function returns the number of entries inserted into the tree,
 * or a negative error code.
 */
952
int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
953
954
955
			 __u32 start_minor_hash, __u32 *next_hash)
{
	struct dx_hash_info hinfo;
956
	struct ext4_dir_entry_2 *de;
957
958
	struct dx_frame frames[2], *frame;
	struct inode *dir;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
959
	ext4_lblk_t block;
960
	int count = 0;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
961
	int ret, err;
962
963
	__u32 hashval;

964
	dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
965
		       start_hash, start_minor_hash));
Al Viro's avatar
Al Viro committed
966
	dir = file_inode(dir_file);
967
	if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
968
		hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
969
970
971
		if (hinfo.hash_version <= DX_HASH_TEA)
			hinfo.hash_version +=
				EXT4_SB(dir->i_sb)->s_hash_unsigned;
972
		hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
973
974
975
976
977
978
979
980
981
982
983
		if (ext4_has_inline_data(dir)) {
			int has_inline_data = 1;
			count = htree_inlinedir_to_tree(dir_file, dir, 0,
							&hinfo, start_hash,
							start_minor_hash,
							&has_inline_data);
			if (has_inline_data) {
				*next_hash = ~0;
				return count;
			}
		}
984
985
986
987
988
989
990
		count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
					       start_hash, start_minor_hash);
		*next_hash = ~0;
		return count;
	}
	hinfo.hash = start_hash;
	hinfo.minor_hash = 0;
991
	frame = dx_probe(NULL, dir, &hinfo, frames, &err);
992
993
994
995
996
	if (!frame)
		return err;

	/* Add '.' and '..' from the htree header */
	if (!start_hash && !start_minor_hash) {
997
998
		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
		if ((err = ext4_htree_store_dirent(dir_file, 0, 0, de)) != 0)
999
1000
1001
1002
			goto errout;
		count++;
	}
	if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
1003
		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
1004
		de = ext4_next_entry(de, dir->i_sb->s_blocksize);
1005
		if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0)
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
			goto errout;
		count++;
	}

	while (1) {
		block = dx_get_block(frame->at);
		ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
					     start_hash, start_minor_hash);
		if (ret < 0) {
			err = ret;
			goto errout;
		}
		count += ret;
		hashval = ~0;
1020
		ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
					    frame, frames, &hashval);
		*next_hash = hashval;
		if (ret < 0) {
			err = ret;
			goto errout;
		}
		/*
		 * Stop if:  (a) there are no more entries, or
		 * (b) we have inserted at least one entry and the
		 * next hash value is not a continuation
		 */
		if ((ret == 0) ||
		    (count && ((hashval & 1) == 0)))
			break;
	}
	dx_release(frames);
1037
1038
	dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
		       "next hash: %x\n", count, *next_hash));
1039
1040
1041
1042
1043
1044
	return count;
errout:
	dx_release(frames);
	return (err);
}

1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
static inline int search_dirblock(struct buffer_head *bh,
				  struct inode *dir,
				  const struct qstr *d_name,
				  unsigned int offset,
				  struct ext4_dir_entry_2 **res_dir)
{
	return search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir,
			  d_name, offset, res_dir);
}

1055
1056
1057
1058
/*
 * Directory block splitting, compacting
 */

1059
1060
1061
1062
/*
 * Create map of hash values, offsets, and sizes, stored at end of block.
 * Returns number of entries mapped.
 */
1063
1064
1065
static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
		       struct dx_hash_info *hinfo,
		       struct dx_map_entry *map_tail)
1066
1067
1068
1069
1070
{
	int count = 0;
	char *base = (char *) de;
	struct dx_hash_info h = *hinfo;

1071
	while ((char *) de < base + blocksize) {
1072
		if (de->name_len && de->inode) {
1073
			ext4fs_dirhash(de->name, de->name_len, &h);
1074
1075
			map_tail--;
			map_tail->hash = h.hash;
1076
			map_tail->offs = ((char *) de - base)>>2;
1077
			map_tail->size = le16_to_cpu(de->rec_len);
1078
1079
1080
1081
			count++;
			cond_resched();
		}
		/* XXX: do we need to check rec_len == 0 case? -Chris */
1082
		de = ext4_next_entry(de, blocksize);
1083
1084
1085
1086
	}
	return count;
}

1087
/* Sort map by hash value */
1088
1089
static void dx_sort_map (struct dx_map_entry *map, unsigned count)
{
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
	struct dx_map_entry *p, *q, *top = map + count - 1;
	int more;
	/* Combsort until bubble sort doesn't suck */
	while (count > 2) {
		count = count*10/13;
		if (count - 9 < 2) /* 9, 10 -> 11 */
			count = 11;
		for (p = top, q = p - count; q >= map; p--, q--)
			if (p->hash < q->hash)
				swap(*p, *q);
	}
	/* Garden variety bubble sort */
	do {
		more = 0;
		q = top;
		while (q-- > map) {
			if (q[1].hash >= q[0].hash)
1107
				continue;
1108
1109
			swap(*(q+1), *q);
			more = 1;
1110
1111
1112
1113
		}
	} while(more);
}

Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
1114
static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
{
	struct dx_entry *entries = frame->entries;
	struct dx_entry *old = frame->at, *new = old + 1;
	int count = dx_get_count(entries);

	assert(count < dx_get_limit(entries));
	assert(old < entries + count);
	memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
	dx_set_hash(new, hash);
	dx_set_block(new, block);
	dx_set_count(entries, count + 1);
}

/*
1129
 * NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure.
1130
 *
1131
 * `len <= EXT4_NAME_LEN' is guaranteed by caller.
1132
1133
 * `de != NULL' is guaranteed by caller.
 */
1134
1135
static inline int ext4_match (int len, const char * const name,
			      struct ext4_dir_entry_2 * de)
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
{
	if (len != de->name_len)
		return 0;
	if (!de->inode)
		return 0;
	return !memcmp(name, de->name, len);
}

/*
 * Returns 0 if not found, -1 on failure, and 1 on success
 */
1147
1148
1149
1150
1151
1152
1153
int search_dir(struct buffer_head *bh,
	       char *search_buf,
	       int buf_size,
	       struct inode *dir,
	       const struct qstr *d_name,
	       unsigned int offset,
	       struct ext4_dir_entry_2 **res_dir)
1154
{
1155
	struct ext4_dir_entry_2 * de;
1156
1157
	char * dlimit;
	int de_len;
1158
1159
	const char *name = d_name->name;
	int namelen = d_name->len;
1160

1161
1162
	de = (struct ext4_dir_entry_2 *)search_buf;
	dlimit = search_buf + buf_size;
1163
1164
1165
1166
1167
	while ((char *) de < dlimit) {
		/* this code is executed quadratically often */
		/* do minimal checking `by hand' */

		if ((char *) de + namelen <= dlimit &&
1168
		    ext4_match (namelen, name, de)) {
1169
			/* found a match - just to be sure, do a full check */
1170
1171
			if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
						 bh->b_size, offset))
1172
1173
1174
1175
1176
				return -1;
			*res_dir = de;
			return 1;
		}
		/* prevent looping on a bad block */
1177
1178
		de_len = ext4_rec_len_from_disk(de->rec_len,
						dir->i_sb->s_blocksize);
1179
1180
1181
		if (de_len <= 0)
			return -1;
		offset += de_len;
1182
		de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
1183
1184
1185
1186
	}
	return 0;
}

1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
			       struct ext4_dir_entry *de)
{
	struct super_block *sb = dir->i_sb;

	if (!is_dx(dir))
		return 0;
	if (block == 0)
		return 1;
	if (de->inode == 0 &&
	    ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) ==
			sb->s_blocksize)
		return 1;
	return 0;
}
1202
1203

/*
1204
 *	ext4_find_entry()
1205
1206
1207
1208
1209
1210
1211
1212
1213
 *
 * finds an entry in the specified directory with the wanted name. It
 * returns the cache buffer in which the entry was found, and the entry
 * itself (as a parameter - res_dir). It does NOT read the inode of the
 * entry - you'll have to do that yourself if you want to.
 *
 * The returned buffer_head has ->b_count elevated.  The caller is expected
 * to brelse() it when appropriate.
 */
1214
1215
static struct buffer_head * ext4_find_entry (struct inode *dir,
					const struct qstr *d_name,
1216
1217
					struct ext4_dir_entry_2 **res_dir,
					int *inlined)
1218
{
1219
1220
1221
	struct super_block *sb;
	struct buffer_head *bh_use[NAMEI_RA_SIZE];
	struct buffer_head *bh, *ret = NULL;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
1222
	ext4_lblk_t start, block, b;
1223
	const u8 *name = d_name->name;
1224
1225
1226
1227
1228
	int ra_max = 0;		/* Number of bh's in the readahead
				   buffer, bh_use[] */
	int ra_ptr = 0;		/* Current index into readahead
				   buffer */
	int num = 0;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
1229
1230
	ext4_lblk_t  nblocks;
	int i, err;
1231
1232
1233
1234
	int namelen;

	*res_dir = NULL;
	sb = dir->i_sb;
1235
	namelen = d_name->len;
1236
	if (namelen > EXT4_NAME_LEN)
1237
		return NULL;
1238
1239
1240
1241
1242

	if (ext4_has_inline_data(dir)) {
		int has_inline_data = 1;
		ret = ext4_find_inline_entry(dir, d_name, res_dir,
					     &has_inline_data);
1243
1244
1245
		if (has_inline_data) {
			if (inlined)
				*inlined = 1;
1246
			return ret;
1247
		}
1248
1249
	}

1250
	if ((namelen <= 2) && (name[0] == '.') &&
1251
	    (name[1] == '.' || name[1] == '\0')) {
1252
1253
1254
1255
1256
1257
1258
1259
		/*
		 * "." or ".." will only be in the first block
		 * NFS may look up ".."; "." should be handled by the VFS
		 */
		block = start = 0;
		nblocks = 1;
		goto restart;
	}
1260
	if (is_dx(dir)) {
1261
		bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
1262
1263
1264
1265
1266
1267
1268
		/*
		 * On success, or if the error was file not found,
		 * return.  Otherwise, fall back to doing a search the
		 * old fashioned way.
		 */
		if (bh || (err != ERR_BAD_DX_DIR))
			return bh;
1269
1270
		dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
			       "falling back\n"));
1271
	}
1272
1273
	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
	start = EXT4_I(dir)->i_dir_start_lookup;
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
	if (start >= nblocks)
		start = 0;
	block = start;
restart:
	do {
		/*
		 * We deal with the read-ahead logic here.
		 */
		if (ra_ptr >= ra_max) {
			/* Refill the readahead buffer */
			ra_ptr = 0;
			b = block;
			for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
				/*
				 * Terminate if we reach the end of the
				 * directory and must wrap, or if our
				 * search has finished at this block.
				 */
				if (b >= nblocks || (num && block == start)) {
					bh_use[ra_max] = NULL;
					break;
				}
				num++;
1297
				bh = ext4_getblk(NULL, dir, b++, 0, &err);
1298
1299
				bh_use[ra_max] = bh;
				if (bh)
1300
1301
					ll_rw_block(READ | REQ_META | REQ_PRIO,
						    1, &bh);
1302
1303
1304
1305
1306
1307
1308
			}
		}
		if ((bh = bh_use[ra_ptr++]) == NULL)
			goto next;
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {
			/* read error, skip block & hope for the best */
1309
1310
			EXT4_ERROR_INODE(dir, "reading directory lblock %lu",
					 (unsigned long) block);
1311
1312
1313
			brelse(bh);
			goto next;
		}
1314
		if (!buffer_verified(bh) &&
1315
1316
		    !is_dx_internal_node(dir, block,
					 (struct ext4_dir_entry *)bh->b_data) &&
1317
1318
1319
1320
1321
1322
1323
1324
		    !ext4_dirent_csum_verify(dir,
				(struct ext4_dir_entry *)bh->b_data)) {
			EXT4_ERROR_INODE(dir, "checksumming directory "
					 "block %lu", (unsigned long)block);
			brelse(bh);
			goto next;
		}
		set_buffer_verified(bh);
1325
		i = search_dirblock(bh, dir, d_name,
1326
			    block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
1327
		if (i == 1) {
1328
			EXT4_I(dir)->i_dir_start_lookup = block;
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
			ret = bh;
			goto cleanup_and_exit;
		} else {
			brelse(bh);
			if (i < 0)
				goto cleanup_and_exit;
		}
	next:
		if (++block >= nblocks)
			block = 0;
	} while (block != start);

	/*
	 * If the directory has grown while we were searching, then
	 * search the last part of the directory before giving up.
	 */
	block = nblocks;
1346
	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
1347
1348
1349
1350
1351
1352
1353
1354
	if (block < nblocks) {
		start = 0;
		goto restart;
	}

cleanup_and_exit:
	/* Clean up the read-ahead blocks */
	for (; ra_ptr < ra_max; ra_ptr++)
1355
		brelse(bh_use[ra_ptr]);
1356
1357
1358
	return ret;
}

1359
static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
1360
		       struct ext4_dir_entry_2 **res_dir, int *err)
1361
{
1362
	struct super_block * sb = dir->i_sb;
1363
1364
1365
	struct dx_hash_info	hinfo;
	struct dx_frame frames[2], *frame;
	struct buffer_head *bh;
Aneesh Kumar K.V's avatar
Aneesh Kumar K.V committed
1366
	ext4_lblk_t block;
1367
1368
	int retval;

1369
1370
	if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
		return NULL;
1371
1372
	do {
		block = dx_get_block(frame->at);
1373
1374
1375
		bh = ext4_read_dirblock(dir, block, DIRENT);
		if (IS_ERR(bh)) {
			*err = PTR_ERR(bh);
1376
1377
			goto errout;
		}
1378
1379
1380
1381
1382
1383
		retval = search_dirblock(bh, dir, d_name,
					 block << EXT4_BLOCK_SIZE_BITS(sb),
					 res_dir);
		if (retval == 1) { 	/* Success! */
			dx_release(frames);
			return bh;
1384
		}
1385
		brelse(bh);
1386
1387
1388
1389
1390
		if (retval == -1) {
			*err = ERR_BAD_DX_DIR;
			goto errout;
		}

1391
		/* Check to see if we should continue to search */
1392
		retval = ext4_htree_next_block(dir, hinfo.hash, frame,
1393
1394
					       frames, NULL);
		if (retval < 0) {
1395
			ext4_warning(sb,
1396
1397
1398
1399