/*
 *  linux/fs/ext4/namei.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/namei.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 *  Directory entry file type support and forward compatibility hooks
 *	for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
 *  Hash Tree Directory indexing (c)
 *	Daniel Phillips, 2001
 *  Hash Tree Directory indexing porting
 *	Christopher Li, 2002
 *  Hash Tree Directory indexing cleanup
 *	Theodore Ts'o, 2002
 */

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/jbd2.h>
#include <linux/time.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/bio.h>
#include "ext4.h"
#include "ext4_jbd2.h"

#include "xattr.h"
#include "acl.h"

#include <trace/events/ext4.h>
/*
 * define how far ahead to read directories while searching them.
 */
#define NAMEI_RA_CHUNKS  2
#define NAMEI_RA_BLOCKS  4
#define NAMEI_RA_SIZE	     (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
#define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))

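/*
 * Grow the directory by one block: bump i_size, read the new block with
 * create == 1, and take journal write access on it.  Returns -ENOSPC via
 * *err once the directory has reached s_max_dir_size_kb.
 */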
static struct buffer_head *ext4_append(handle_t *handle,
					struct inode *inode,
					ext4_lblk_t *block, int *err)
{
	struct buffer_head *bh;

	if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
		     ((inode->i_size >> 10) >=
		      EXT4_SB(inode->i_sb)->s_max_dir_size_kb))) {
		*err = -ENOSPC;
		return NULL;
	}

	*block = inode->i_size >> inode->i_sb->s_blocksize_bits;

	bh = ext4_bread(handle, inode, *block, 1, err);
	if (bh) {
		inode->i_size += inode->i_sb->s_blocksize;
		EXT4_I(inode)->i_disksize = inode->i_size;
		*err = ext4_journal_get_write_access(handle, bh);
		if (*err) {
			brelse(bh);
			bh = NULL;
		}
	}
	if (!bh && !(*err)) {
		*err = -EIO;
		ext4_error(inode->i_sb,
			   "Directory hole detected on inode %lu\n",
			   inode->i_ino);
	}
	return bh;
}

#ifndef assert
#define assert(test) J_ASSERT(test)
#endif

#ifdef DX_DEBUG
#define dxtrace(command) command
#else
#define dxtrace(command)
#endif

struct fake_dirent
{
	__le32 inode;
	__le16 rec_len;
	u8 name_len;
	u8 file_type;
};

struct dx_countlimit
{
	__le16 limit;
	__le16 count;
};

struct dx_entry
{
	__le32 hash;
	__le32 block;
};

/*
 * dx_root_info is laid out so that if it should somehow get overlaid by a
 * dirent the two low bits of the hash version will be zero.  Therefore, the
 * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
 */

struct dx_root
{
	struct fake_dirent dot;
	char dot_name[4];
	struct fake_dirent dotdot;
	char dotdot_name[4];
	struct dx_root_info
	{
		__le32 reserved_zero;
		u8 hash_version;
		u8 info_length; /* 8 */
		u8 indirect_levels;
		u8 unused_flags;
	}
	info;
	struct dx_entry	entries[0];
};

struct dx_node
{
	struct fake_dirent fake;
	struct dx_entry	entries[0];
};


struct dx_frame
{
	struct buffer_head *bh;
	struct dx_entry *entries;
	struct dx_entry *at;
};

struct dx_map_entry
{
	u32 hash;
	u16 offs;
	u16 size;
};

/*
 * This goes at the end of each htree block.
 */
struct dx_tail {
	u32 dt_reserved;
	__le32 dt_checksum;	/* crc32c(uuid+inum+dirblock) */
};

static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
static inline unsigned dx_get_hash(struct dx_entry *entry);
static void dx_set_hash(struct dx_entry *entry, unsigned value);
static unsigned dx_get_count(struct dx_entry *entries);
static unsigned dx_get_limit(struct dx_entry *entries);
static void dx_set_count(struct dx_entry *entries, unsigned value);
static void dx_set_limit(struct dx_entry *entries, unsigned value);
static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
static unsigned dx_node_limit(struct inode *dir);
static struct dx_frame *dx_probe(const struct qstr *d_name,
				 struct inode *dir,
				 struct dx_hash_info *hinfo,
				 struct dx_frame *frame,
				 int *err);
static void dx_release(struct dx_frame *frames);
static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
		       struct dx_hash_info *hinfo, struct dx_map_entry map[]);
static void dx_sort_map(struct dx_map_entry *map, unsigned count);
static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
		struct dx_map_entry *offsets, int count, unsigned blocksize);
static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize);
static void dx_insert_block(struct dx_frame *frame,
					u32 hash, ext4_lblk_t block);
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
				 struct dx_frame *frame,
				 struct dx_frame *frames,
				 __u32 *start_hash);
static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
		const struct qstr *d_name,
		struct ext4_dir_entry_2 **res_dir,
		int *err);
static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
			     struct inode *inode);

/* checksumming functions */
void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
			    unsigned int blocksize)
{
	memset(t, 0, sizeof(struct ext4_dir_entry_tail));
	t->det_rec_len = ext4_rec_len_to_disk(
			sizeof(struct ext4_dir_entry_tail), blocksize);
	t->det_reserved_ft = EXT4_FT_DIR_CSUM;
}

/* Walk through a dirent block to find a checksum "dirent" at the tail */
static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
						   struct ext4_dir_entry *de)
{
	struct ext4_dir_entry_tail *t;

#ifdef PARANOID
	struct ext4_dir_entry *d, *top;

	d = de;
	top = (struct ext4_dir_entry *)(((void *)de) +
		(EXT4_BLOCK_SIZE(inode->i_sb) -
		sizeof(struct ext4_dir_entry_tail)));
	while (d < top && d->rec_len)
		d = (struct ext4_dir_entry *)(((void *)d) +
		    le16_to_cpu(d->rec_len));

	if (d != top)
		return NULL;

	t = (struct ext4_dir_entry_tail *)d;
#else
	t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb));
#endif

	if (t->det_reserved_zero1 ||
	    le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) ||
	    t->det_reserved_zero2 ||
	    t->det_reserved_ft != EXT4_FT_DIR_CSUM)
		return NULL;

	return t;
}

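/* crc32c checksum of a dirent block, seeded with this inode's csum seed. */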
static __le32 ext4_dirent_csum(struct inode *inode,
			       struct ext4_dir_entry *dirent, int size)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
	__u32 csum;

	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
	return cpu_to_le32(csum);
}

static void warn_no_space_for_csum(struct inode *inode)
{
	ext4_warning(inode->i_sb, "no space in directory inode %lu leaf for "
		     "checksum.  Please run e2fsck -D.", inode->i_ino);
}

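/*
 * Verify the checksum stored in the tail "dirent" of a leaf block.
 * Returns 1 if it matches (or if metadata checksums are not enabled).
 */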
int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
{
	struct ext4_dir_entry_tail *t;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return 1;

	t = get_dirent_tail(inode, dirent);
	if (!t) {
		warn_no_space_for_csum(inode);
		return 0;
	}

	if (t->det_checksum != ext4_dirent_csum(inode, dirent,
						(void *)t - (void *)dirent))
		return 0;

	return 1;
}

static void ext4_dirent_csum_set(struct inode *inode,
				 struct ext4_dir_entry *dirent)
{
	struct ext4_dir_entry_tail *t;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return;

	t = get_dirent_tail(inode, dirent);
	if (!t) {
		warn_no_space_for_csum(inode);
		return;
	}

	t->det_checksum = ext4_dirent_csum(inode, dirent,
					   (void *)t - (void *)dirent);
}

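/* Recompute the leaf checksum before handing the buffer to the journal. */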
int ext4_handle_dirty_dirent_node(handle_t *handle,
				  struct inode *inode,
				  struct buffer_head *bh)
{
	ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
	return ext4_handle_dirty_metadata(handle, inode, bh);
}

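/*
 * Find the dx_countlimit inside an htree root or node block, or return
 * NULL if the block does not look like either.  The byte offset of the
 * count/limit pair is returned through *offset when offset is non-NULL.
 */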
static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
					       struct ext4_dir_entry *dirent,
					       int *offset)
{
	struct ext4_dir_entry *dp;
	struct dx_root_info *root;
	int count_offset;

	if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb))
		count_offset = 8;
	else if (le16_to_cpu(dirent->rec_len) == 12) {
		dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
		if (le16_to_cpu(dp->rec_len) !=
		    EXT4_BLOCK_SIZE(inode->i_sb) - 12)
			return NULL;
		root = (struct dx_root_info *)(((void *)dp + 12));
		if (root->reserved_zero ||
		    root->info_length != sizeof(struct dx_root_info))
			return NULL;
		count_offset = 32;
	} else
		return NULL;

	if (offset)
		*offset = count_offset;
	return (struct dx_countlimit *)(((void *)dirent) + count_offset);
}

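/*
 * Checksum of the dx_entry array plus the dx_tail, computed with the
 * tail's checksum field temporarily zeroed.
 */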
static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
			   int count_offset, int count, struct dx_tail *t)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
	__u32 csum, old_csum;
	int size;

	size = count_offset + (count * sizeof(struct dx_entry));
	old_csum = t->dt_checksum;
	t->dt_checksum = 0;
	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
	csum = ext4_chksum(sbi, csum, (__u8 *)t, sizeof(struct dx_tail));
	t->dt_checksum = old_csum;

	return cpu_to_le32(csum);
}

static int ext4_dx_csum_verify(struct inode *inode,
			       struct ext4_dir_entry *dirent)
{
	struct dx_countlimit *c;
	struct dx_tail *t;
	int count_offset, limit, count;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return 1;

	c = get_dx_countlimit(inode, dirent, &count_offset);
	if (!c) {
		EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
		return 1;
	}
	limit = le16_to_cpu(c->limit);
	count = le16_to_cpu(c->count);
	if (count_offset + (limit * sizeof(struct dx_entry)) >
	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
		warn_no_space_for_csum(inode);
		return 1;
	}
	t = (struct dx_tail *)(((struct dx_entry *)c) + limit);

	if (t->dt_checksum != ext4_dx_csum(inode, dirent, count_offset,
					    count, t))
		return 0;
	return 1;
}

static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
{
	struct dx_countlimit *c;
	struct dx_tail *t;
	int count_offset, limit, count;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return;

	c = get_dx_countlimit(inode, dirent, &count_offset);
	if (!c) {
		EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
		return;
	}
	limit = le16_to_cpu(c->limit);
	count = le16_to_cpu(c->count);
	if (count_offset + (limit * sizeof(struct dx_entry)) >
	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
		warn_no_space_for_csum(inode);
		return;
	}
	t = (struct dx_tail *)(((struct dx_entry *)c) + limit);

	t->dt_checksum = ext4_dx_csum(inode, dirent, count_offset, count, t);
}

static inline int ext4_handle_dirty_dx_node(handle_t *handle,
					    struct inode *inode,
					    struct buffer_head *bh)
{
	ext4_dx_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
	return ext4_handle_dirty_metadata(handle, inode, bh);
}

/*
 * p is at least 6 bytes before the end of page
 */
static inline struct ext4_dir_entry_2 *
ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
{
	return (struct ext4_dir_entry_2 *)((char *)p +
		ext4_rec_len_from_disk(p->rec_len, blocksize));
}

/*
 * Future: use high four bits of block for coalesce-on-delete flags
 * Mask them off for now.
 */

static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
{
	return le32_to_cpu(entry->block) & 0x00ffffff;
}

static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
{
	entry->block = cpu_to_le32(value);
}

static inline unsigned dx_get_hash(struct dx_entry *entry)
{
	return le32_to_cpu(entry->hash);
}

static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
{
	entry->hash = cpu_to_le32(value);
}

static inline unsigned dx_get_count(struct dx_entry *entries)
{
	return le16_to_cpu(((struct dx_countlimit *) entries)->count);
}

static inline unsigned dx_get_limit(struct dx_entry *entries)
{
	return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
}

static inline void dx_set_count(struct dx_entry *entries, unsigned value)
{
	((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
}

static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
{
	((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
}

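/*
 * Maximum number of dx_entries in the root block: whatever is left after
 * the "." and ".." entries and the dx_root_info, minus the checksum tail
 * when metadata checksums are enabled.
 */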
static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
{
	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
		EXT4_DIR_REC_LEN(2) - infosize;

	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		entry_space -= sizeof(struct dx_tail);
	return entry_space / sizeof(struct dx_entry);
}

static inline unsigned dx_node_limit(struct inode *dir)
{
	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);

	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		entry_space -= sizeof(struct dx_tail);
	return entry_space / sizeof(struct dx_entry);
}

/*
 * Debug
 */
#ifdef DX_DEBUG
static void dx_show_index(char * label, struct dx_entry *entries)
{
	int i, n = dx_get_count (entries);
	printk(KERN_DEBUG "%s index ", label);
	for (i = 0; i < n; i++) {
		printk("%x->%lu ", i ? dx_get_hash(entries + i) :
				0, (unsigned long)dx_get_block(entries + i));
	}
	printk("\n");
}

struct stats
{
	unsigned names;
	unsigned space;
	unsigned bcount;
};

static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_entry_2 *de,
				 int size, int show_names)
{
	unsigned names = 0, space = 0;
	char *base = (char *) de;
	struct dx_hash_info h = *hinfo;

	printk("names: ");
	while ((char *) de < base + size)
	{
		if (de->inode)
		{
			if (show_names)
			{
				int len = de->name_len;
				char *name = de->name;
				while (len--) printk("%c", *name++);
				ext4fs_dirhash(de->name, de->name_len, &h);
				printk(":%x.%u ", h.hash,
				       (unsigned) ((char *) de - base));
			}
			space += EXT4_DIR_REC_LEN(de->name_len);
			names++;
		}
		de = ext4_next_entry(de, size);
	}
	printk("(%i)\n", names);
	return (struct stats) { names, space, 1 };
}

struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
			     struct dx_entry *entries, int levels)
{
	unsigned blocksize = dir->i_sb->s_blocksize;
	unsigned count = dx_get_count(entries), names = 0, space = 0, i;
	unsigned bcount = 0;
	struct buffer_head *bh;
	int err;
	printk("%i indexed blocks...\n", count);
	for (i = 0; i < count; i++, entries++)
	{
		ext4_lblk_t block = dx_get_block(entries);
		ext4_lblk_t hash  = i ? dx_get_hash(entries): 0;
		u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
		struct stats stats;
		printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
		if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue;
		stats = levels?
		   dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
		   dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0);
		names += stats.names;
		space += stats.space;
		bcount += stats.bcount;
		brelse(bh);
	}
	if (bcount)
		printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
		       levels ? "" : "   ", names, space/bcount,
		       (space/bcount)*100/blocksize);
	return (struct stats) { names, space, bcount};
}
#endif /* DX_DEBUG */

/*
 * Probe for a directory leaf block to search.
 *
 * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
 * error in the directory index, and the caller should fall back to
 * searching the directory normally.  The callers of dx_probe **MUST**
 * check for this error code, and make sure it never gets reflected
 * back to userspace.
 */
static struct dx_frame *
dx_probe(const struct qstr *d_name, struct inode *dir,
	 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
{
	unsigned count, indirect;
	struct dx_entry *at, *entries, *p, *q, *m;
	struct dx_root *root;
	struct buffer_head *bh;
	struct dx_frame *frame = frame_in;
	u32 hash;

	frame->bh = NULL;
	if (!(bh = ext4_bread(NULL, dir, 0, 0, err))) {
		if (*err == 0)
			*err = ERR_BAD_DX_DIR;
		goto fail;
	}
	root = (struct dx_root *) bh->b_data;
	if (root->info.hash_version != DX_HASH_TEA &&
	    root->info.hash_version != DX_HASH_HALF_MD4 &&
	    root->info.hash_version != DX_HASH_LEGACY) {
		ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
			     root->info.hash_version);
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}
	hinfo->hash_version = root->info.hash_version;
	if (hinfo->hash_version <= DX_HASH_TEA)
		hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
	hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
	if (d_name)
		ext4fs_dirhash(d_name->name, d_name->len, hinfo);
	hash = hinfo->hash;

	if (root->info.unused_flags & 1) {
		ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
			     root->info.unused_flags);
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}

	if ((indirect = root->info.indirect_levels) > 1) {
		ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
			     root->info.indirect_levels);
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}

	if (!buffer_verified(bh) &&
	    !ext4_dx_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) {
		ext4_warning(dir->i_sb, "Root failed checksum");
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}
	set_buffer_verified(bh);

	entries = (struct dx_entry *) (((char *)&root->info) +
				       root->info.info_length);

	if (dx_get_limit(entries) != dx_root_limit(dir,
						   root->info.info_length)) {
		ext4_warning(dir->i_sb, "dx entry: limit != root limit");
		brelse(bh);
		*err = ERR_BAD_DX_DIR;
		goto fail;
	}

	dxtrace(printk("Look up %x", hash));
	while (1)
	{
		count = dx_get_count(entries);
		if (!count || count > dx_get_limit(entries)) {
			ext4_warning(dir->i_sb,
				     "dx entry: no count or count > limit");
			brelse(bh);
			*err = ERR_BAD_DX_DIR;
			goto fail2;
		}

		p = entries + 1;
		q = entries + count - 1;
		while (p <= q)
		{
			m = p + (q - p)/2;
			dxtrace(printk("."));
			if (dx_get_hash(m) > hash)
				q = m - 1;
			else
				p = m + 1;
		}

		if (0) // linear search cross check
		{
			unsigned n = count - 1;
			at = entries;
			while (n--)
			{
				dxtrace(printk(","));
				if (dx_get_hash(++at) > hash)
				{
					at--;
					break;
				}
			}
			assert (at == p - 1);
		}

		at = p - 1;
		dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
		frame->bh = bh;
		frame->entries = entries;
		frame->at = at;
		if (!indirect--) return frame;
		if (!(bh = ext4_bread(NULL, dir, dx_get_block(at), 0, err))) {
			if (!(*err))
				*err = ERR_BAD_DX_DIR;
			goto fail2;
		}
		entries = ((struct dx_node *) bh->b_data)->entries;

		if (!buffer_verified(bh) &&
		    !ext4_dx_csum_verify(dir,
					 (struct ext4_dir_entry *)bh->b_data)) {
			ext4_warning(dir->i_sb, "Node failed checksum");
			brelse(bh);
			*err = ERR_BAD_DX_DIR;
			goto fail2;
		}
		set_buffer_verified(bh);

		if (dx_get_limit(entries) != dx_node_limit (dir)) {
			ext4_warning(dir->i_sb,
				     "dx entry: limit != node limit");
			brelse(bh);
			*err = ERR_BAD_DX_DIR;
			goto fail2;
		}
		frame++;
		frame->bh = NULL;
	}
fail2:
	while (frame >= frame_in) {
		brelse(frame->bh);
		frame--;
	}
fail:
	if (*err == ERR_BAD_DX_DIR)
		ext4_warning(dir->i_sb,
			     "Corrupt dir inode %lu, running e2fsck is "
			     "recommended.", dir->i_ino);
	return NULL;
}

static void dx_release (struct dx_frame *frames)
{
	if (frames[0].bh == NULL)
		return;

	if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
		brelse(frames[1].bh);
	brelse(frames[0].bh);
}

/*
 * This function increments the frame pointer to search the next leaf
 * block, and reads in the necessary intervening nodes if the search
 * should be necessary.  Whether or not the search is necessary is
 * controlled by the hash parameter.  If the hash value is even, then
 * the search is only continued if the next block starts with that
 * hash value.  This is used if we are searching for a specific file.
 *
 * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
 *
 * This function returns 1 if the caller should continue to search,
 * or 0 if it should not.  If there is an error reading one of the
 * index blocks, it will return a negative error code.
 *
 * If start_hash is non-null, it will be filled in with the starting
 * hash of the next page.
 */
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
				 struct dx_frame *frame,
				 struct dx_frame *frames,
				 __u32 *start_hash)
{
	struct dx_frame *p;
	struct buffer_head *bh;
	int err, num_frames = 0;
	__u32 bhash;

	p = frame;
	/*
	 * Find the next leaf page by incrementing the frame pointer.
	 * If we run out of entries in the interior node, loop around and
	 * increment pointer in the parent node.  When we break out of
	 * this loop, num_frames indicates the number of interior
	 * nodes that need to be read.
	 */
	while (1) {
		if (++(p->at) < p->entries + dx_get_count(p->entries))
			break;
		if (p == frames)
			return 0;
		num_frames++;
		p--;
	}

	/*
	 * If the hash is 1, then continue only if the next page has a
	 * continuation hash of any value.  This is used for readdir
	 * handling.  Otherwise, check to see if the hash matches the
	 * desired continuation hash.  If it doesn't, return since
	 * there's no point to read in the successive index pages.
	 */
	bhash = dx_get_hash(p->at);
	if (start_hash)
		*start_hash = bhash;
	if ((hash & 1) == 0) {
		if ((bhash & ~1) != hash)
			return 0;
	}
	/*
	 * If the hash is HASH_NB_ALWAYS, we always go to the next
	 * block so no check is necessary
	 */
	while (num_frames--) {
		if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at),
				      0, &err))) {
			if (!err) {
				ext4_error(dir->i_sb,
					   "Directory hole detected on inode %lu\n",
					   dir->i_ino);
				return -EIO;
			}
			return err; /* Failure */
		}

		if (!buffer_verified(bh) &&
		    !ext4_dx_csum_verify(dir,
					 (struct ext4_dir_entry *)bh->b_data)) {
			ext4_warning(dir->i_sb, "Node failed checksum");
			brelse(bh);
			return -EIO;
		}
		set_buffer_verified(bh);

		p++;
		brelse(p->bh);
		p->bh = bh;
		p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
	}
	return 1;
}


/*
 * This function fills a red-black tree with information from a
 * directory block.  It returns the number of directory entries loaded
 * into the tree.  If there is an error it is returned in err.
 */
static int htree_dirblock_to_tree(struct file *dir_file,
				  struct inode *dir, ext4_lblk_t block,
				  struct dx_hash_info *hinfo,
				  __u32 start_hash, __u32 start_minor_hash)
{
	struct buffer_head *bh;
	struct ext4_dir_entry_2 *de, *top;
	int err = 0, count = 0;

	dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
							(unsigned long)block));
	if (!(bh = ext4_bread(NULL, dir, block, 0, &err))) {
		if (!err) {
			err = -EIO;
			ext4_error(dir->i_sb,
				   "Directory hole detected on inode %lu\n",
				   dir->i_ino);
		}
		return err;
	}

	if (!buffer_verified(bh) &&
			!ext4_dirent_csum_verify(dir,
				(struct ext4_dir_entry *)bh->b_data)) {
		brelse(bh);
		return -EIO;
	}
	set_buffer_verified(bh);

	de = (struct ext4_dir_entry_2 *) bh->b_data;
	top = (struct ext4_dir_entry_2 *) ((char *) de +
					   dir->i_sb->s_blocksize -
					   EXT4_DIR_REC_LEN(0));
	for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
		if (ext4_check_dir_entry(dir, NULL, de, bh,
				bh->b_data, bh->b_size,
				(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
					 + ((char *)de - bh->b_data))) {
			/* On error, skip the f_pos to the next block. */
			dir_file->f_pos = (dir_file->f_pos |
					(dir->i_sb->s_blocksize - 1)) + 1;
			brelse(bh);
			return count;
		}
		ext4fs_dirhash(de->name, de->name_len, hinfo);
		if ((hinfo->hash < start_hash) ||
		    ((hinfo->hash == start_hash) &&
		     (hinfo->minor_hash < start_minor_hash)))
			continue;
		if (de->inode == 0)
			continue;
		if ((err = ext4_htree_store_dirent(dir_file,
				   hinfo->hash, hinfo->minor_hash, de)) != 0) {
			brelse(bh);
			return err;
		}
		count++;
	}
	brelse(bh);
	return count;
}


/*
 * This function fills a red-black tree with information from a
 * directory.  We start scanning the directory in hash order, starting
 * at start_hash and start_minor_hash.
 *
 * This function returns the number of entries inserted into the tree,
 * or a negative error code.
 */
int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
			 __u32 start_minor_hash, __u32 *next_hash)
{
	struct dx_hash_info hinfo;
	struct ext4_dir_entry_2 *de;
	struct dx_frame frames[2], *frame;
	struct inode *dir;
	ext4_lblk_t block;
	int count = 0;
	int ret, err;
	__u32 hashval;

	dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
		       start_hash, start_minor_hash));
	dir = dir_file->f_path.dentry->d_inode;
	if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
		hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
		if (hinfo.hash_version <= DX_HASH_TEA)
			hinfo.hash_version +=
				EXT4_SB(dir->i_sb)->s_hash_unsigned;
		hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
		count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
					       start_hash, start_minor_hash);
		*next_hash = ~0;
		return count;
	}
	hinfo.hash = start_hash;
	hinfo.minor_hash = 0;
	frame = dx_probe(NULL, dir, &hinfo, frames, &err);
	if (!frame)
		return err;

	/* Add '.' and '..' from the htree header */
	if (!start_hash && !start_minor_hash) {
		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
		if ((err = ext4_htree_store_dirent(dir_file, 0, 0, de)) != 0)
			goto errout;
		count++;
	}
	if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
		de = ext4_next_entry(de, dir->i_sb->s_blocksize);
		if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0)
			goto errout;
		count++;
	}

	while (1) {
		block = dx_get_block(frame->at);
		ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
					     start_hash, start_minor_hash);
		if (ret < 0) {
			err = ret;
			goto errout;
		}
		count += ret;
		hashval = ~0;
		ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
					    frame, frames, &hashval);
		*next_hash = hashval;
		if (ret < 0) {
			err = ret;
			goto errout;
		}
		/*
		 * Stop if:  (a) there are no more entries, or
		 * (b) we have inserted at least one entry and the
		 * next hash value is not a continuation
		 */
		if ((ret == 0) ||
		    (count && ((hashval & 1) == 0)))
			break;
	}
	dx_release(frames);
	dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
		       "next hash: %x\n", count, *next_hash));
	return count;
errout:
	dx_release(frames);
	return (err);
}

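/* Search one directory block (bh) of dir for the name in d_name. */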
static inline int search_dirblock(struct buffer_head *bh,
				  struct inode *dir,
				  const struct qstr *d_name,
				  unsigned int offset,
				  struct ext4_dir_entry_2 **res_dir)
{
	return search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir,
			  d_name, offset, res_dir);
}

/*
 * Directory block splitting, compacting
 */

/*
 * Create map of hash values, offsets, and sizes, stored at end of block.
 * Returns number of entries mapped.
 */
static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
		       struct dx_hash_info *hinfo,
		       struct dx_map_entry *map_tail)
{
	int count = 0;
	char *base = (char *) de;
	struct dx_hash_info h = *hinfo;

	while ((char *) de < base + blocksize) {
		if (de->name_len && de->inode) {
			ext4fs_dirhash(de->name, de->name_len, &h);
			map_tail--;
			map_tail->hash = h.hash;
			map_tail->offs = ((char *) de - base)>>2;
			map_tail->size = le16_to_cpu(de->rec_len);
			count++;
			cond_resched();
		}
		/* XXX: do we need to check rec_len == 0 case? -Chris */
		de = ext4_next_entry(de, blocksize);
	}
	return count;
}

/* Sort map by hash value */
static void dx_sort_map (struct dx_map_entry *map, unsigned count)
{
	struct dx_map_entry *p, *q, *top = map + count - 1;
	int more;
	/* Combsort until bubble sort doesn't suck */
	while (count > 2) {
		count = count*10/13;
		if (count - 9 < 2) /* 9, 10 -> 11 */
			count = 11;
		for (p = top, q = p - count; q >= map; p--, q--)
			if (p->hash < q->hash)
				swap(*p, *q);
	}
	/* Garden variety bubble sort */
	do {
		more = 0;
		q = top;
		while (q-- > map) {
			if (q[1].hash >= q[0].hash)
				continue;
			swap(*(q+1), *q);
			more = 1;
		}
	} while(more);
}

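/*
 * Insert a new (hash, block) pair into the index frame, right after the
 * entry the frame currently points at.
 */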
static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
{
	struct dx_entry *entries = frame->entries;
	struct dx_entry *old = frame->at, *new = old + 1;
	int count = dx_get_count(entries);

	assert(count < dx_get_limit(entries));
	assert(old < entries + count);
	memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
	dx_set_hash(new, hash);
	dx_set_block(new, block);
	dx_set_count(entries, count + 1);
}

/*
 * NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure.
 *
 * `len <= EXT4_NAME_LEN' is guaranteed by caller.
 * `de != NULL' is guaranteed by caller.
 */
static inline int ext4_match (int len, const char * const name,
			      struct ext4_dir_entry_2 * de)
{
	if (len != de->name_len)
		return 0;
	if (!de->inode)
		return 0;
	return !memcmp(name, de->name, len);
}

/*
 * Returns 0 if not found, -1 on failure, and 1 on success
 */
int search_dir(struct buffer_head *bh,
	       char *search_buf,
	       int buf_size,
	       struct inode *dir,
	       const struct qstr *d_name,
	       unsigned int offset,
	       struct ext4_dir_entry_2 **res_dir)
{
	struct ext4_dir_entry_2 * de;
	char * dlimit;
	int de_len;
	const char *name = d_name->name;
	int namelen = d_name->len;

	de = (struct ext4_dir_entry_2 *)search_buf;
	dlimit = search_buf + buf_size;
	while ((char *) de < dlimit) {
		/* this code is executed quadratically often */
		/* do minimal checking `by hand' */

		if ((char *) de + namelen <= dlimit &&
		    ext4_match (namelen, name, de)) {
			/* found a match - just to be sure, do a full check */
			if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
						 bh->b_size, offset))
				return -1;
			*res_dir = de;
			return 1;
		}
		/* prevent looping on a bad block */
		de_len = ext4_rec_len_from_disk(de->rec_len,
						dir->i_sb->s_blocksize);
		if (de_len <= 0)
			return -1;
		offset += de_len;
		de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
	}
	return 0;
}

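/*
 * Return 1 if this block of an htree directory is an internal index node:
 * block 0, or a block whose whole space is covered by an unused dirent.
 */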
static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
			       struct ext4_dir_entry *de)
{
	struct super_block *sb = dir->i_sb;

	if (!is_dx(dir))
		return 0;
	if (block == 0)
		return 1;
	if (de->inode == 0 &&
	    ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) ==
			sb->s_blocksize)
		return 1;
	return 0;
}

/*
 *	ext4_find_entry()
 *
 * finds an entry in the specified directory with the wanted name. It
 * returns the cache buffer in which the entry was found, and the entry
 * itself (as a parameter - res_dir). It does NOT read the inode of the
 * entry - you'll have to do that yourself if you want to.
 *
 * The returned buffer_head has ->b_count elevated.  The caller is expected
 * to brelse() it when appropriate.
 */
static struct buffer_head * ext4_find_entry (struct inode *dir,
					const struct qstr *d_name,
					struct ext4_dir_entry_2 **res_dir,
					int *inlined)
{
	struct super_block *sb;
	struct buffer_head *bh_use[NAMEI_RA_SIZE];
	struct buffer_head *bh, *ret = NULL;
	ext4_lblk_t start, block, b;
	const u8 *name = d_name->name;
	int ra_max = 0;		/* Number of bh's in the readahead
				   buffer, bh_use[] */
	int ra_ptr = 0;		/* Current index into readahead
				   buffer */
	int num = 0;
	ext4_lblk_t  nblocks;
	int i, err;
	int namelen;

	*res_dir = NULL;
	sb = dir->i_sb;
	namelen = d_name->len;
	if (namelen > EXT4_NAME_LEN)
		return NULL;

	if (ext4_has_inline_data(dir)) {
		int has_inline_data = 1;
		ret = ext4_find_inline_entry(dir, d_name, res_dir,
					     &has_inline_data);
		if (has_inline_data) {
			if (inlined)
				*inlined = 1;
			return ret;
		}
	}

	if ((namelen <= 2) && (name[0] == '.') &&
	    (name[1] == '.' || name[1] == '\0')) {
		/*
		 * "." or ".." will only be in the first block
		 * NFS may look up ".."; "." should be handled by the VFS
		 */
		block = start = 0;
		nblocks = 1;
		goto restart;
	}
	if (is_dx(dir)) {
		bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
		/*
		 * On success, or if the error was file not found,
		 * return.  Otherwise, fall back to doing a search the
		 * old fashioned way.
		 */
		if (bh || (err != ERR_BAD_DX_DIR))
			return bh;
		dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
			       "falling back\n"));
	}
	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
	start = EXT4_I(dir)->i_dir_start_lookup;
	if (start >= nblocks)
		start = 0;
	block = start;
restart:
	do {
		/*
		 * We deal with the read-ahead logic here.
		 */
		if (ra_ptr >= ra_max) {
			/* Refill the readahead buffer */
			ra_ptr = 0;
			b = block;
			for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
				/*
				 * Terminate if we reach the end of the
				 * directory and must wrap, or if our
				 * search has finished at this block.
				 */
				if (b >= nblocks || (num && block == start)) {
					bh_use[ra_max] = NULL;
					break;
				}
				num++;
				bh = ext4_getblk(NULL, dir, b++, 0, &err);
				bh_use[ra_max] = bh;
				if (bh)
					ll_rw_block(READ | REQ_META | REQ_PRIO,
						    1, &bh);
			}
		}
		if ((bh = bh_use[ra_ptr++]) == NULL)
			goto next;
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {
			/* read error, skip block & hope for the best */
			EXT4_ERROR_INODE(dir, "reading directory lblock %lu",
					 (unsigned long) block);
			brelse(bh);
			goto next;
		}
		if (!buffer_verified(bh) &&
		    !is_dx_internal_node(dir, block,
					 (struct ext4_dir_entry *)bh->b_data) &&
		    !ext4_dirent_csum_verify(dir,
				(struct ext4_dir_entry *)bh->b_data)) {
			EXT4_ERROR_INODE(dir, "checksumming directory "
					 "block %lu", (unsigned long)block);
			brelse(bh);
			goto next;
		}
		set_buffer_verified(bh);
		i = search_dirblock(bh, dir, d_name,
			    block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
		if (i == 1) {
			EXT4_I(dir)->i_dir_start_lookup = block;
			ret = bh;
			goto cleanup_and_exit;
		} else {
			brelse(bh);
			if (i < 0)
				goto cleanup_and_exit;
		}
	next:
		if (++block >= nblocks)
			block = 0;
	} while (block != start);

	/*
	 * If the directory has grown while we were searching, then
	 * search the last part of the directory before giving up.
	 */
	block = nblocks;
	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
	if (block < nblocks) {
		start = 0;
		goto restart;
	}

cleanup_and_exit:
	/* Clean up the read-ahead blocks */
	for (; ra_ptr < ra_max; ra_ptr++)
		brelse(bh_use[ra_ptr]);
	return ret;
}

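/*
 * Look up a name in an htree directory: probe the index for the candidate
 * leaf block, search it, and move on to the next leaf while the hash may
 * continue there.
 */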
static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
		       struct ext4_dir_entry_2 **res_dir, int *err)
{
	struct super_block * sb = dir->i_sb;
	struct dx_hash_info	hinfo;
	struct dx_frame frames[2], *frame;
	struct buffer_head *bh;
	ext4_lblk_t block;
	int retval;

	if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
		return NULL;
	do {
		block = dx_get_block(frame->at);
		if (!(bh = ext4_bread(NULL, dir, block, 0, err))) {
			if (!(*err)) {
				*err = -EIO;
				ext4_error(dir->i_sb,
					   "Directory hole detected on inode %lu\n",
					   dir->i_ino);
			}
			goto errout;
		}

		if (!buffer_verified(bh) &&
		    !ext4_dirent_csum_verify(dir,
				(struct ext4_dir_entry *)bh->b_data)) {
			EXT4_ERROR_INODE(dir, "checksumming directory "
					 "block %lu", (unsigned long)block);
			brelse(bh);
			*err = -EIO;
			goto errout;
		}
		set_buffer_verified(bh);
		retval = search_dirblock(bh, dir, d_name,
					 block << EXT4_BLOCK_SIZE_BITS(sb),
					 res_dir);
		if (retval == 1) { 	/* Success! */
			dx_release(frames);
			return bh;
		}
		brelse(bh);
		if (retval == -1) {
			*err = ERR_BAD_DX_DIR;
			goto errout;
		}

		/* Check to see if we should continue to search */
		retval = ext4_htree_next_block(dir, hinfo.hash, frame,
					       frames, NULL);
		if (retval < 0) {
			ext4_warning(sb,
			     "error reading index page in directory #%lu",
			     dir->i_ino);
			*err = retval;
			goto errout;
		}
	} while (retval == 1);

	*err = -ENOENT;
errout:
	dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name));
	dx_release (frames);
	return NULL;
}

static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
	struct inode *inode;
	struct ext4_dir_entry_2 *de;
	struct buffer_head *bh;